Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

reorganize the project #2

Merged
merged 24 commits into from
Mar 17, 2020
Merged
Show file tree
Hide file tree
Changes from 16 commits
Commits
Show all changes
24 commits
Select commit Hold shift + click to select a range
b65c70f
convert the script and tests into a package with a __main__ and a tes…
keewis Mar 11, 2020
e98940a
add a module to make adding new formats easier
keewis Mar 16, 2020
12ca7c3
rewrite the register func to take format objects and call it in __init__
keewis Mar 16, 2020
09fada6
add a dummy format that detects every line and reformats none
keewis Mar 16, 2020
063acae
rename the dummy format and register it
keewis Mar 16, 2020
41d1f48
only work with the line data when extracting or reformatting
keewis Mar 16, 2020
7d2e575
add a split-by-lines version of the test docstring
keewis Mar 16, 2020
e292545
add a doctest format module
keewis Mar 16, 2020
cb97bce
return the category along with the processed lines
keewis Mar 16, 2020
7e0ebd0
also return the prompt length
keewis Mar 16, 2020
0d40f28
register using the format object's name attribute
keewis Mar 16, 2020
8a11ca7
limit the caught exceptions to TokenErrors
keewis Mar 16, 2020
3c5520c
rewrite the main logic and provide more informative error messages
keewis Mar 16, 2020
4d70da9
move the functions into different modules
keewis Mar 16, 2020
535325a
import the register function into the main package
keewis Mar 16, 2020
913d2fd
add tests for the classification function
keewis Mar 16, 2020
2f12c57
Apply suggestions from code review
keewis Mar 16, 2020
d7b3cb2
use consume(iterable, n=1) instead of next
keewis Mar 17, 2020
71c77f2
shorten the error message a bit
keewis Mar 17, 2020
4c8f09a
remove the obsolete blackdoc test file
keewis Mar 17, 2020
a341470
rename classify to detect_format
keewis Mar 17, 2020
6cb15c2
remove unclassify
keewis Mar 17, 2020
b7c80f9
rewrite the message updating function to only parse messages
keewis Mar 17, 2020
685ecea
rename category to code_format
keewis Mar 17, 2020
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
450 changes: 0 additions & 450 deletions blackdoc.py

This file was deleted.

16 changes: 16 additions & 0 deletions blackdoc/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
from .blacken import blacken
from .classification import classify, unclassify
from .formats import register_format # noqa


def line_numbers(lines):
    """Pair every line with its line number, counting from 1.

    Yields ``(number, line)`` tuples in the order of ``lines``.
    """
    for number, line in enumerate(lines, start=1):
        yield number, line


def format_lines(lines, mode=None):
    """Run the complete formatting pipeline over ``lines``.

    The lines are numbered, labeled by ``classify``, passed through
    ``blacken`` together with ``mode`` and finally handed to ``unclassify``,
    whose result is returned.
    """
    labeled = classify(line_numbers(lines))
    return unclassify(blacken(labeled, mode=mode))
252 changes: 252 additions & 0 deletions blackdoc/__main__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,252 @@
import argparse
import pathlib
import sys

import black

from . import format_lines


def collect_files(src, include, exclude):
    """Yield the files to work on for the given source paths.

    Directories are expanded using ``black.gen_python_files_in_dir`` with the
    ``include`` / ``exclude`` patterns and the gitignore of the project root.
    Existing files and the special path ``-`` are yielded as given. Every
    other path is reported on stderr and skipped.
    """
    root = black.find_project_root(tuple(src))
    report = black.Report()

    for path in src:
        if path.is_dir():
            gitignore = black.get_gitignore(root)
            yield from black.gen_python_files_in_dir(
                path, root, include, exclude, report, gitignore
            )
            continue

        if path.is_file() or str(path) == "-":
            yield path
            continue

        print(f"invalid path: {path}", file=sys.stderr)


def format_and_overwrite(path, mode):
    """Reformat the file at ``path`` and write the result back.

    The file is decoded with ``black.decode_bytes`` and only rewritten (using
    the detected encoding and newline) if formatting changed its content.

    Returns ``"unchanged"``, ``"reformatted"`` or, if ``black.InvalidInput``
    was raised while formatting, ``"error"`` (after printing the reason).
    """
    try:
        with open(path, mode="rb") as source:
            content, encoding, newline = black.decode_bytes(source.read())

        new_content = "\n".join(format_lines(content.split("\n"), mode))

        if new_content == content:
            return "unchanged"

        print(f"reformatted {path}")
        with open(path, "w", encoding=encoding, newline=newline) as target:
            target.write(new_content)

        return "reformatted"
    except black.InvalidInput as e:
        print(f"error: cannot format {path.absolute()}: {e}")
        return "error"


def format_and_check(path, mode):
    """Check whether formatting would change the file at ``path``.

    The file is read and formatted in memory, but never written.

    Returns ``"unchanged"``, ``"reformatted"`` (the file would change) or,
    if ``black.InvalidInput`` was raised while formatting, ``"error"``
    (after printing the reason).
    """
    try:
        with open(path, mode="rb") as source:
            content, _, _ = black.decode_bytes(source.read())

        new_content = "\n".join(format_lines(content.split("\n"), mode))

        if new_content == content:
            return "unchanged"

        print(f"would reformat {path}")
        return "reformatted"
    except black.InvalidInput as e:
        print(f"error: cannot format {path.absolute()}: {e}")
        return "error"


def report_changes(n_reformatted, n_unchanged, n_error):
    """Summarize the outcome of an in-place run as a single sentence.

    Each of the three counts contributes one comma-separated clause, and
    counts of zero are left out. If all counts are zero, only the closing
    ``"."`` is returned.

    Parameters: the number of reformatted files, of unchanged files, and of
    files that could not be reformatted.

    Returns the summary as a string.
    """

    def noun(n):
        return "file" if n < 2 else "files"

    def verb(n):
        # keep the verb in agreement with the noun:
        # "1 file fails" but "2 files fail"
        return "fails" if n < 2 else "fail"

    reports = []
    if n_reformatted > 0:
        reports.append(f"{n_reformatted} {noun(n_reformatted)} reformatted")

    if n_unchanged > 0:
        reports.append(f"{n_unchanged} {noun(n_unchanged)} left unchanged")

    if n_error > 0:
        reports.append(f"{n_error} {noun(n_error)} {verb(n_error)} to reformat")

    return ", ".join(reports) + "."


def report_possible_changes(n_reformatted, n_unchanged, n_error):
    """Summarize the outcome of a check-only run as a single sentence.

    Each non-zero count contributes one comma-separated clause describing
    what would happen to that many files. If all counts are zero, only the
    closing ``"."`` is returned.
    """
    outcomes = (
        (n_reformatted, "would be reformatted"),
        (n_unchanged, "would be left unchanged"),
        (n_error, "would fail to reformat"),
    )

    clauses = []
    for count, outcome in outcomes:
        if count <= 0:
            continue

        noun = "files" if count >= 2 else "file"
        clauses.append(f"{count} {noun} {outcome}")

    return ", ".join(clauses) + "."


def statistics(sources):
    """Count how many sources ended up with each result.

    ``sources`` maps every processed source to its result string.

    Returns the tuple ``(n_reformatted, n_unchanged, n_error)``. A
    ``RuntimeError`` is raised if any result other than ``"reformatted"``,
    ``"unchanged"`` or ``"error"`` is present.
    """
    from collections import Counter

    counts = Counter(sources.values())

    # take out the known results; anything left over is unexpected
    n_reformatted = counts.pop("reformatted", 0)
    n_unchanged = counts.pop("unchanged", 0)
    n_error = counts.pop("error", 0)

    if counts:
        raise RuntimeError(f"unknown results: {counts.keys()}")

    return n_reformatted, n_unchanged, n_error


def process(args):
    """Run the tool for the parsed command line ``args``.

    Collects the files below ``args.src``, applies the selected action
    (``"inplace"`` or ``"check"``) to each of them and prints a summary.

    Returns the exit code of the program:

    - 0: nothing to do, or everything was processed without problems
    - 1: ``--check`` was given and some files would be reformatted
    - 2: the include / exclude regular expression is invalid
    - 123: at least one file could not be formatted
    """
    if not args.src:
        print("No Path provided. Nothing to do 😴")
        return 0

    try:
        include_regex = black.re_compile_maybe_verbose(args.include)
    except black.re.error:
        print(
            f"Invalid regular expression for include given: {args.include!r}",
            file=sys.stderr,
        )
        return 2

    try:
        exclude_regex = black.re_compile_maybe_verbose(args.exclude)
    except black.re.error:
        print(
            f"Invalid regular expression for exclude given: {args.exclude!r}",
            file=sys.stderr,
        )
        return 2

    sources = set(collect_files(args.src, include_regex, exclude_regex))
    if not sources:
        print("No Python files are present to be formatted. Nothing to do 😴")
        return 0

    # the option uses ``argparse.SUPPRESS`` as default, so the attribute is
    # missing unless ``--target-versions`` was passed at least once
    target_versions = {
        black.TargetVersion[version.upper()]
        for version in getattr(args, "target_versions", ())
    }
    mode = black.FileMode(
        line_length=args.line_length, target_versions=target_versions,
    )

    actions = {
        "inplace": format_and_overwrite,
        "check": format_and_check,
    }
    action = actions[args.action]

    changed_sources = {source: action(source, mode) for source in sources}
    n_reformatted, n_unchanged, n_error = statistics(changed_sources)

    report_formatters = {
        "inplace": report_changes,
        "check": report_possible_changes,
    }
    report = report_formatters[args.action](n_reformatted, n_unchanged, n_error)

    # failures take precedence: the help text of ``--check`` documents return
    # code 123 for errors, so a run with failing files must not report success
    if n_error > 0:
        return_code = 123
    elif args.action == "check" and n_reformatted > 0:
        return_code = 1
    else:
        return_code = 0

    print("Oh no! 💥 💔 💥" if return_code else "All done! ✨ 🍰 ✨")
    print(report)
    return return_code


# Command line interface. ``ArgumentDefaultsHelpFormatter`` appends the
# default value of each option to its help text.
parser = argparse.ArgumentParser(
    description="run black on documentation code snippets (e.g. doctest)",
    formatter_class=argparse.ArgumentDefaultsHelpFormatter,
)
# May be passed several times (``action="append"``). With ``argparse.SUPPRESS``
# as default, the attribute is not set at all unless the option is used.
parser.add_argument(
    "-t",
    "--target-versions",
    action="append",
    choices=[v.name.lower() for v in black.TargetVersion],
    help=(
        "Python versions that should be supported by Black's output. (default: "
        "per-file auto-detection)"
    ),
    default=argparse.SUPPRESS,
)
parser.add_argument(
    "-l",
    "--line-length",
    metavar="INT",
    type=int,
    default=black.DEFAULT_LINE_LENGTH,
    help="How many characters per line to allow.",
)
# ``--check`` stores "check" in ``args.action``; without it the action is
# "inplace".
parser.add_argument(
    "--check",
    dest="action",
    action="store_const",
    const="check",
    default="inplace",
    help=(
        "Don't write the files back, just return the status. Return code 0 "
        "means nothing would change. Return code 1 means some files would be "
        "reformatted. Return code 123 means there was an internal error."
    ),
)
parser.add_argument(
    "--include",
    metavar="TEXT",
    type=str,
    default=black.DEFAULT_INCLUDES,
    help=(
        "A regular expression that matches files and directories that should be "
        "included on recursive searches. An empty value means all files are "
        "included regardless of the name. Use forward slashes for directories on "
        "all platforms (Windows, too). Exclusions are calculated first, inclusions "
        "later."
    ),
)
parser.add_argument(
    "--exclude",
    metavar="TEXT",
    type=str,
    default=black.DEFAULT_EXCLUDES,
    help=(
        "A regular expression that matches files and directories that should be "
        "excluded on recursive searches. An empty value means no paths are excluded. "
        "Use forward slashes for directories on all platforms (Windows, too). "
        "Exclusions are calculated first, inclusions later."
    ),
)
# Zero or more paths, each converted to a ``pathlib.Path``.
parser.add_argument(
    "src",
    action="store",
    type=pathlib.Path,
    nargs="*",
    default=None,
    help="one or more paths to work on",
)

# Only parse the command line and exit when executed as a script (e.g. via
# ``python -m blackdoc``); importing this module must not terminate the
# importing process.
if __name__ == "__main__":
    args = parser.parse_args()
    sys.exit(process(args))
47 changes: 47 additions & 0 deletions blackdoc/blacken.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
import copy
import re

import black
from blib2to3.pgen2.tokenize import TokenError

from .formats import extract_code, reformat_code


def update_line_number(message, original_number):
    """Shift the first ``line:column:`` position in ``message``.

    The line number found in the message is relative to an extracted piece
    of code; ``original_number`` is the line number at which that piece
    starts, so the reported line becomes ``line + original_number - 1``.

    Only the first position is rewritten: the rest of the message may echo
    source code that happens to look like a position (e.g. ``x[1:2:3]``) and
    has to stay untouched. Messages without a position are returned as is.
    """
    line_re = re.compile(r"(?P<line_number>\d+):(?P<column_number>\d+):")
    match = line_re.search(message)
    if match is None:
        return message

    line_number = int(match.group("line_number"))
    column_number = int(match.group("column_number"))
    new_line_number = line_number + original_number - 1

    # splice the new position into the matched span instead of substituting
    # the pattern, which would also rewrite any later look-alike
    start, end = match.span()
    return f"{message[:start]}{new_line_number}:{column_number}:{message[end:]}"


def blacken(lines, mode=None):
    """Run black on every labeled unit of code.

    ``lines`` yields ``(line_range, category, line_unit)`` triples. Units of
    category ``"none"`` are passed through untouched. For all other units the
    code is extracted, formatted with black and put back into its original
    format. ``(category, unit)`` pairs are yielded.

    The allowed line length is reduced by the indentation depth and the
    prompt length of the unit. If black fails, ``black.InvalidInput`` is
    raised with the position translated using the start of ``line_range``.
    """
    for original_line_range, category, line_unit in lines:
        if category == "none":
            yield category, line_unit
            continue

        indentation_depth, prompt_length, code = extract_code(line_unit, category)

        # work on a copy so the caller's mode keeps its line length
        current_mode = copy.copy(mode) if mode is not None else black.FileMode()
        current_mode.line_length -= indentation_depth + prompt_length

        try:
            blackened = black.format_str(code, mode=current_mode).rstrip()
        except TokenError as e:
            apparent_line_num, column = e.args[1]
            message = e.args[0]

            offset = apparent_line_num - 1
            lineno = original_line_range[0] + offset
            faulty_line = code.split("\n")[offset - 1]

            raise black.InvalidInput(f"{lineno}:{column}: {message}: {faulty_line}")
        except black.InvalidInput as e:
            raise black.InvalidInput(
                update_line_number(str(e), original_line_range[0])
            )

        yield category, reformat_code(blackened, category, indentation_depth)
28 changes: 28 additions & 0 deletions blackdoc/classification.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
import more_itertools

from .formats import detection_funcs


def classify(lines):
    """Label the lines by asking every registered format to detect them.

    Each detection function except the one registered as ``"none"`` is called
    with the (peekable) line iterator; a result of ``None`` means the format
    does not claim the upcoming lines. If no format claims them, the result
    of the ``"none"`` function is yielded; if exactly one does, its result
    is yielded.

    Raises ``RuntimeError`` naming the competing formats if more than one
    format claims the same lines.
    """
    lines = more_itertools.peekable(lines)
    while lines:
        detected = {}
        for name, func in detection_funcs.items():
            if name == "none":
                continue

            value = func(lines)
            if value is not None:
                detected[name] = value

        if not detected:
            yield detection_funcs["none"](lines)
        elif len(detected) > 1:
            # report the names of the formats: the detection results are not
            # strings and cannot be joined
            formats = ", ".join(map(str, detected))
            raise RuntimeError(
                f"cannot classify line: {formats} claim it: {lines.peek()}"
            )
        else:
            yield more_itertools.one(detected.values())


def unclassify(labelled_lines):
    """Drop the label of every ``(label, line)`` pair and yield the line."""
    for labelled in labelled_lines:
        _label, content = labelled
        yield content
31 changes: 31 additions & 0 deletions blackdoc/formats/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
import textwrap

from . import doctest, none
from .register import detection_funcs # noqa
from .register import extraction_funcs, reformatting_funcs, register_format


def extract_code(line_unit, category):
    """Extract the code of ``line_unit`` using the format ``category``.

    The unit is dedented before it is handed to the extraction function
    registered for ``category``.

    Returns ``(indentation_depth, prompt_length, extracted)``. Raises
    ``RuntimeError`` if no extraction function is registered for
    ``category``.
    """
    dedented = textwrap.dedent(line_unit)
    # the indentation depth is where the start of the dedented text is
    # found in the original unit
    leading = dedented[:5]
    indentation_depth = line_unit.find(leading)

    extract = extraction_funcs.get(category)
    if extract is None:
        raise RuntimeError(f"unknown code format: {category}")

    prompt_length, extracted = extract(dedented)
    return indentation_depth, prompt_length, extracted


def reformat_code(line_unit, category, indentation_depth):
    """Put ``line_unit`` back into the format ``category`` and indent it.

    The reformatting function registered for ``category`` is applied and
    every line of its result is indented by ``indentation_depth`` spaces.
    Raises ``RuntimeError`` if no reformatting function is registered for
    ``category``.
    """
    reformat = reformatting_funcs.get(category)
    if reformat is None:
        raise RuntimeError(f"unknown code format: {category}")

    reformatted = reformat(line_unit)
    prefix = " " * indentation_depth
    return textwrap.indent(reformatted, prefix)


# Register the format modules shipped with the package at import time, each
# under the name given by its ``name`` attribute.
for module in (none, doctest):
    register_format(module.name, module)
Loading