keewis · keewis · Mar 17, 2020 · Mar 11, 2020 · Mar 16, 2020 · Mar 16, 2020
diff --git a/blackdoc.py b/blackdoc.py
diff --git a/blackdoc/__init__.py b/blackdoc/__init__.py
@@ -0,0 +1,16 @@
+from .blacken import blacken
+from .classification import classify, unclassify
+from .formats import register_format  # noqa
+
+
+def line_numbers(lines):
+    """Pair every line with its 1-based position.
+
+    Yields ``(number, line)`` tuples in input order, counting from 1.
+    """
+    for number, line in enumerate(lines, start=1):
+        yield number, line
+
+
+def format_lines(lines, mode=None):
+    """Push *lines* through the number / classify / blacken / unclassify pipeline.
+
+    ``mode`` is forwarded unchanged to ``blacken``; the return value is
+    whatever ``unclassify`` produces for the blackened units.
+    """
+    return unclassify(blacken(classify(line_numbers(lines)), mode=mode))
diff --git a/blackdoc/__main__.py b/blackdoc/__main__.py
@@ -0,0 +1,252 @@
+import argparse
+import pathlib
+import sys
+
+import black
+
+from . import format_lines
+
+
+def collect_files(src, include, exclude):
+    """Yield the paths to work on for the given command line sources.
+
+    Directories are expanded with ``black.gen_python_files_in_dir`` (using
+    the *include* / *exclude* patterns), existing files and the literal
+    path ``-`` are yielded as they are, and anything else is reported on
+    stderr and skipped.
+    """
+    project_root = black.find_project_root(tuple(src))
+    black_report = black.Report()
+
+    for candidate in src:
+        if candidate.is_dir():
+            gitignore = black.get_gitignore(project_root)
+            yield from black.gen_python_files_in_dir(
+                candidate, project_root, include, exclude, black_report, gitignore,
+            )
+            continue
+
+        if candidate.is_file() or str(candidate) == "-":
+            yield candidate
+            continue
+
+        print(f"invalid path: {candidate}", file=sys.stderr)
+
+
+def format_and_overwrite(path, mode):
+    """Reformat the code snippets of the file at *path* in place.
+
+    The file is decoded with ``black.decode_bytes``, formatted with
+    ``format_lines`` and written back using the detected encoding and
+    newline style.  The file is only written when the formatted content
+    differs from the original, so files reported as unchanged are never
+    touched on disk.
+
+    Returns one of the strings ``"unchanged"``, ``"reformatted"`` or
+    ``"error"`` (the latter if ``black.InvalidInput`` was raised while
+    formatting).
+    """
+    try:
+        with open(path, mode="rb") as f:
+            content, encoding, newline = black.decode_bytes(f.read())
+
+        lines = content.split("\n")
+        new_content = "\n".join(format_lines(lines, mode))
+    except black.InvalidInput as e:
+        print(f"error: cannot format {path.absolute()}: {e}")
+        return "error"
+
+    if new_content == content:
+        # nothing to do: leave the file (and its modification time) alone
+        return "unchanged"
+
+    with open(path, "w", encoding=encoding, newline=newline) as f:
+        f.write(new_content)
+
+    print(f"reformatted {path}")
+    return "reformatted"
+
+
+def format_and_check(path, mode):
+    """Report whether the file at *path* would be changed, without writing it.
+
+    Returns ``"unchanged"``, ``"reformatted"`` (after printing a
+    "would reformat" notice) or ``"error"`` if ``black.InvalidInput`` was
+    raised while formatting.
+    """
+    try:
+        with open(path, mode="rb") as stream:
+            original, _encoding, _newline = black.decode_bytes(stream.read())
+
+        formatted = "\n".join(format_lines(original.split("\n"), mode))
+    except black.InvalidInput as error:
+        print(f"error: cannot format {path.absolute()}: {error}")
+        return "error"
+
+    if formatted == original:
+        return "unchanged"
+
+    print(f"would reformat {path}")
+    return "reformatted"
+
+
+def report_changes(n_reformatted, n_unchanged, n_error):
+    """Build the one-line summary printed after reformatting files in place.
+
+    Each of the three counts contributes a clause only if it is greater
+    than zero; the clauses are joined with ``", "`` and terminated with a
+    full stop.
+    """
+
+    def noun(n):
+        return "file" if n < 2 else "files"
+
+    reports = []
+    if n_reformatted > 0:
+        reports.append(f"{n_reformatted} {noun(n_reformatted)} reformatted")
+
+    if n_unchanged > 0:
+        reports.append(f"{n_unchanged} {noun(n_unchanged)} left unchanged")
+
+    if n_error > 0:
+        # past tense, like the other clauses; also reads correctly for
+        # both "1 file" and "2 files"
+        reports.append(f"{n_error} {noun(n_error)} failed to reformat")
+
+    return ", ".join(reports) + "."
+
+
+def report_possible_changes(n_reformatted, n_unchanged, n_error):
+    """Build the one-line summary printed for a ``--check`` run.
+
+    Counts that are zero are left out; the remaining clauses are joined
+    with ``", "`` and terminated with a full stop.
+    """
+    outcomes = (
+        (n_reformatted, "would be reformatted"),
+        (n_unchanged, "would be left unchanged"),
+        (n_error, "would fail to reformat"),
+    )
+    clauses = [
+        f"{count} {'file' if count < 2 else 'files'} {outcome}"
+        for count, outcome in outcomes
+        if count > 0
+    ]
+
+    return ", ".join(clauses) + "."
+
+
+def statistics(sources):
+    """Count the per-file results stored as the values of *sources*.
+
+    Returns the tuple ``(n_reformatted, n_unchanged, n_error)`` and raises
+    ``RuntimeError`` if any value is not one of ``"reformatted"``,
+    ``"unchanged"`` or ``"error"``.
+    """
+    from collections import Counter
+
+    tally = Counter(sources.values())
+
+    n_reformatted = tally.pop("reformatted", 0)
+    n_unchanged = tally.pop("unchanged", 0)
+    n_error = tally.pop("error", 0)
+
+    # whatever is left over is a result kind nobody knows how to report
+    if tally:
+        raise RuntimeError(f"unknown results: {tally.keys()}")
+
+    return n_reformatted, n_unchanged, n_error
+
+
+def process(args):
+    """Run blackdoc for the parsed command line *args* and return an exit code.
+
+    *args* needs the attributes ``src``, ``include``, ``exclude``,
+    ``line_length`` and ``action``; ``target_versions`` is optional.
+
+    Return codes:
+
+    - 0: everything went fine (or there was nothing to do)
+    - 1: ``--check`` was given and some files would be reformatted
+    - 2: the include or exclude regular expression is invalid
+    - 123: at least one file could not be formatted
+    """
+    if not args.src:
+        print("No Path provided. Nothing to do 😴")
+        return 0
+
+    try:
+        include_regex = black.re_compile_maybe_verbose(args.include)
+    except black.re.error:
+        print(
+            f"Invalid regular expression for include given: {args.include!r}",
+            file=sys.stderr,
+        )
+        return 2
+
+    try:
+        exclude_regex = black.re_compile_maybe_verbose(args.exclude)
+    except black.re.error:
+        print(
+            f"Invalid regular expression for exclude given: {args.exclude!r}",
+            file=sys.stderr,
+        )
+        return 2
+
+    sources = set(collect_files(args.src, include_regex, exclude_regex))
+    if not sources:
+        print("No Python files are present to be formatted. Nothing to do 😴")
+        return 0
+
+    # the attribute is missing entirely if -t / --target-versions was not given
+    target_versions = {
+        black.TargetVersion[version.upper()]
+        for version in getattr(args, "target_versions", ())
+    }
+    mode = black.FileMode(
+        line_length=args.line_length, target_versions=target_versions,
+    )
+
+    actions = {
+        "inplace": format_and_overwrite,
+        "check": format_and_check,
+    }
+
+    action = actions.get(args.action)
+
+    changed_sources = {source: action(source, mode) for source in sources}
+    n_reformatted, n_unchanged, n_error = statistics(changed_sources)
+
+    report_formatters = {
+        "inplace": report_changes,
+        "check": report_possible_changes,
+    }
+
+    report = report_formatters.get(args.action)(n_reformatted, n_unchanged, n_error)
+
+    if n_error > 0:
+        # files that could not be formatted must not be reported as success;
+        # 123 is the code the --check help text documents for errors
+        return_code = 123
+    elif args.action == "check" and n_reformatted > 0:
+        return_code = 1
+    else:
+        return_code = 0
+
+    print("Oh no! 💥 💔 💥" if return_code else "All done! ✨ 🍰 ✨")
+    print(report)
+    return return_code
+
+
+# Command line interface.  The option names, defaults and help texts follow
+# the ones black itself uses (the defaults are taken from the black module).
+parser = argparse.ArgumentParser(
+    description="run black on documentation code snippets (e.g. doctest)",
+    formatter_class=argparse.ArgumentDefaultsHelpFormatter,
+)
+# With argparse.SUPPRESS as default, the namespace has no "target_versions"
+# attribute unless the option was given at least once; process() reads it
+# with getattr(..., ()) for that reason.
+parser.add_argument(
+    "-t",
+    "--target-versions",
+    action="append",
+    choices=[v.name.lower() for v in black.TargetVersion],
+    help=(
+        "Python versions that should be supported by Black's output. (default: "
+        "per-file auto-detection)"
+    ),
+    default=argparse.SUPPRESS,
+)
+parser.add_argument(
+    "-l",
+    "--line-length",
+    metavar="INT",
+    type=int,
+    default=black.DEFAULT_LINE_LENGTH,
+    help="How many characters per line to allow.",
+)
+# --check switches args.action from "inplace" (the default) to "check";
+# process() uses that value to pick the formatting and reporting functions.
+parser.add_argument(
+    "--check",
+    dest="action",
+    action="store_const",
+    const="check",
+    default="inplace",
+    help=(
+        "Don't write the files back, just return the status.  Return code 0 "
+        "means nothing would change.  Return code 1 means some files would be "
+        "reformatted.  Return code 123 means there was an internal error."
+    ),
+)
+# include / exclude are passed as strings and compiled to regular
+# expressions in process().
+parser.add_argument(
+    "--include",
+    metavar="TEXT",
+    type=str,
+    default=black.DEFAULT_INCLUDES,
+    help=(
+        "A regular expression that matches files and directories that should be "
+        "included on recursive searches.  An empty value means all files are "
+        "included regardless of the name.  Use forward slashes for directories on "
+        "all platforms (Windows, too).  Exclusions are calculated first, inclusions "
+        "later."
+    ),
+)
+parser.add_argument(
+    "--exclude",
+    metavar="TEXT",
+    type=str,
+    default=black.DEFAULT_EXCLUDES,
+    help=(
+        "A regular expression that matches files and directories that should be "
+        "excluded on recursive searches.  An empty value means no paths are excluded. "
+        "Use forward slashes for directories on all platforms (Windows, too).  "
+        "Exclusions are calculated first, inclusions later."
+    ),
+)
+# Zero or more positional paths, converted to pathlib.Path; process()
+# returns early with a message when none are given.
+parser.add_argument(
+    "src",
+    action="store",
+    type=pathlib.Path,
+    nargs="*",
+    default=None,
+    help="one or more paths to work on",
+)
+
+# Runs at import time of this module: parse sys.argv and exit with the code
+# returned by process().
+args = parser.parse_args()
+sys.exit(process(args))
diff --git a/blackdoc/blacken.py b/blackdoc/blacken.py
@@ -0,0 +1,47 @@
+import copy
+import re
+
+import black
+from blib2to3.pgen2.tokenize import TokenError
+
+from .formats import extract_code, reformat_code
+
+
+def update_line_number(message, original_number):
+    """Shift the ``line:column:`` position in *message* by *original_number*.
+
+    The first ``<digits>:<digits>:`` group found in *message* is read as a
+    1-based line number and a column; the line number is translated with
+    ``line + original_number - 1`` and every such group in the message is
+    replaced with the translated position.  Messages without a position
+    are returned unchanged.
+    """
+    position_re = re.compile(r"(?P<line_number>\d+):(?P<column_number>\d+):")
+
+    found = position_re.search(message)
+    if found is None:
+        return message
+
+    reported_line = int(found.group("line_number"))
+    column = int(found.group("column_number"))
+    shifted_line = reported_line + original_number - 1
+
+    return position_re.sub(f"{shifted_line}:{column}:", message)
+
+
+def blacken(lines, mode=None):
+    """Run black on every classified code unit and yield the results.
+
+    *lines* is an iterable of ``(original_line_range, category, line_unit)``
+    triples.  Units of category ``"none"`` are passed through untouched; for
+    all other categories the code is extracted, formatted with
+    ``black.format_str`` and converted back to the original format.
+
+    Yields ``(category, text)`` pairs.  Raises ``black.InvalidInput`` if the
+    code of a unit cannot be tokenized or parsed; the position in the
+    message is translated using the first entry of ``original_line_range``.
+    """
+    for original_line_range, category, line_unit in lines:
+        if category == "none":
+            yield category, line_unit
+            continue
+
+        indentation_depth, prompt_length, code = extract_code(line_unit, category)
+
+        # work on a copy so the caller's mode object is not modified, then
+        # reduce the allowed line length by the indentation and prompt width
+        current_mode = black.FileMode() if mode is None else copy.copy(mode)
+        current_mode.line_length -= indentation_depth + prompt_length
+
+        try:
+            blackened = black.format_str(code, mode=current_mode).rstrip()
+        except TokenError as e:
+            # e.args is (message, (line, column)), relative to the extracted code
+            apparent_line_num, column = e.args[1]
+            message = e.args[0]
+            lineno = original_line_range[0] + (apparent_line_num - 1)
+            # NOTE(review): the additional "- 1" suggests the reported line is
+            # one past the offending line -- confirm against the tokenizer
+            faulty_line = code.split("\n")[(apparent_line_num - 1) - 1]
+
+            raise black.InvalidInput(f"{lineno}:{column}: {message}: {faulty_line}")
+        except black.InvalidInput as e:
+            # rewrite the "line:column:" part of black's message
+            message = update_line_number(str(e), original_line_range[0])
+            raise black.InvalidInput(message)
+
+        reformatted = reformat_code(blackened, category, indentation_depth)
+
+        yield category, reformatted
diff --git a/blackdoc/classification.py b/blackdoc/classification.py
@@ -0,0 +1,28 @@
+import more_itertools
+
+from .formats import detection_funcs
+
+
+def classify(lines):
+    """Group numbered lines into classified units.
+
+    *lines* is wrapped in a ``more_itertools.peekable``.  While lines are
+    left, every registered detection function except ``"none"`` is called
+    with that iterator; a function signals "not my format" by returning
+    ``None``.  If no function claims the upcoming lines, the ``"none"``
+    detection function is used; if exactly one does, its result is yielded.
+
+    Raises ``RuntimeError`` if more than one format claims the same lines.
+    """
+    lines = more_itertools.peekable(lines)
+    while lines:
+        maybe_detected = (
+            (name, func(lines))
+            for name, func in detection_funcs.items()
+            if name != "none"
+        )
+        detected = {name: value for name, value in maybe_detected if value is not None}
+
+        if not detected:
+            yield detection_funcs["none"](lines)
+        elif len(detected) > 1:
+            # report the names of the competing formats: the detection
+            # results themselves are not strings and cannot be joined
+            claimants = ", ".join(detected)
+            raise RuntimeError(
+                f"cannot classify line: {claimants} claim it: {lines.peek()}"
+            )
+        else:
+            yield more_itertools.one(detected.values())
+
+
+def unclassify(labelled_lines):
+    """Drop the label of every ``(label, line)`` pair and yield the line."""
+    yield from (text for _label, text in labelled_lines)
diff --git a/blackdoc/formats/__init__.py b/blackdoc/formats/__init__.py
@@ -0,0 +1,31 @@
+import textwrap
+
+from . import doctest, none
+from .register import detection_funcs  # noqa
+from .register import extraction_funcs, reformatting_funcs, register_format
+
+
+def extract_code(line_unit, category):
+    """Strip indentation and format markup from *line_unit*.
+
+    Returns ``(indentation_depth, prompt_length, code)``, where the last two
+    items come from the extraction function registered for *category*.
+    Raises ``RuntimeError`` if no extraction function is registered.
+    """
+    stripped = textwrap.dedent(line_unit)
+    # the indentation depth is the offset at which the start of the
+    # dedented text is found in the original unit
+    depth = line_unit.find(stripped[:5])
+
+    extractor = extraction_funcs.get(category)
+    if extractor is None:
+        raise RuntimeError(f"unknown code format: {category}")
+
+    prompt_length, code = extractor(stripped)
+    return depth, prompt_length, code
+
+
+def reformat_code(line_unit, category, indentation_depth):
+    """Convert formatted code back to *category* markup and re-indent it.
+
+    The reformatting function registered for *category* is applied to
+    *line_unit* and the result is indented by *indentation_depth* spaces.
+    Raises ``RuntimeError`` if no reformatting function is registered.
+    """
+    formatter = reformatting_funcs.get(category)
+    if formatter is None:
+        raise RuntimeError(f"unknown code format: {category}")
+
+    return textwrap.indent(formatter(line_unit), prefix=" " * indentation_depth)
+
+
+# Register the formats shipped with the package under the name each format
+# module declares for itself.
+for module in (none, doctest):
+    register_format(module.name, module)