From b65c70fb5ab9b8d2683257cfc55bfa176e3f5ff5 Mon Sep 17 00:00:00 2001
From: Keewis <keewis@posteo.de>
Date: Wed, 11 Mar 2020 13:13:05 +0100
Subject: [PATCH 01/24] convert the script and tests into a package with a
 __main__ and a tests module

---
 blackdoc.py                                   | 450 ------------------
 blackdoc/__init__.py                          | 200 ++++++++
 blackdoc/__main__.py                          | 252 ++++++++++
 .../tests/test_blackdoc.py                    |   0
 4 files changed, 452 insertions(+), 450 deletions(-)
 delete mode 100644 blackdoc.py
 create mode 100644 blackdoc/__init__.py
 create mode 100644 blackdoc/__main__.py
 rename test_blackdoc.py => blackdoc/tests/test_blackdoc.py (100%)

diff --git a/blackdoc.py b/blackdoc.py
deleted file mode 100644
index 144e7bc..0000000
--- a/blackdoc.py
+++ /dev/null
@@ -1,450 +0,0 @@
-import copy
-import pathlib
-import sys
-import textwrap
-
-import black
-import more_itertools
-
-doctest_prompt = ">>> "
-doctest_continuation_prompt = "... "
-
-prompt_categories = {
-    "doctest": doctest_prompt,
-}
-continuation_prompt_categories = {
-    "doctest": doctest_continuation_prompt,
-}
-available_prompts = set(prompt_categories.values()) | set(
-    continuation_prompt_categories.values()
-)
-
-
-def extract_prompt(line):
-    stripped = line.lstrip()
-    prompt_length = stripped.find(" ") + 1
-
-    prompt = stripped[:prompt_length]
-
-    return prompt if prompt in available_prompts else None
-
-
-def remove_prompt(line, prompt):
-    if not line.startswith(prompt):
-        raise RuntimeError(
-            f"cannot remove prompt {prompt} from line: prompt not found", line
-        )
-
-    without_prompt = line[len(prompt) :]
-    return without_prompt
-
-
-def add_prompt(line, prompt):
-    return prompt + line
-
-
-def remove_doctest_prompt(code_unit):
-    indentation_depth = code_unit.find(doctest_prompt)
-    code_unit = textwrap.dedent(code_unit)
-
-    # multiline unit
-    if "\n" in code_unit:
-        prompt_line, *continuation_lines = code_unit.split("\n")
-        removed = "\n".join(
-            [
-                remove_prompt(prompt_line, doctest_prompt),
-                *(
-                    remove_prompt(line, doctest_continuation_prompt)
-                    for line in continuation_lines
-                ),
-            ]
-        )
-    else:
-        removed = remove_prompt(code_unit, doctest_prompt)
-
-    return indentation_depth, removed
-
-
-def add_doctest_prompt(code_unit, indentation_depth):
-    if "\n" in code_unit:
-        prompt_line, *continuation_lines = code_unit.split("\n")
-        reformatted = "\n".join(
-            [
-                add_prompt(prompt_line, doctest_prompt),
-                *(
-                    add_prompt(line, doctest_continuation_prompt)
-                    for line in continuation_lines
-                ),
-            ]
-        )
-    else:
-        reformatted = add_prompt(code_unit, doctest_prompt)
-
-    return textwrap.indent(reformatted, " " * indentation_depth)
-
-
-extraction_funcs = {
-    "doctest": remove_doctest_prompt,
-}
-reformatting_funcs = {
-    "doctest": add_doctest_prompt,
-}
-
-
-def classify(lines):
-    """ classify lines by prompt type """
-    prompts = dict(zip(prompt_categories.values(), prompt_categories.keys()))
-    continuation_prompts = dict(
-        zip(
-            continuation_prompt_categories.values(),
-            continuation_prompt_categories.keys(),
-        )
-    )
-
-    for line in lines:
-        maybe_prompt = extract_prompt(line)
-        category = (
-            prompts.get(maybe_prompt, None)
-            or continuation_prompts.get(maybe_prompt, None)
-            or "none"
-        )
-
-        yield category, line
-
-
-def continuation_lines(lines, continuation_prompt):
-    # We can't use `itertools.takewhile` because it drops the first non-match
-    # Instead, we peek at the iterable and only remove the element if we take it
-    iterable = more_itertools.peekable(lines) if not hasattr(lines, "peek") else lines
-    while True:
-        try:
-            category, line = iterable.peek()
-        except StopIteration:
-            break
-
-        if extract_prompt(line) != continuation_prompt:
-            break
-
-        # consume the item
-        next(iterable)
-
-        yield line
-
-
-def group_code_units(labelled_lines):
-    """ group together code units """
-    # we need to make this peekable here since otherwise we lose an element
-    lines = more_itertools.peekable(labelled_lines)
-    while True:
-        try:
-            category, line = next(lines)
-        except StopIteration:
-            break
-
-        if category == "none":
-            unit = line
-        else:
-            continuation_prompt = continuation_prompt_categories.get(category, None)
-            if continuation_prompt is None:
-                raise ValueError("unknown prompt category for grouping: {category}")
-            unit = "\n".join([line, *continuation_lines(lines, continuation_prompt)])
-        yield category, unit
-
-
-def blacken(labelled_lines, mode=None):
-    for category, line in labelled_lines:
-        if category == "none":
-            yield category, line
-            continue
-
-        # remove the prompt and save the indentation depth for later
-        converter = extraction_funcs.get(category, None)
-        if converter is None:
-            raise ValueError(f"unknown prompt category for extraction: {category}")
-        indentation_depth, code_unit = converter(line)
-
-        # update the line length
-        prompt_length = indentation_depth + len(prompt_categories[category])
-        current_mode = black.FileMode() if mode is None else copy.copy(mode)
-        current_mode.line_length -= prompt_length
-
-        # blacken the code
-        blackened = black.format_str(code_unit, mode=current_mode).rstrip()
-
-        # add the prompt and reindent
-        converter = reformatting_funcs.get(category, None)
-        if converter is None:
-            raise ValueError(f"unknown prompt category for reformatting: {category}")
-
-        reformatted = converter(blackened, indentation_depth)
-        yield category, reformatted
-
-
-def unclassify(labelled_lines):
-    for _, line in labelled_lines:
-        yield line
-
-
-def format_lines(lines, mode=None):
-    labeled = classify(lines)
-    grouped = group_code_units(labeled)
-    blackened = blacken(grouped, mode=mode)
-
-    return unclassify(blackened)
-
-
-def format_file(path):
-    with open(path) as f:
-        return "\n".join(format_lines(line.rstrip() for line in f)) + "\n"
-
-
-def format_text(text):
-    return "\n".join(format_lines(text.split("\n")))
-
-
-def collect_files(src, include, exclude):
-    root = black.find_project_root(tuple(src))
-    report = black.Report()
-
-    for path in src:
-        if path.is_dir():
-            yield from black.gen_python_files_in_dir(
-                path, root, include, exclude, report, black.get_gitignore(root),
-            )
-        elif path.is_file() or str(path) == "-":
-            yield path
-        else:
-            print(f"invalid path: {path}", file=sys.stderr)
-
-
-def format_and_overwrite(path, mode):
-    try:
-        with open(path, mode="rb") as f:
-            content, encoding, newline = black.decode_bytes(f.read())
-
-        lines = content.split("\n")
-
-        new_content = "\n".join(format_lines(lines, mode))
-
-        if new_content == content:
-            result = "unchanged"
-        else:
-            print(f"reformatted {path}")
-            result = "reformatted"
-
-        with open(path, "w", encoding=encoding, newline=newline) as f:
-            f.write(new_content)
-    except Exception as e:
-        print(f"error: cannot format {path.absolute()}: {e}")
-        result = "error"
-
-    return result
-
-
-def format_and_check(path, mode):
-    try:
-        with open(path, mode="rb") as f:
-            content, _, _ = black.decode_bytes(f.read())
-
-        lines = content.split("\n")
-
-        new_content = "\n".join(format_lines(lines, mode))
-
-        if new_content == content:
-            result = "unchanged"
-        else:
-            print(f"would reformat {path}")
-            result = "reformatted"
-    except Exception as e:
-        print(f"error: cannot format {path.absolute()}: {e}")
-        result = "error"
-
-    return result
-
-
-def report_changes(n_reformatted, n_unchanged, n_error):
-    def noun(n):
-        return "file" if n < 2 else "files"
-
-    reports = []
-    if n_reformatted > 0:
-        reports.append(f"{n_reformatted} {noun(n_reformatted)} reformatted")
-
-    if n_unchanged > 0:
-        reports.append(f"{n_unchanged} {noun(n_unchanged)} left unchanged")
-
-    if n_error > 0:
-        reports.append(f"{n_error} {noun(n_error)} fails to reformat")
-
-    return ", ".join(reports) + "."
-
-
-def report_possible_changes(n_reformatted, n_unchanged, n_error):
-    def noun(n):
-        return "file" if n < 2 else "files"
-
-    reports = []
-    if n_reformatted > 0:
-        reports.append(f"{n_reformatted} {noun(n_reformatted)} would be reformatted")
-
-    if n_unchanged > 0:
-        reports.append(f"{n_unchanged} {noun(n_unchanged)} would be left unchanged")
-
-    if n_error > 0:
-        reports.append(f"{n_error} {noun(n_error)} would fail to reformat")
-
-    return ", ".join(reports) + "."
-
-
-def statistics(sources):
-    from collections import Counter
-
-    statistics = Counter(sources.values())
-
-    n_unchanged = statistics.pop("unchanged", 0)
-    n_reformatted = statistics.pop("reformatted", 0)
-    n_error = statistics.pop("error", 0)
-
-    if len(statistics) != 0:
-        raise RuntimeError(f"unknown results: {statistics.keys()}")
-
-    return n_reformatted, n_unchanged, n_error
-
-
-def process(args):
-    if not args.src:
-        print("No Path provided. Nothing to do 😴")
-        return 0
-
-    try:
-        include_regex = black.re_compile_maybe_verbose(args.include)
-    except black.re.error:
-        print(
-            f"Invalid regular expression for include given: {args.include!r}",
-            file=sys.stderr,
-        )
-        return 2
-
-    try:
-        exclude_regex = black.re_compile_maybe_verbose(args.exclude)
-    except black.re.error:
-        print(
-            f"Invalid regular expression for exclude given: {args.exclude!r}",
-            file=sys.stderr,
-        )
-        return 2
-
-    sources = set(collect_files(args.src, include_regex, exclude_regex))
-    if len(sources) == 0:
-        print("No Python files are present to be formatted. Nothing to do 😴")
-        return 0
-
-    target_versions = set(
-        black.TargetVersion[version.upper()]
-        for version in getattr(args, "target_versions", ())
-    )
-    mode = black.FileMode(
-        line_length=args.line_length, target_versions=target_versions,
-    )
-
-    actions = {
-        "inplace": format_and_overwrite,
-        "check": format_and_check,
-    }
-
-    action = actions.get(args.action)
-
-    changed_sources = {source: action(source, mode) for source in sources}
-    n_reformatted, n_unchanged, n_error = statistics(changed_sources)
-
-    report_formatters = {
-        "inplace": report_changes,
-        "check": report_possible_changes,
-    }
-
-    report = report_formatters.get(args.action)(n_reformatted, n_unchanged, n_error)
-
-    if args.action == "check" and n_reformatted > 0:
-        return_code = 1
-    else:
-        return_code = 0
-
-    print("Oh no! 💥 💔 💥" if return_code else "All done! ✨ 🍰 ✨")
-    print(report)
-    return return_code
-
-
-if __name__ == "__main__":
-    import argparse
-
-    parser = argparse.ArgumentParser(
-        description="run black on documentation code snippets (e.g. doctest)",
-        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
-    )
-    parser.add_argument(
-        "-t",
-        "--target-versions",
-        action="append",
-        choices=[v.name.lower() for v in black.TargetVersion],
-        help=(
-            "Python versions that should be supported by Black's output. (default: "
-            "per-file auto-detection)"
-        ),
-        default=argparse.SUPPRESS,
-    )
-    parser.add_argument(
-        "-l",
-        "--line-length",
-        metavar="INT",
-        type=int,
-        default=black.DEFAULT_LINE_LENGTH,
-        help="How many characters per line to allow.",
-    )
-    parser.add_argument(
-        "--check",
-        dest="action",
-        action="store_const",
-        const="check",
-        default="inplace",
-        help=(
-            "Don't write the files back, just return the status.  Return code 0 "
-            "means nothing would change.  Return code 1 means some files would be "
-            "reformatted.  Return code 123 means there was an internal error."
-        ),
-    )
-    parser.add_argument(
-        "--include",
-        metavar="TEXT",
-        type=str,
-        default=black.DEFAULT_INCLUDES,
-        help=(
-            "A regular expression that matches files and directories that should be "
-            "included on recursive searches.  An empty value means all files are "
-            "included regardless of the name.  Use forward slashes for directories on "
-            "all platforms (Windows, too).  Exclusions are calculated first, inclusions "
-            "later."
-        ),
-    )
-    parser.add_argument(
-        "--exclude",
-        metavar="TEXT",
-        type=str,
-        default=black.DEFAULT_EXCLUDES,
-        help=(
-            "A regular expression that matches files and directories that should be "
-            "excluded on recursive searches.  An empty value means no paths are excluded. "
-            "Use forward slashes for directories on all platforms (Windows, too).  "
-            "Exclusions are calculated first, inclusions later."
-        ),
-    )
-    parser.add_argument(
-        "src",
-        action="store",
-        type=pathlib.Path,
-        nargs="*",
-        default=None,
-        help="one or more paths to work on",
-    )
-
-    args = parser.parse_args()
-    sys.exit(process(args))
diff --git a/blackdoc/__init__.py b/blackdoc/__init__.py
new file mode 100644
index 0000000..ac8f74a
--- /dev/null
+++ b/blackdoc/__init__.py
@@ -0,0 +1,200 @@
+import copy
+import textwrap
+
+import black
+import more_itertools
+
+doctest_prompt = ">>> "
+doctest_continuation_prompt = "... "
+
+prompt_categories = {
+    "doctest": doctest_prompt,
+}
+continuation_prompt_categories = {
+    "doctest": doctest_continuation_prompt,
+}
+available_prompts = set(prompt_categories.values()) | set(
+    continuation_prompt_categories.values()
+)
+
+
+def extract_prompt(line):
+    stripped = line.lstrip()
+    prompt_length = stripped.find(" ") + 1
+
+    prompt = stripped[:prompt_length]
+
+    return prompt if prompt in available_prompts else None
+
+
+def remove_prompt(line, prompt):
+    if not line.startswith(prompt):
+        raise RuntimeError(
+            f"cannot remove prompt {prompt} from line: prompt not found", line
+        )
+
+    without_prompt = line[len(prompt) :]
+    return without_prompt
+
+
+def add_prompt(line, prompt):
+    return prompt + line
+
+
+def remove_doctest_prompt(code_unit):
+    indentation_depth = code_unit.find(doctest_prompt)
+    code_unit = textwrap.dedent(code_unit)
+
+    # multiline unit
+    if "\n" in code_unit:
+        prompt_line, *continuation_lines = code_unit.split("\n")
+        removed = "\n".join(
+            [
+                remove_prompt(prompt_line, doctest_prompt),
+                *(
+                    remove_prompt(line, doctest_continuation_prompt)
+                    for line in continuation_lines
+                ),
+            ]
+        )
+    else:
+        removed = remove_prompt(code_unit, doctest_prompt)
+
+    return indentation_depth, removed
+
+
+def add_doctest_prompt(code_unit, indentation_depth):
+    if "\n" in code_unit:
+        prompt_line, *continuation_lines = code_unit.split("\n")
+        reformatted = "\n".join(
+            [
+                add_prompt(prompt_line, doctest_prompt),
+                *(
+                    add_prompt(line, doctest_continuation_prompt)
+                    for line in continuation_lines
+                ),
+            ]
+        )
+    else:
+        reformatted = add_prompt(code_unit, doctest_prompt)
+
+    return textwrap.indent(reformatted, " " * indentation_depth)
+
+
+extraction_funcs = {
+    "doctest": remove_doctest_prompt,
+}
+reformatting_funcs = {
+    "doctest": add_doctest_prompt,
+}
+
+
+def classify(lines):
+    """ classify lines by prompt type """
+    prompts = dict(zip(prompt_categories.values(), prompt_categories.keys()))
+    continuation_prompts = dict(
+        zip(
+            continuation_prompt_categories.values(),
+            continuation_prompt_categories.keys(),
+        )
+    )
+
+    for line in lines:
+        maybe_prompt = extract_prompt(line)
+        category = (
+            prompts.get(maybe_prompt, None)
+            or continuation_prompts.get(maybe_prompt, None)
+            or "none"
+        )
+
+        yield category, line
+
+
+def continuation_lines(lines, continuation_prompt):
+    # We can't use `itertools.takewhile` because it drops the first non-match
+    # Instead, we peek at the iterable and only remove the element if we take it
+    iterable = more_itertools.peekable(lines) if not hasattr(lines, "peek") else lines
+    while True:
+        try:
+            category, line = iterable.peek()
+        except StopIteration:
+            break
+
+        if extract_prompt(line) != continuation_prompt:
+            break
+
+        # consume the item
+        next(iterable)
+
+        yield line
+
+
+def group_code_units(labelled_lines):
+    """ group together code units """
+    # we need to make this peekable here since otherwise we lose an element
+    lines = more_itertools.peekable(labelled_lines)
+    while True:
+        try:
+            category, line = next(lines)
+        except StopIteration:
+            break
+
+        if category == "none":
+            unit = line
+        else:
+            continuation_prompt = continuation_prompt_categories.get(category, None)
+            if continuation_prompt is None:
+                raise ValueError("unknown prompt category for grouping: {category}")
+            unit = "\n".join([line, *continuation_lines(lines, continuation_prompt)])
+        yield category, unit
+
+
+def blacken(labelled_lines, mode=None):
+    for category, line in labelled_lines:
+        if category == "none":
+            yield category, line
+            continue
+
+        # remove the prompt and save the indentation depth for later
+        converter = extraction_funcs.get(category, None)
+        if converter is None:
+            raise ValueError(f"unknown prompt category for extraction: {category}")
+        indentation_depth, code_unit = converter(line)
+
+        # update the line length
+        prompt_length = indentation_depth + len(prompt_categories[category])
+        current_mode = black.FileMode() if mode is None else copy.copy(mode)
+        current_mode.line_length -= prompt_length
+
+        # blacken the code
+        blackened = black.format_str(code_unit, mode=current_mode).rstrip()
+
+        # add the prompt and reindent
+        converter = reformatting_funcs.get(category, None)
+        if converter is None:
+            raise ValueError(f"unknown prompt category for reformatting: {category}")
+
+        reformatted = converter(blackened, indentation_depth)
+        yield category, reformatted
+
+
+def unclassify(labelled_lines):
+    for _, line in labelled_lines:
+        yield line
+
+
+def format_lines(lines, mode=None):
+    labeled = classify(lines)
+    grouped = group_code_units(labeled)
+    blackened = blacken(grouped, mode=mode)
+
+    return unclassify(blackened)
+
+
+def format_file(path):
+    with open(path) as f:
+        return "\n".join(format_lines(line.rstrip() for line in f)) + "\n"
+
+
+def format_text(text):
+    return "\n".join(format_lines(text.split("\n")))
diff --git a/blackdoc/__main__.py b/blackdoc/__main__.py
new file mode 100644
index 0000000..78f919b
--- /dev/null
+++ b/blackdoc/__main__.py
@@ -0,0 +1,252 @@
+import argparse
+import pathlib
+import sys
+
+import black
+
+from . import format_lines
+
+
+def collect_files(src, include, exclude):
+    root = black.find_project_root(tuple(src))
+    report = black.Report()
+
+    for path in src:
+        if path.is_dir():
+            yield from black.gen_python_files_in_dir(
+                path, root, include, exclude, report, black.get_gitignore(root),
+            )
+        elif path.is_file() or str(path) == "-":
+            yield path
+        else:
+            print(f"invalid path: {path}", file=sys.stderr)
+
+
+def format_and_overwrite(path, mode):
+    try:
+        with open(path, mode="rb") as f:
+            content, encoding, newline = black.decode_bytes(f.read())
+
+        lines = content.split("\n")
+
+        new_content = "\n".join(format_lines(lines, mode))
+
+        if new_content == content:
+            result = "unchanged"
+        else:
+            print(f"reformatted {path}")
+            result = "reformatted"
+
+        with open(path, "w", encoding=encoding, newline=newline) as f:
+            f.write(new_content)
+    except Exception as e:
+        print(f"error: cannot format {path.absolute()}: {e}")
+        result = "error"
+
+    return result
+
+
+def format_and_check(path, mode):
+    try:
+        with open(path, mode="rb") as f:
+            content, _, _ = black.decode_bytes(f.read())
+
+        lines = content.split("\n")
+
+        new_content = "\n".join(format_lines(lines, mode))
+
+        if new_content == content:
+            result = "unchanged"
+        else:
+            print(f"would reformat {path}")
+            result = "reformatted"
+    except Exception as e:
+        print(f"error: cannot format {path.absolute()}: {e}")
+        result = "error"
+
+    return result
+
+
+def report_changes(n_reformatted, n_unchanged, n_error):
+    def noun(n):
+        return "file" if n < 2 else "files"
+
+    reports = []
+    if n_reformatted > 0:
+        reports.append(f"{n_reformatted} {noun(n_reformatted)} reformatted")
+
+    if n_unchanged > 0:
+        reports.append(f"{n_unchanged} {noun(n_unchanged)} left unchanged")
+
+    if n_error > 0:
+        reports.append(f"{n_error} {noun(n_error)} fails to reformat")
+
+    return ", ".join(reports) + "."
+
+
+def report_possible_changes(n_reformatted, n_unchanged, n_error):
+    def noun(n):
+        return "file" if n < 2 else "files"
+
+    reports = []
+    if n_reformatted > 0:
+        reports.append(f"{n_reformatted} {noun(n_reformatted)} would be reformatted")
+
+    if n_unchanged > 0:
+        reports.append(f"{n_unchanged} {noun(n_unchanged)} would be left unchanged")
+
+    if n_error > 0:
+        reports.append(f"{n_error} {noun(n_error)} would fail to reformat")
+
+    return ", ".join(reports) + "."
+
+
+def statistics(sources):
+    from collections import Counter
+
+    statistics = Counter(sources.values())
+
+    n_unchanged = statistics.pop("unchanged", 0)
+    n_reformatted = statistics.pop("reformatted", 0)
+    n_error = statistics.pop("error", 0)
+
+    if len(statistics) != 0:
+        raise RuntimeError(f"unknown results: {statistics.keys()}")
+
+    return n_reformatted, n_unchanged, n_error
+
+
+def process(args):
+    if not args.src:
+        print("No Path provided. Nothing to do 😴")
+        return 0
+
+    try:
+        include_regex = black.re_compile_maybe_verbose(args.include)
+    except black.re.error:
+        print(
+            f"Invalid regular expression for include given: {args.include!r}",
+            file=sys.stderr,
+        )
+        return 2
+
+    try:
+        exclude_regex = black.re_compile_maybe_verbose(args.exclude)
+    except black.re.error:
+        print(
+            f"Invalid regular expression for exclude given: {args.exclude!r}",
+            file=sys.stderr,
+        )
+        return 2
+
+    sources = set(collect_files(args.src, include_regex, exclude_regex))
+    if len(sources) == 0:
+        print("No Python files are present to be formatted. Nothing to do 😴")
+        return 0
+
+    target_versions = set(
+        black.TargetVersion[version.upper()]
+        for version in getattr(args, "target_versions", ())
+    )
+    mode = black.FileMode(
+        line_length=args.line_length, target_versions=target_versions,
+    )
+
+    actions = {
+        "inplace": format_and_overwrite,
+        "check": format_and_check,
+    }
+
+    action = actions.get(args.action)
+
+    changed_sources = {source: action(source, mode) for source in sources}
+    n_reformatted, n_unchanged, n_error = statistics(changed_sources)
+
+    report_formatters = {
+        "inplace": report_changes,
+        "check": report_possible_changes,
+    }
+
+    report = report_formatters.get(args.action)(n_reformatted, n_unchanged, n_error)
+
+    if args.action == "check" and n_reformatted > 0:
+        return_code = 1
+    else:
+        return_code = 0
+
+    print("Oh no! 💥 💔 💥" if return_code else "All done! ✨ 🍰 ✨")
+    print(report)
+    return return_code
+
+
+parser = argparse.ArgumentParser(
+    description="run black on documentation code snippets (e.g. doctest)",
+    formatter_class=argparse.ArgumentDefaultsHelpFormatter,
+)
+parser.add_argument(
+    "-t",
+    "--target-versions",
+    action="append",
+    choices=[v.name.lower() for v in black.TargetVersion],
+    help=(
+        "Python versions that should be supported by Black's output. (default: "
+        "per-file auto-detection)"
+    ),
+    default=argparse.SUPPRESS,
+)
+parser.add_argument(
+    "-l",
+    "--line-length",
+    metavar="INT",
+    type=int,
+    default=black.DEFAULT_LINE_LENGTH,
+    help="How many characters per line to allow.",
+)
+parser.add_argument(
+    "--check",
+    dest="action",
+    action="store_const",
+    const="check",
+    default="inplace",
+    help=(
+        "Don't write the files back, just return the status.  Return code 0 "
+        "means nothing would change.  Return code 1 means some files would be "
+        "reformatted.  Return code 123 means there was an internal error."
+    ),
+)
+parser.add_argument(
+    "--include",
+    metavar="TEXT",
+    type=str,
+    default=black.DEFAULT_INCLUDES,
+    help=(
+        "A regular expression that matches files and directories that should be "
+        "included on recursive searches.  An empty value means all files are "
+        "included regardless of the name.  Use forward slashes for directories on "
+        "all platforms (Windows, too).  Exclusions are calculated first, inclusions "
+        "later."
+    ),
+)
+parser.add_argument(
+    "--exclude",
+    metavar="TEXT",
+    type=str,
+    default=black.DEFAULT_EXCLUDES,
+    help=(
+        "A regular expression that matches files and directories that should be "
+        "excluded on recursive searches.  An empty value means no paths are excluded. "
+        "Use forward slashes for directories on all platforms (Windows, too).  "
+        "Exclusions are calculated first, inclusions later."
+    ),
+)
+parser.add_argument(
+    "src",
+    action="store",
+    type=pathlib.Path,
+    nargs="*",
+    default=None,
+    help="one or more paths to work on",
+)
+
+args = parser.parse_args()
+sys.exit(process(args))
diff --git a/test_blackdoc.py b/blackdoc/tests/test_blackdoc.py
similarity index 100%
rename from test_blackdoc.py
rename to blackdoc/tests/test_blackdoc.py

From e98940ab488252ec4f4d5e19c72afaeb29c2cf18 Mon Sep 17 00:00:00 2001
From: Keewis <keewis@posteo.de>
Date: Mon, 16 Mar 2020 13:24:43 +0100
Subject: [PATCH 02/24] add a module to make adding new formats easier

---
 blackdoc/formats/__init__.py | 25 +++++++++++++++++++++++++
 blackdoc/formats/register.py | 15 +++++++++++++++
 2 files changed, 40 insertions(+)
 create mode 100644 blackdoc/formats/__init__.py
 create mode 100644 blackdoc/formats/register.py

diff --git a/blackdoc/formats/__init__.py b/blackdoc/formats/__init__.py
new file mode 100644
index 0000000..baf9182
--- /dev/null
+++ b/blackdoc/formats/__init__.py
@@ -0,0 +1,25 @@
+import textwrap
+
+from .register import detection_funcs  # noqa
+from .register import extraction_funcs, reformatting_funcs
+
+
+def extract_code(line_unit, category):
+    dedented = textwrap.dedent(line_unit)
+    indentation_level = line_unit.find(dedented[:5])
+
+    func = extraction_funcs.get(category, None)
+    if func is None:
+        raise RuntimeError(f"unknown code format: {category}")
+
+    return indentation_level, func(dedented)
+
+
+def reformat_code(line_unit, category, indentation_depth):
+    func = reformatting_funcs.get(category, None)
+    if func is None:
+        raise RuntimeError(f"unknown code format: {category}")
+
+    reformatted = func(line_unit)
+
+    return textwrap.indent(reformatted, " " * indentation_depth)
diff --git a/blackdoc/formats/register.py b/blackdoc/formats/register.py
new file mode 100644
index 0000000..1e68c7a
--- /dev/null
+++ b/blackdoc/formats/register.py
@@ -0,0 +1,15 @@
+import warnings
+
+detection_funcs = {}
+extraction_funcs = {}
+reformatting_funcs = {}
+
+
+def register_format(name, detection_func, extraction_func, reformatting_func):
+    """ register a new format """
+    if name in detection_funcs:
+        warnings.warn(f"{name} already registered", RuntimeWarning)
+
+    detection_funcs[name] = detection_func
+    extraction_funcs[name] = extraction_func
+    reformatting_funcs[name] = reformatting_func

From 12ca7c315df1c895f1dc3302bd2b27d584368d1f Mon Sep 17 00:00:00 2001
From: Keewis <keewis@posteo.de>
Date: Mon, 16 Mar 2020 13:29:45 +0100
Subject: [PATCH 03/24] rewrite the register func to take format objects and
 call it in __init__

---
 blackdoc/formats/__init__.py | 6 +++++-
 blackdoc/formats/register.py | 6 +++++-
 2 files changed, 10 insertions(+), 2 deletions(-)

diff --git a/blackdoc/formats/__init__.py b/blackdoc/formats/__init__.py
index baf9182..94a2362 100644
--- a/blackdoc/formats/__init__.py
+++ b/blackdoc/formats/__init__.py
@@ -1,7 +1,7 @@
 import textwrap
 
 from .register import detection_funcs  # noqa
-from .register import extraction_funcs, reformatting_funcs
+from .register import extraction_funcs, reformatting_funcs, register_format
 
 
 def extract_code(line_unit, category):
@@ -23,3 +23,7 @@ def reformat_code(line_unit, category, indentation_depth):
     reformatted = func(line_unit)
 
     return textwrap.indent(reformatted, " " * indentation_depth)
+
+
+for module in ():
+    register_format(module.__name__, module)
diff --git a/blackdoc/formats/register.py b/blackdoc/formats/register.py
index 1e68c7a..b191074 100644
--- a/blackdoc/formats/register.py
+++ b/blackdoc/formats/register.py
@@ -5,11 +5,15 @@
 reformatting_funcs = {}
 
 
-def register_format(name, detection_func, extraction_func, reformatting_func):
+def register_format(name, obj):
     """ register a new format """
     if name in detection_funcs:
         warnings.warn(f"{name} already registered", RuntimeWarning)
 
+    detection_func = getattr(obj, "detection_func")
+    extraction_func = getattr(obj, "extraction_func")
+    reformatting_func = getattr(obj, "reformatting_func")
+
     detection_funcs[name] = detection_func
     extraction_funcs[name] = extraction_func
     reformatting_funcs[name] = reformatting_func

From 09fada66f4fd2b3b33a05023af99241e8294b41b Mon Sep 17 00:00:00 2001
From: Keewis <keewis@posteo.de>
Date: Mon, 16 Mar 2020 13:31:41 +0100
Subject: [PATCH 04/24] add a dummy format that detects every line and
 reformats none

---
 blackdoc/formats/no_code.py    | 14 ++++++++++++++
 blackdoc/tests/__init__.py     |  0
 blackdoc/tests/data.py         | 17 +++++++++++++++++
 blackdoc/tests/test_no_code.py | 27 +++++++++++++++++++++++++++
 4 files changed, 58 insertions(+)
 create mode 100644 blackdoc/formats/no_code.py
 create mode 100644 blackdoc/tests/__init__.py
 create mode 100644 blackdoc/tests/data.py
 create mode 100644 blackdoc/tests/test_no_code.py

diff --git a/blackdoc/formats/no_code.py b/blackdoc/formats/no_code.py
new file mode 100644
index 0000000..f1e1a41
--- /dev/null
+++ b/blackdoc/formats/no_code.py
@@ -0,0 +1,14 @@
+import more_itertools
+
+
+def detection_func(lines):
+    number, line = more_itertools.first(lines)
+    return (number, number + 1), line
+
+
+def extraction_func(line):
+    return 0, line
+
+
+def reformatting_func(line, indentation_depth):
+    return line
diff --git a/blackdoc/tests/__init__.py b/blackdoc/tests/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/blackdoc/tests/data.py b/blackdoc/tests/data.py
new file mode 100644
index 0000000..e3d4365
--- /dev/null
+++ b/blackdoc/tests/data.py
@@ -0,0 +1,17 @@
+docstring = """ a function to open files
+
+    with a very long description
+
+    >>> file = open(
+    ...     "very_long_filepath",
+    ...     mode="a",
+    ... )
+    >>> file
+    <_io.TextIOWrapper name='very_long_filepath' mode='w' encoding='UTF-8'>
+
+    text after the first example, spanning
+    multiple lines
+
+    >>> file.closed
+    False
+"""
diff --git a/blackdoc/tests/test_no_code.py b/blackdoc/tests/test_no_code.py
new file mode 100644
index 0000000..c6828c7
--- /dev/null
+++ b/blackdoc/tests/test_no_code.py
@@ -0,0 +1,27 @@
+from blackdoc.formats import no_code
+
+from .data import docstring
+
+
+def test_detection_func():
+    lines = docstring.split("\n")
+
+    line_range = (1, 2)
+    line = lines[0]
+
+    assert no_code.detection_func(enumerate(lines, start=1)) == (line_range, line)
+
+
+def test_extraction_func():
+    lines = docstring.split("\n")
+    depth = 0
+    line = lines[0]
+
+    assert no_code.extraction_func(line) == (depth, line)
+
+
+def test_reformatting_func():
+    lines = docstring.split("\n")
+    line = lines[0]
+
+    assert no_code.reformatting_func(line, indentation_depth=0) == line

From 063acae7814447443021be8cda5170b05f418031 Mon Sep 17 00:00:00 2001
From: Keewis <keewis@posteo.de>
Date: Mon, 16 Mar 2020 13:37:58 +0100
Subject: [PATCH 05/24] rename the dummy format and register it

---
 blackdoc/formats/__init__.py                     | 6 ++++--
 blackdoc/formats/{no_code.py => none.py}         | 0
 blackdoc/tests/{test_no_code.py => test_none.py} | 8 ++++----
 3 files changed, 8 insertions(+), 6 deletions(-)
 rename blackdoc/formats/{no_code.py => none.py} (100%)
 rename blackdoc/tests/{test_no_code.py => test_none.py} (56%)

diff --git a/blackdoc/formats/__init__.py b/blackdoc/formats/__init__.py
index 94a2362..632aa25 100644
--- a/blackdoc/formats/__init__.py
+++ b/blackdoc/formats/__init__.py
@@ -1,5 +1,6 @@
 import textwrap
 
+from . import none
 from .register import detection_funcs  # noqa
 from .register import extraction_funcs, reformatting_funcs, register_format
 
@@ -25,5 +26,6 @@ def reformat_code(line_unit, category, indentation_depth):
     return textwrap.indent(reformatted, " " * indentation_depth)
 
 
-for module in ():
-    register_format(module.__name__, module)
+for module in (none,):
+    name = module.__name__.split(".")[-1]
+    register_format(name, module)
diff --git a/blackdoc/formats/no_code.py b/blackdoc/formats/none.py
similarity index 100%
rename from blackdoc/formats/no_code.py
rename to blackdoc/formats/none.py
diff --git a/blackdoc/tests/test_no_code.py b/blackdoc/tests/test_none.py
similarity index 56%
rename from blackdoc/tests/test_no_code.py
rename to blackdoc/tests/test_none.py
index c6828c7..3c957c6 100644
--- a/blackdoc/tests/test_no_code.py
+++ b/blackdoc/tests/test_none.py
@@ -1,4 +1,4 @@
-from blackdoc.formats import no_code
+from blackdoc.formats import none
 
 from .data import docstring
 
@@ -9,7 +9,7 @@ def test_detection_func():
     line_range = (1, 2)
     line = lines[0]
 
-    assert no_code.detection_func(enumerate(lines, start=1)) == (line_range, line)
+    assert none.detection_func(enumerate(lines, start=1)) == (line_range, line)
 
 
 def test_extraction_func():
@@ -17,11 +17,11 @@ def test_extraction_func():
     depth = 0
     line = lines[0]
 
-    assert no_code.extraction_func(line) == (depth, line)
+    assert none.extraction_func(line) == (depth, line)
 
 
 def test_reformatting_func():
     lines = docstring.split("\n")
     line = lines[0]
 
-    assert no_code.reformatting_func(line, indentation_depth=0) == line
+    assert none.reformatting_func(line, indentation_depth=0) == line

From 41d1f4819ecd4acdb6716683011cc71294deeda5 Mon Sep 17 00:00:00 2001
From: Keewis <keewis@posteo.de>
Date: Mon, 16 Mar 2020 14:02:02 +0100
Subject: [PATCH 06/24] only work with the line data when extracting or
 reformatting

---
 blackdoc/formats/none.py    | 4 ++--
 blackdoc/tests/test_none.py | 5 ++---
 2 files changed, 4 insertions(+), 5 deletions(-)

diff --git a/blackdoc/formats/none.py b/blackdoc/formats/none.py
index f1e1a41..8d0ec01 100644
--- a/blackdoc/formats/none.py
+++ b/blackdoc/formats/none.py
@@ -7,8 +7,8 @@ def detection_func(lines):
 
 
 def extraction_func(line):
-    return 0, line
+    return line
 
 
-def reformatting_func(line, indentation_depth):
+def reformatting_func(line):
     return line
diff --git a/blackdoc/tests/test_none.py b/blackdoc/tests/test_none.py
index 3c957c6..d882e62 100644
--- a/blackdoc/tests/test_none.py
+++ b/blackdoc/tests/test_none.py
@@ -14,14 +14,13 @@ def test_detection_func():
 
 def test_extraction_func():
     lines = docstring.split("\n")
-    depth = 0
     line = lines[0]
 
-    assert none.extraction_func(line) == (depth, line)
+    assert none.extraction_func(line) == line
 
 
 def test_reformatting_func():
     lines = docstring.split("\n")
     line = lines[0]
 
-    assert none.reformatting_func(line, indentation_depth=0) == line
+    assert none.reformatting_func(line) == line

From 7d2e575e2f88bad15a44456ad14f349632f55c0c Mon Sep 17 00:00:00 2001
From: Keewis <keewis@posteo.de>
Date: Mon, 16 Mar 2020 14:18:23 +0100
Subject: [PATCH 07/24] add a split-by-lines version of the test docstring

---
 blackdoc/tests/data.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/blackdoc/tests/data.py b/blackdoc/tests/data.py
index e3d4365..76b7d05 100644
--- a/blackdoc/tests/data.py
+++ b/blackdoc/tests/data.py
@@ -15,3 +15,4 @@
     >>> file.closed
     False
 """
+lines = docstring.split("\n")

From e292545035c200b81c224913f2c7f798234dd602 Mon Sep 17 00:00:00 2001
From: Keewis <keewis@posteo.de>
Date: Mon, 16 Mar 2020 14:22:31 +0100
Subject: [PATCH 08/24] add a doctest format module

---
 blackdoc/formats/__init__.py   |  4 +--
 blackdoc/formats/doctest.py    | 65 ++++++++++++++++++++++++++++++++++
 blackdoc/tests/test_doctest.py | 53 +++++++++++++++++++++++++++
 3 files changed, 120 insertions(+), 2 deletions(-)
 create mode 100644 blackdoc/formats/doctest.py
 create mode 100644 blackdoc/tests/test_doctest.py

diff --git a/blackdoc/formats/__init__.py b/blackdoc/formats/__init__.py
index 632aa25..6c4d7cc 100644
--- a/blackdoc/formats/__init__.py
+++ b/blackdoc/formats/__init__.py
@@ -1,6 +1,6 @@
 import textwrap
 
-from . import none
+from . import doctest, none
 from .register import detection_funcs  # noqa
 from .register import extraction_funcs, reformatting_funcs, register_format
 
@@ -26,6 +26,6 @@ def reformat_code(line_unit, category, indentation_depth):
     return textwrap.indent(reformatted, " " * indentation_depth)
 
 
-for module in (none,):
+for module in (none, doctest):
     name = module.__name__.split(".")[-1]
     register_format(name, module)
diff --git a/blackdoc/formats/doctest.py b/blackdoc/formats/doctest.py
new file mode 100644
index 0000000..a3c0309
--- /dev/null
+++ b/blackdoc/formats/doctest.py
@@ -0,0 +1,65 @@
+import itertools
+
+import more_itertools
+
+prompt = ">>> "
+continuation_prompt = "... "
+
+
+def continuation_lines(lines):
+    while True:
+        try:
+            line_number, line = lines.peek()
+        except StopIteration:
+            line_number = -1
+            line = ""
+
+        if not line.lstrip().startswith(continuation_prompt):
+            break
+
+        # consume the line
+        next(lines)
+        yield line_number, line
+
+
+def detection_func(lines):
+    try:
+        _, line = lines.peek()
+    except StopIteration:
+        line = ""
+
+    if not line.lstrip().startswith(prompt):
+        return None
+
+    detected_lines = list(
+        itertools.chain([more_itertools.first(lines)], continuation_lines(lines))
+    )
+    line_numbers, lines = map(tuple, more_itertools.unzip(detected_lines))
+
+    line_range = min(line_numbers), max(line_numbers) + 1
+    if set(line_numbers) != set(range(line_range[0], line_range[1])):
+        raise RuntimeError("line numbers are not contiguous")
+
+    return line_range, "\n".join(lines)
+
+
+def extraction_func(line):
+    lines = line.split("\n")
+    if any(line[:4] not in (prompt, continuation_prompt) for line in lines):
+        raise RuntimeError(f"misformatted code unit: {line}")
+
+    extracted_line = "\n".join(line[4:] for line in lines)
+
+    return extracted_line
+
+
+def reformatting_func(line):
+    lines = iter(line.split("\n"))
+
+    reformatted = "\n".join(
+        itertools.chain(
+            more_itertools.always_iterable(prompt + more_itertools.first(lines)),
+            (continuation_prompt + line for line in lines),
+        )
+    )
+    return reformatted
diff --git a/blackdoc/tests/test_doctest.py b/blackdoc/tests/test_doctest.py
new file mode 100644
index 0000000..13e750b
--- /dev/null
+++ b/blackdoc/tests/test_doctest.py
@@ -0,0 +1,53 @@
+import textwrap
+
+import more_itertools
+import pytest
+
+from blackdoc.formats import doctest
+
+from .data import lines
+
+
+@pytest.mark.parametrize(
+    "lines,expected",
+    (
+        pytest.param(lines[0], None, id="no_doctest"),
+        pytest.param(lines[8], ((1, 2), lines[8]), id="single_line"),
+        pytest.param(lines[4:8], ((1, 5), "\n".join(lines[4:8])), id="multiple_lines"),
+    ),
+)
+def test_detection_func(lines, expected):
+    lines = more_itertools.peekable(
+        enumerate(more_itertools.always_iterable(lines), start=1)
+    )
+
+    actual = doctest.detection_func(lines)
+    assert actual == expected
+
+
+@pytest.mark.parametrize(
+    "line",
+    (
+        pytest.param(textwrap.dedent(lines[8]), id="single_line"),
+        pytest.param(textwrap.dedent("\n".join(lines[4:8])), id="multiple_lines"),
+    ),
+)
+def test_extraction_func(line):
+    expected = "\n".join(line.lstrip()[4:] for line in line.split("\n"))
+    actual = doctest.extraction_func(line)
+
+    assert expected == actual
+
+
+@pytest.mark.parametrize(
+    "expected",
+    (
+        pytest.param(textwrap.dedent(lines[8]), id="single_line"),
+        pytest.param(textwrap.dedent("\n".join(lines[4:8])), id="multiple_lines"),
+    ),
+)
+def test_reformatting_func(expected):
+    line = "\n".join(line.lstrip()[4:] for line in expected.split("\n"))
+
+    actual = doctest.reformatting_func(line)
+    assert expected == actual

From cb97bceab86c98843a23e0155ae29d3713cd0123 Mon Sep 17 00:00:00 2001
From: Keewis <keewis@posteo.de>
Date: Mon, 16 Mar 2020 16:30:56 +0100
Subject: [PATCH 09/24] return the category along with the processed lines

---
 blackdoc/formats/doctest.py    | 3 ++-
 blackdoc/formats/none.py       | 4 +++-
 blackdoc/tests/test_doctest.py | 8 ++++++--
 blackdoc/tests/test_none.py    | 3 ++-
 4 files changed, 13 insertions(+), 5 deletions(-)

diff --git a/blackdoc/formats/doctest.py b/blackdoc/formats/doctest.py
index a3c0309..bf19413 100644
--- a/blackdoc/formats/doctest.py
+++ b/blackdoc/formats/doctest.py
@@ -2,6 +2,7 @@
 
 import more_itertools
 
+name = "doctest"
 prompt = ">>> "
 continuation_prompt = "... "
 
@@ -40,7 +41,7 @@ def detection_func(lines):
     if set(line_numbers) != set(range(line_range[0], line_range[1])):
         raise RuntimeError("line numbers are not contiguous")
 
-    return line_range, "\n".join(lines)
+    return line_range, name, "\n".join(lines)
 
 
 def extraction_func(line):
diff --git a/blackdoc/formats/none.py b/blackdoc/formats/none.py
index 8d0ec01..e935680 100644
--- a/blackdoc/formats/none.py
+++ b/blackdoc/formats/none.py
@@ -1,9 +1,11 @@
 import more_itertools
 
+name = "none"
+
 
 def detection_func(lines):
     number, line = more_itertools.first(lines)
-    return (number, number + 1), line
+    return (number, number + 1), name, line
 
 
 def extraction_func(line):
diff --git a/blackdoc/tests/test_doctest.py b/blackdoc/tests/test_doctest.py
index 13e750b..224c4fe 100644
--- a/blackdoc/tests/test_doctest.py
+++ b/blackdoc/tests/test_doctest.py
@@ -12,8 +12,12 @@
     "lines,expected",
     (
         pytest.param(lines[0], None, id="no_doctest"),
-        pytest.param(lines[8], ((1, 2), lines[8]), id="single_line"),
-        pytest.param(lines[4:8], ((1, 5), "\n".join(lines[4:8])), id="multiple_lines"),
+        pytest.param(lines[8], ((1, 2), doctest.name, lines[8]), id="single_line"),
+        pytest.param(
+            lines[4:8],
+            ((1, 5), doctest.name, "\n".join(lines[4:8])),
+            id="multiple_lines",
+        ),
     ),
 )
 def test_detection_func(lines, expected):
diff --git a/blackdoc/tests/test_none.py b/blackdoc/tests/test_none.py
index d882e62..c864bea 100644
--- a/blackdoc/tests/test_none.py
+++ b/blackdoc/tests/test_none.py
@@ -8,8 +8,9 @@ def test_detection_func():
 
     line_range = (1, 2)
     line = lines[0]
+    name = none.name
 
-    assert none.detection_func(enumerate(lines, start=1)) == (line_range, line)
+    assert none.detection_func(enumerate(lines, start=1)) == (line_range, name, line)
 
 
 def test_extraction_func():

From 7e0ebd013956ede59af2a8a409c2295d61f754bb Mon Sep 17 00:00:00 2001
From: Keewis <keewis@posteo.de>
Date: Mon, 16 Mar 2020 16:33:58 +0100
Subject: [PATCH 10/24] also return the prompt length

---
 blackdoc/formats/__init__.py   | 5 +++--
 blackdoc/formats/doctest.py    | 2 +-
 blackdoc/formats/none.py       | 2 +-
 blackdoc/tests/test_doctest.py | 6 +++++-
 blackdoc/tests/test_none.py    | 3 ++-
 5 files changed, 12 insertions(+), 6 deletions(-)

diff --git a/blackdoc/formats/__init__.py b/blackdoc/formats/__init__.py
index 6c4d7cc..1c81632 100644
--- a/blackdoc/formats/__init__.py
+++ b/blackdoc/formats/__init__.py
@@ -7,13 +7,14 @@
 
 def extract_code(line_unit, category):
     dedented = textwrap.dedent(line_unit)
-    indentation_level = line_unit.find(dedented[:5])
+    indentation_depth = line_unit.find(dedented[:5])
 
     func = extraction_funcs.get(category, None)
     if func is None:
         raise RuntimeError(f"unknown code format: {category}")
 
-    return indentation_level, func(dedented)
+    prompt_length, extracted = func(dedented)
+    return indentation_depth, prompt_length, extracted
 
 
 def reformat_code(line_unit, category, indentation_depth):
diff --git a/blackdoc/formats/doctest.py b/blackdoc/formats/doctest.py
index bf19413..2736eeb 100644
--- a/blackdoc/formats/doctest.py
+++ b/blackdoc/formats/doctest.py
@@ -51,7 +51,7 @@ def extraction_func(line):
 
     extracted_line = "\n".join(line[4:] for line in lines)
 
-    return extracted_line
+    return len(prompt), extracted_line
 
 
 def reformatting_func(line):
diff --git a/blackdoc/formats/none.py b/blackdoc/formats/none.py
index e935680..0f761f3 100644
--- a/blackdoc/formats/none.py
+++ b/blackdoc/formats/none.py
@@ -9,7 +9,7 @@ def detection_func(lines):
 
 
 def extraction_func(line):
-    return line
+    return 0, line
 
 
 def reformatting_func(line):
diff --git a/blackdoc/tests/test_doctest.py b/blackdoc/tests/test_doctest.py
index 224c4fe..0df07db 100644
--- a/blackdoc/tests/test_doctest.py
+++ b/blackdoc/tests/test_doctest.py
@@ -37,7 +37,11 @@ def test_detection_func(lines, expected):
     ),
 )
 def test_extraction_func(line):
-    expected = "\n".join(line.lstrip()[4:] for line in line.split("\n"))
+    prompt_length = len(doctest.prompt)
+    expected = (
+        prompt_length,
+        "\n".join(line.lstrip()[4:] for line in line.split("\n")),
+    )
     actual = doctest.extraction_func(line)
 
     assert expected == actual
diff --git a/blackdoc/tests/test_none.py b/blackdoc/tests/test_none.py
index c864bea..6418acc 100644
--- a/blackdoc/tests/test_none.py
+++ b/blackdoc/tests/test_none.py
@@ -15,9 +15,10 @@ def test_detection_func():
 
 def test_extraction_func():
     lines = docstring.split("\n")
+    prompt_length = 0
     line = lines[0]
 
-    assert none.extraction_func(line) == line
+    assert none.extraction_func(line) == (prompt_length, line)
 
 
 def test_reformatting_func():

From 0d40f28007c4382830c8a8340c1528fefe2b8478 Mon Sep 17 00:00:00 2001
From: Keewis <keewis@posteo.de>
Date: Mon, 16 Mar 2020 16:34:23 +0100
Subject: [PATCH 11/24] register using the format object's name attribute

---
 blackdoc/formats/__init__.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/blackdoc/formats/__init__.py b/blackdoc/formats/__init__.py
index 1c81632..196b422 100644
--- a/blackdoc/formats/__init__.py
+++ b/blackdoc/formats/__init__.py
@@ -28,5 +28,4 @@ def reformat_code(line_unit, category, indentation_depth):
 
 
 for module in (none, doctest):
-    name = module.__name__.split(".")[-1]
-    register_format(name, module)
+    register_format(module.name, module)

From 8a11ca758ac1bf2cdc057e38c4b630bb41c3c7d5 Mon Sep 17 00:00:00 2001
From: Keewis <keewis@posteo.de>
Date: Mon, 16 Mar 2020 16:35:01 +0100
Subject: [PATCH 12/24] limit the caught exceptions to TokenErrors

---
 blackdoc/__main__.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/blackdoc/__main__.py b/blackdoc/__main__.py
index 78f919b..94dd5d4 100644
--- a/blackdoc/__main__.py
+++ b/blackdoc/__main__.py
@@ -3,6 +3,7 @@
 import sys
 
 import black
+from blib2to3.pgen2.tokenize import TokenError
 
 from . import format_lines
 
@@ -39,7 +40,7 @@ def format_and_overwrite(path, mode):
 
         with open(path, "w", encoding=encoding, newline=newline) as f:
             f.write(new_content)
-    except Exception as e:
+    except TokenError as e:
         print(f"error: cannot format {path.absolute()}: {e}")
         result = "error"
 
@@ -60,7 +61,7 @@ def format_and_check(path, mode):
         else:
             print(f"would reformat {path}")
             result = "reformatted"
-    except Exception as e:
+    except TokenError as e:
         print(f"error: cannot format {path.absolute()}: {e}")
         result = "error"
 

From 3c5520cd6906e7b20056b9df740c2259da1dec24 Mon Sep 17 00:00:00 2001
From: Keewis <keewis@posteo.de>
Date: Mon, 16 Mar 2020 17:45:14 +0100
Subject: [PATCH 13/24] rewrite the main logic and provide more informative
 error messages

---
 blackdoc/__init__.py | 217 +++++++++++--------------------------------
 blackdoc/__main__.py |   5 +-
 2 files changed, 54 insertions(+), 168 deletions(-)

diff --git a/blackdoc/__init__.py b/blackdoc/__init__.py
index ac8f74a..4696c1c 100644
--- a/blackdoc/__init__.py
+++ b/blackdoc/__init__.py
@@ -1,180 +1,75 @@
 import copy
-import textwrap
+import re
 
 import black
 import more_itertools
+from blib2to3.pgen2.tokenize import TokenError
 
-doctest_prompt = ">>> "
-doctest_continuation_prompt = "... "
+from .formats import detection_funcs, extract_code, reformat_code
 
-prompt_categories = {
-    "doctest": doctest_prompt,
-}
-continuation_prompt_categories = {
-    "doctest": doctest_continuation_prompt,
-}
-available_prompts = set(prompt_categories.values()) | set(
-    continuation_prompt_categories.values()
-)
 
+def update_line_number(message, original_number):
+    line_re = re.compile(r"(?P<line_number>\d+):(?P<column_number>\d+):")
+    match = line_re.search(message)
+    if match:
+        line_number, column_number = map(int, match.groups())
+        new_line_number = line_number + original_number - 1
 
-def extract_prompt(line):
-    stripped = line.lstrip()
-    prompt_length = stripped.find(" ") + 1
+        message = line_re.sub(f"{new_line_number}:{column_number}:", message)
+        print(message, tuple(map(int, match.groups())))
+    return message
 
-    prompt = stripped[:prompt_length]
 
-    return prompt if prompt in available_prompts else None
+def line_numbers(lines):
+    yield from enumerate(lines, start=1)
 
 
-def remove_prompt(line, prompt):
-    if not line.startswith(prompt):
-        raise RuntimeError(
-            f"cannot remove prompt {prompt} from line: prompt not found", line
+def classify_lines(lines):
+    lines = more_itertools.peekable(lines)
+    while lines:
+        maybe_detected = (
+            (name, func(lines))
+            for name, func in detection_funcs.items()
+            if name != "none"
         )
-
-    without_prompt = line[len(prompt) :]
-    return without_prompt
-
-
-def add_prompt(line, prompt):
-    return prompt + line
-
-
-def remove_doctest_prompt(code_unit):
-    indentation_depth = code_unit.find(doctest_prompt)
-    code_unit = textwrap.dedent(code_unit)
-
-    # multiline unit
-    if "\n" in code_unit:
-        prompt_line, *continuation_lines = code_unit.split("\n")
-        removed = "\n".join(
-            [
-                remove_prompt(prompt_line, doctest_prompt),
-                *(
-                    remove_prompt(line, doctest_continuation_prompt)
-                    for line in continuation_lines
-                ),
-            ]
-        )
-    else:
-        removed = remove_prompt(code_unit, doctest_prompt)
-
-    return indentation_depth, removed
-
-
-def add_doctest_prompt(code_unit, indentation_depth):
-    if "\n" in code_unit:
-        prompt_line, *continuation_lines = code_unit.split("\n")
-        reformatted = "\n".join(
-            [
-                add_prompt(prompt_line, doctest_prompt),
-                *(
-                    add_prompt(line, doctest_continuation_prompt)
-                    for line in continuation_lines
-                ),
-            ]
-        )
-    else:
-        reformatted = add_prompt(code_unit, doctest_prompt)
-
-    return textwrap.indent(reformatted, " " * indentation_depth)
-
-
-extraction_funcs = {
-    "doctest": remove_doctest_prompt,
-}
-reformatting_funcs = {
-    "doctest": add_doctest_prompt,
-}
-
-
-def classify(lines):
-    """ classify lines by prompt type """
-    prompts = dict(zip(prompt_categories.values(), prompt_categories.keys()))
-    continuation_prompts = dict(
-        zip(
-            continuation_prompt_categories.values(),
-            continuation_prompt_categories.keys(),
-        )
-    )
-
-    for line in lines:
-        maybe_prompt = extract_prompt(line)
-        category = (
-            prompts.get(maybe_prompt, None)
-            or continuation_prompts.get(maybe_prompt, None)
-            or "none"
-        )
-
-        yield category, line
-
-
-def continuation_lines(lines, continuation_prompt):
-    # We can't use `itertools.takewhile` because it drops the first non-match
-    # Instead, we peek at the iterable and only remove the element if we take it
-    iterable = more_itertools.peekable(lines) if not hasattr(lines, "peek") else lines
-    while True:
-        try:
-            category, line = iterable.peek()
-        except StopIteration:
-            break
-
-        if extract_prompt(line) != continuation_prompt:
-            break
-
-        # consume the item
-        next(iterable)
-
-        yield line
-
-
-def group_code_units(labelled_lines):
-    """ group together code units """
-    # we need to make this peekable here since otherwise we lose an element
-    lines = more_itertools.peekable(labelled_lines)
-    while True:
-        try:
-            category, line = next(lines)
-        except StopIteration:
-            break
-
-        if category == "none":
-            unit = line
+        detected = {name: value for name, value in maybe_detected if value is not None}
+
+        if not detected:
+            yield detection_funcs["none"](lines)
+        elif len(detected) > 1:
+            raise RuntimeError(
+                f"cannot classify line: {', '.join(detected.values())} claim it: {lines.peek()}"
+            )
         else:
-            continuation_prompt = continuation_prompt_categories.get(category, None)
-            if continuation_prompt is None:
-                raise ValueError("unknown prompt category for grouping: {category}")
-            unit = "\n".join([line, *continuation_lines(lines, continuation_prompt)])
-        yield category, unit
+            yield more_itertools.one(detected.values())
 
 
-def blacken(labelled_lines, mode=None):
-    for category, line in labelled_lines:
+def blacken(lines, mode=None):
+    for original_line_range, category, line_unit in lines:
         if category == "none":
-            yield category, line
+            yield category, line_unit
             continue
 
-        # remove the prompt and save the indentation depth for later
-        converter = extraction_funcs.get(category, None)
-        if converter is None:
-            raise ValueError(f"unknown prompt category for extraction: {category}")
-        indentation_depth, code_unit = converter(line)
+        indentation_depth, prompt_length, code = extract_code(line_unit, category)
 
-        # update the line length
-        prompt_length = indentation_depth + len(prompt_categories[category])
         current_mode = black.FileMode() if mode is None else copy.copy(mode)
-        current_mode.line_length -= prompt_length
+        current_mode.line_length -= indentation_depth + prompt_length
+
+        try:
+            blackened = black.format_str(code, mode=current_mode).rstrip()
+        except TokenError as e:
+            apparent_line_num, column = e.args[1]
+            message = e.args[0]
+            lineno = original_line_range[0] + (apparent_line_num - 1)
+            faulty_line = code.split("\n")[(apparent_line_num - 1) - 1]
 
-        # blacken the code
-        blackened = black.format_str(code_unit, mode=current_mode).rstrip()
+            raise black.InvalidInput(f"{lineno}:{column}: {message}: {faulty_line}")
+        except black.InvalidInput as e:
+            message = update_line_number(str(e), original_line_range[0])
+            raise black.InvalidInput(message)
 
-        # add the prompt and reindent
-        converter = reformatting_funcs.get(category, None)
-        if converter is None:
-            raise ValueError(f"unknown prompt category for reformatting: {category}")
+        reformatted = reformat_code(blackened, category, indentation_depth)
 
-        reformatted = converter(blackened, indentation_depth)
         yield category, reformatted
 
 
@@ -184,17 +79,9 @@ def unclassify(labelled_lines):
 
 
 def format_lines(lines, mode=None):
-    labeled = classify(lines)
-    grouped = group_code_units(labeled)
-    blackened = blacken(grouped, mode=mode)
-
-    return unclassify(blackened)
-
+    numbered = line_numbers(lines)
 
-def format_file(path):
-    with open(path) as f:
-        return "\n".join(format_lines(line.rstrip() for line in f)) + "\n"
+    labeled = classify_lines(numbered)
+    blackened = blacken(labeled, mode=mode)
 
-
-def format_text(text):
-    return "\n".join(format_lines(text.split("\n")))
+    return unclassify(blackened)
diff --git a/blackdoc/__main__.py b/blackdoc/__main__.py
index 94dd5d4..f5fbaf2 100644
--- a/blackdoc/__main__.py
+++ b/blackdoc/__main__.py
@@ -3,7 +3,6 @@
 import sys
 
 import black
-from blib2to3.pgen2.tokenize import TokenError
 
 from . import format_lines
 
@@ -40,7 +39,7 @@ def format_and_overwrite(path, mode):
 
         with open(path, "w", encoding=encoding, newline=newline) as f:
             f.write(new_content)
-    except TokenError as e:
+    except black.InvalidInput as e:
         print(f"error: cannot format {path.absolute()}: {e}")
         result = "error"
 
@@ -61,7 +60,7 @@ def format_and_check(path, mode):
         else:
             print(f"would reformat {path}")
             result = "reformatted"
-    except TokenError as e:
+    except black.InvalidInput as e:
         print(f"error: cannot format {path.absolute()}: {e}")
         result = "error"
 

From 4d70da9a25e3a72766e0be476c66b6e07c7450d9 Mon Sep 17 00:00:00 2001
From: Keewis <keewis@posteo.de>
Date: Mon, 16 Mar 2020 17:59:50 +0100
Subject: [PATCH 14/24] move the functions into different modules

---
 blackdoc/__init__.py       | 78 ++------------------------------------
 blackdoc/blacken.py        | 47 +++++++++++++++++++++++
 blackdoc/classification.py | 28 ++++++++++++++
 3 files changed, 78 insertions(+), 75 deletions(-)
 create mode 100644 blackdoc/blacken.py
 create mode 100644 blackdoc/classification.py

diff --git a/blackdoc/__init__.py b/blackdoc/__init__.py
index 4696c1c..a3daa90 100644
--- a/blackdoc/__init__.py
+++ b/blackdoc/__init__.py
@@ -1,87 +1,15 @@
-import copy
-import re
-
-import black
-import more_itertools
-from blib2to3.pgen2.tokenize import TokenError
-
-from .formats import detection_funcs, extract_code, reformat_code
-
-
-def update_line_number(message, original_number):
-    line_re = re.compile(r"(?P<line_number>\d+):(?P<column_number>\d+):")
-    match = line_re.search(message)
-    if match:
-        line_number, column_number = map(int, match.groups())
-        new_line_number = line_number + original_number - 1
-
-        message = line_re.sub(f"{new_line_number}:{column_number}:", message)
-        print(message, tuple(map(int, match.groups())))
-    return message
+from .blacken import blacken
+from .classification import classify, unclassify
 
 
 def line_numbers(lines):
     yield from enumerate(lines, start=1)
 
 
-def classify_lines(lines):
-    lines = more_itertools.peekable(lines)
-    while lines:
-        maybe_detected = (
-            (name, func(lines))
-            for name, func in detection_funcs.items()
-            if name != "none"
-        )
-        detected = {name: value for name, value in maybe_detected if value is not None}
-
-        if not detected:
-            yield detection_funcs["none"](lines)
-        elif len(detected) > 1:
-            raise RuntimeError(
-                f"cannot classify line: {', '.join(detected.values())} claim it: {lines.peek()}"
-            )
-        else:
-            yield more_itertools.one(detected.values())
-
-
-def blacken(lines, mode=None):
-    for original_line_range, category, line_unit in lines:
-        if category == "none":
-            yield category, line_unit
-            continue
-
-        indentation_depth, prompt_length, code = extract_code(line_unit, category)
-
-        current_mode = black.FileMode() if mode is None else copy.copy(mode)
-        current_mode.line_length -= indentation_depth + prompt_length
-
-        try:
-            blackened = black.format_str(code, mode=current_mode).rstrip()
-        except TokenError as e:
-            apparent_line_num, column = e.args[1]
-            message = e.args[0]
-            lineno = original_line_range[0] + (apparent_line_num - 1)
-            faulty_line = code.split("\n")[(apparent_line_num - 1) - 1]
-
-            raise black.InvalidInput(f"{lineno}:{column}: {message}: {faulty_line}")
-        except black.InvalidInput as e:
-            message = update_line_number(str(e), original_line_range[0])
-            raise black.InvalidInput(message)
-
-        reformatted = reformat_code(blackened, category, indentation_depth)
-
-        yield category, reformatted
-
-
-def unclassify(labelled_lines):
-    for _, line in labelled_lines:
-        yield line
-
-
 def format_lines(lines, mode=None):
     numbered = line_numbers(lines)
 
-    labeled = classify_lines(numbered)
+    labeled = classify(numbered)
     blackened = blacken(labeled, mode=mode)
 
     return unclassify(blackened)
diff --git a/blackdoc/blacken.py b/blackdoc/blacken.py
new file mode 100644
index 0000000..64af1bf
--- /dev/null
+++ b/blackdoc/blacken.py
@@ -0,0 +1,47 @@
+import copy
+import re
+
+import black
+from blib2to3.pgen2.tokenize import TokenError
+
+from .formats import extract_code, reformat_code
+
+
+def update_line_number(message, original_number):
+    line_re = re.compile(r"(?P<line_number>\d+):(?P<column_number>\d+):")
+    match = line_re.search(message)
+    if match:
+        line_number, column_number = map(int, match.groups())
+        new_line_number = line_number + original_number - 1
+
+        message = line_re.sub(f"{new_line_number}:{column_number}:", message)
+    return message
+
+
+def blacken(lines, mode=None):
+    for original_line_range, category, line_unit in lines:
+        if category == "none":
+            yield category, line_unit
+            continue
+
+        indentation_depth, prompt_length, code = extract_code(line_unit, category)
+
+        current_mode = black.FileMode() if mode is None else copy.copy(mode)
+        current_mode.line_length -= indentation_depth + prompt_length
+
+        try:
+            blackened = black.format_str(code, mode=current_mode).rstrip()
+        except TokenError as e:
+            apparent_line_num, column = e.args[1]
+            message = e.args[0]
+            lineno = original_line_range[0] + (apparent_line_num - 1)
+            faulty_line = code.split("\n")[(apparent_line_num - 1) - 1]
+
+            raise black.InvalidInput(f"{lineno}:{column}: {message}: {faulty_line}")
+        except black.InvalidInput as e:
+            message = update_line_number(str(e), original_line_range[0])
+            raise black.InvalidInput(message)
+
+        reformatted = reformat_code(blackened, category, indentation_depth)
+
+        yield category, reformatted
diff --git a/blackdoc/classification.py b/blackdoc/classification.py
new file mode 100644
index 0000000..19b06b6
--- /dev/null
+++ b/blackdoc/classification.py
@@ -0,0 +1,28 @@
+import more_itertools
+
+from .formats import detection_funcs
+
+
+def classify(lines):
+    lines = more_itertools.peekable(lines)
+    while lines:
+        maybe_detected = (
+            (name, func(lines))
+            for name, func in detection_funcs.items()
+            if name != "none"
+        )
+        detected = {name: value for name, value in maybe_detected if value is not None}
+
+        if not detected:
+            yield detection_funcs["none"](lines)
+        elif len(detected) > 1:
+            raise RuntimeError(
+                f"cannot classify line: {', '.join(detected.values())} claim it: {lines.peek()}"
+            )
+        else:
+            yield more_itertools.one(detected.values())
+
+
+def unclassify(labelled_lines):
+    for _, line in labelled_lines:
+        yield line

From 535325af4628f54e2ac096d02a7975a23ab0e1c5 Mon Sep 17 00:00:00 2001
From: Keewis <keewis@posteo.de>
Date: Mon, 16 Mar 2020 18:00:26 +0100
Subject: [PATCH 15/24] import the register function into the main package

---
 blackdoc/__init__.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/blackdoc/__init__.py b/blackdoc/__init__.py
index a3daa90..ccd3dca 100644
--- a/blackdoc/__init__.py
+++ b/blackdoc/__init__.py
@@ -1,5 +1,6 @@
 from .blacken import blacken
 from .classification import classify, unclassify
+from .formats import register_format  # noqa
 
 
 def line_numbers(lines):

From 913d2fd1db525e5223458934a15e5b7b65bee630 Mon Sep 17 00:00:00 2001
From: Keewis <keewis@posteo.de>
Date: Mon, 16 Mar 2020 18:22:22 +0100
Subject: [PATCH 16/24] add tests for the classification function

---
 blackdoc/tests/data.py                | 20 +++++++++
 blackdoc/tests/test_classification.py | 60 +++++++++++++++++++++++++++
 2 files changed, 80 insertions(+)
 create mode 100644 blackdoc/tests/test_classification.py

diff --git a/blackdoc/tests/data.py b/blackdoc/tests/data.py
index 76b7d05..5db8fe6 100644
--- a/blackdoc/tests/data.py
+++ b/blackdoc/tests/data.py
@@ -16,3 +16,23 @@
     False
 """
 lines = docstring.split("\n")
+code_units = (1, 1, 1, 1, 4, 1, 1, 1, 1, 1, 1, 1, 1, 1)
+line_labels = (
+    "none",
+    "none",
+    "none",
+    "none",
+    "doctest",
+    "doctest",
+    "doctest",
+    "doctest",
+    "doctest",
+    "none",
+    "none",
+    "none",
+    "none",
+    "none",
+    "doctest",
+    "none",
+    "none",
+)
diff --git a/blackdoc/tests/test_classification.py b/blackdoc/tests/test_classification.py
new file mode 100644
index 0000000..6d74bf6
--- /dev/null
+++ b/blackdoc/tests/test_classification.py
@@ -0,0 +1,60 @@
+import more_itertools
+
+from blackdoc import classification
+
+from . import data
+
+
+def print_line_with_range(name, range_, unit):
+    min_, max_ = range_
+    line_numbers = range(min_, max_)
+
+    no_group = " "
+    start_group = "┐"
+    mid_group = "│"
+    end_group = "┘"
+
+    for index, (lineno, line) in enumerate(zip(line_numbers, unit.split("\n"))):
+        if max_ - min_ == 1:
+            classifier = no_group
+        elif index == 0:
+            classifier = start_group
+        elif index == max_ - min_ - 1:
+            classifier = end_group
+        else:
+            classifier = mid_group
+
+        print(f"{name:>8s} {classifier} → {index:02d}: {line}")
+
+
+def print_classification(labeled):
+    for range, name, unit in labeled:
+        print_line_with_range(name, range, unit)
+
+
+def test_classify():
+    lines = enumerate(data.lines, start=1)
+
+    classified = tuple(classification.classify(lines))
+
+    print_classification(classified)
+
+    actual = tuple(max_ - min_ for (min_, max_), _, _ in classified)
+    expected = data.code_units
+    assert expected == actual
+
+    actual = tuple(
+        more_itertools.collapse(
+            [name] * len(lines.split("\n")) for _, name, lines in classified
+        )
+    )
+    expected = data.line_labels
+    assert expected == actual
+
+
+def test_unclassify():
+    labeled = tuple(zip(data.line_labels, data.lines))
+    actual = tuple(classification.unclassify(labeled))
+    expected = tuple(data.lines)
+
+    assert expected == actual

From 2f12c5766588df19727cc7739e30e0318038096a Mon Sep 17 00:00:00 2001
From: keewis <keewis@users.noreply.github.com>
Date: Tue, 17 Mar 2020 00:11:31 +0100
Subject: [PATCH 17/24] Apply suggestions from code review

---
 blackdoc/formats/doctest.py    | 2 +-
 blackdoc/tests/test_doctest.py | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/blackdoc/formats/doctest.py b/blackdoc/formats/doctest.py
index 2736eeb..9505c9f 100644
--- a/blackdoc/formats/doctest.py
+++ b/blackdoc/formats/doctest.py
@@ -38,7 +38,7 @@ def detection_func(lines):
     line_numbers, lines = map(tuple, more_itertools.unzip(detected_lines))
 
     line_range = min(line_numbers), max(line_numbers) + 1
-    if set(line_numbers) != set(range(line_range[0], line_range[1])):
+    if line_numbers != tuple(range(line_range[0], line_range[1])):
         raise RuntimeError("line numbers are not contiguous")
 
     return line_range, name, "\n".join(lines)
diff --git a/blackdoc/tests/test_doctest.py b/blackdoc/tests/test_doctest.py
index 0df07db..b2d884a 100644
--- a/blackdoc/tests/test_doctest.py
+++ b/blackdoc/tests/test_doctest.py
@@ -11,7 +11,7 @@
 @pytest.mark.parametrize(
     "lines,expected",
     (
-        pytest.param(lines[0], None, id="no_doctest"),
+        pytest.param(lines[0], None, id="no_line"),
         pytest.param(lines[8], ((1, 2), doctest.name, lines[8]), id="single_line"),
         pytest.param(
             lines[4:8],

From d7b3cb2a6c51aceed96eec7c6033a99c5844a1f2 Mon Sep 17 00:00:00 2001
From: Keewis <keewis@posteo.de>
Date: Tue, 17 Mar 2020 01:10:26 +0100
Subject: [PATCH 18/24] use consume(iterable, n=1) instead of next

---
 blackdoc/formats/doctest.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/blackdoc/formats/doctest.py b/blackdoc/formats/doctest.py
index 9505c9f..60d735e 100644
--- a/blackdoc/formats/doctest.py
+++ b/blackdoc/formats/doctest.py
@@ -18,8 +18,9 @@ def continuation_lines(lines):
         if not line.lstrip().startswith(continuation_prompt):
             break
 
-        # consume the line
-        next(lines)
+        # actually consume the item
+        more_itertools.consume(lines, n=1)
+
         yield line_number, line
 
 

From 71c77f288472d239fa9cb2781c3f48879e3771b2 Mon Sep 17 00:00:00 2001
From: Keewis <keewis@posteo.de>
Date: Tue, 17 Mar 2020 01:12:57 +0100
Subject: [PATCH 19/24] shorten the error message a bit

---
 blackdoc/classification.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/blackdoc/classification.py b/blackdoc/classification.py
index 19b06b6..863e957 100644
--- a/blackdoc/classification.py
+++ b/blackdoc/classification.py
@@ -16,8 +16,9 @@ def classify(lines):
         if not detected:
             yield detection_funcs["none"](lines)
         elif len(detected) > 1:
+            formatted_match_names = ", ".join(sorted(detected.keys()))
             raise RuntimeError(
-                f"cannot classify line: {', '.join(detected.values())} claim it: {lines.peek()}"
+                f"cannot classify line: {formatted_match_names} claim it: {lines.peek()}"
             )
         else:
             yield more_itertools.one(detected.values())

From 4c8f09a58a86d99619b6f8afeb2ce76624a28594 Mon Sep 17 00:00:00 2001
From: Keewis <keewis@posteo.de>
Date: Tue, 17 Mar 2020 15:51:11 +0100
Subject: [PATCH 20/24] remove the obsolete blackdoc test file

---
 blackdoc/tests/test_blackdoc.py | 122 --------------------------------
 1 file changed, 122 deletions(-)
 delete mode 100644 blackdoc/tests/test_blackdoc.py

diff --git a/blackdoc/tests/test_blackdoc.py b/blackdoc/tests/test_blackdoc.py
deleted file mode 100644
index 286bab9..0000000
--- a/blackdoc/tests/test_blackdoc.py
+++ /dev/null
@@ -1,122 +0,0 @@
-import more_itertools
-
-import blackdoc
-
-raw_docstring = """ a function to open files
-
-    with a very long description
-
-    >>> file = open(
-    ...     "very_long_filepath",
-    ...     mode="a",
-    ... )
-    >>> file
-    <_io.TextIOWrapper name='very_long_filepath' mode='w' encoding='UTF-8'>
-
-    text after the first example, spanning
-    multiple lines
-
-    >>> file.closed
-    False
-"""
-line_labels = (
-    "none",
-    "none",
-    "none",
-    "none",
-    "doctest",
-    "doctest",
-    "doctest",
-    "doctest",
-    "doctest",
-    "none",
-    "none",
-    "none",
-    "none",
-    "none",
-    "doctest",
-    "none",
-    "none",
-)
-code_units = (1, 1, 1, 1, 4, 1, 1, 1, 1, 1, 1, 1, 1, 1)
-docstring = """ a function to open files
-
-    with a very long description
-
-    >>> file = open("very_long_filepath", mode="a",)
-    >>> file
-    <_io.TextIOWrapper name='very_long_filepath' mode='w' encoding='UTF-8'>
-
-    text after the first example, spanning
-    multiple lines
-
-    >>> file.closed
-    False
-"""
-
-prompts = (
-    None,
-    None,
-    None,
-    None,
-    ">>> ",
-    "... ",
-    "... ",
-    "... ",
-    ">>> ",
-    None,
-    None,
-    None,
-    None,
-    None,
-    ">>> ",
-    None,
-    None,
-)
-
-
-def test_extract_prompt():
-    extracted = tuple(
-        blackdoc.extract_prompt(line) for line in raw_docstring.split("\n")
-    )
-    assert extracted == prompts
-
-
-def test_classify():
-    categories, _ = more_itertools.unzip(blackdoc.classify(raw_docstring.split("\n")))
-
-    assert tuple(categories) == line_labels
-
-
-def test_unclassify():
-    labelled_lines = zip(line_labels, raw_docstring.split("\n"))
-    lines = blackdoc.unclassify(labelled_lines)
-
-    assert "\n".join(lines) == raw_docstring
-
-
-def test_group_code_units():
-    labelled_lines = list(zip(line_labels, raw_docstring.split("\n")))
-    grouped = list(blackdoc.group_code_units(labelled_lines))
-
-    assert tuple(len(unit.split("\n")) for _, unit in grouped) == code_units
-
-
-def test_blacken():
-    def join(group):
-        if len(group) == 1:
-            return group
-
-        categories, lines = more_itertools.unzip(group)
-        return more_itertools.first(categories), "\n".join(lines)
-
-    labelled_lines = zip(line_labels, raw_docstring.split("\n"))
-    grouped = (
-        tuple(more_itertools.collapse(join(group)))
-        for group in more_itertools.split_into(labelled_lines, code_units)
-    )
-
-    formatted = blackdoc.blacken(grouped)
-    formatted_docstring = "\n".join(unit for _, unit in formatted)
-
-    assert formatted_docstring == docstring

From a341470dd07fe1a6970af8023a55df6e6c4936a2 Mon Sep 17 00:00:00 2001
From: Keewis <keewis@posteo.de>
Date: Tue, 17 Mar 2020 15:51:39 +0100
Subject: [PATCH 21/24] rename classify to detect_format

---
 blackdoc/__init__.py                  |  4 ++--
 blackdoc/classification.py            |  5 +++--
 blackdoc/tests/test_classification.py | 10 +++++-----
 3 files changed, 10 insertions(+), 9 deletions(-)

diff --git a/blackdoc/__init__.py b/blackdoc/__init__.py
index ccd3dca..d7a2fb3 100644
--- a/blackdoc/__init__.py
+++ b/blackdoc/__init__.py
@@ -1,5 +1,5 @@
 from .blacken import blacken
-from .classification import classify, unclassify
+from .classification import detect_format, unclassify
 from .formats import register_format  # noqa
 
 
@@ -10,7 +10,7 @@ def line_numbers(lines):
 def format_lines(lines, mode=None):
     numbered = line_numbers(lines)
 
-    labeled = classify(numbered)
+    labeled = detect_format(numbered)
     blackened = blacken(labeled, mode=mode)
 
     return unclassify(blackened)
diff --git a/blackdoc/classification.py b/blackdoc/classification.py
index 863e957..b1de12e 100644
--- a/blackdoc/classification.py
+++ b/blackdoc/classification.py
@@ -3,7 +3,7 @@
 from .formats import detection_funcs
 
 
-def classify(lines):
+def detect_format(lines):
     lines = more_itertools.peekable(lines)
     while lines:
         maybe_detected = (
@@ -18,7 +18,8 @@ def classify(lines):
         elif len(detected) > 1:
             formatted_match_names = ", ".join(sorted(detected.keys()))
             raise RuntimeError(
-                f"cannot classify line: {formatted_match_names} claim it: {lines.peek()}"
+                "cannot detect code format for line:"
+                f" it is claimed by {formatted_match_names}: {lines.peek()}"
             )
         else:
             yield more_itertools.one(detected.values())
diff --git a/blackdoc/tests/test_classification.py b/blackdoc/tests/test_classification.py
index 6d74bf6..35b226e 100644
--- a/blackdoc/tests/test_classification.py
+++ b/blackdoc/tests/test_classification.py
@@ -32,20 +32,20 @@ def print_classification(labeled):
         print_line_with_range(name, range, unit)
 
 
-def test_classify():
+def test_detect_format():
     lines = enumerate(data.lines, start=1)
 
-    classified = tuple(classification.classify(lines))
+    labeled = tuple(classification.detect_format(lines))
 
-    print_classification(classified)
+    print_classification(labeled)
 
-    actual = tuple(max_ - min_ for (min_, max_), _, _ in classified)
+    actual = tuple(max_ - min_ for (min_, max_), _, _ in labeled)
     expected = data.code_units
     assert expected == actual
 
     actual = tuple(
         more_itertools.collapse(
-            [name] * len(lines.split("\n")) for _, name, lines in classified
+            [name] * len(lines.split("\n")) for _, name, lines in labeled
         )
     )
     expected = data.line_labels

From 6cb15c2efc999710172ff1334dde14c0289da63f Mon Sep 17 00:00:00 2001
From: Keewis <keewis@posteo.de>
Date: Tue, 17 Mar 2020 15:53:35 +0100
Subject: [PATCH 22/24] remove unclassify

---
 blackdoc/__init__.py                  | 4 ++--
 blackdoc/blacken.py                   | 4 ++--
 blackdoc/classification.py            | 5 -----
 blackdoc/tests/test_classification.py | 8 --------
 4 files changed, 4 insertions(+), 17 deletions(-)

diff --git a/blackdoc/__init__.py b/blackdoc/__init__.py
index d7a2fb3..74ecaa7 100644
--- a/blackdoc/__init__.py
+++ b/blackdoc/__init__.py
@@ -1,5 +1,5 @@
 from .blacken import blacken
-from .classification import detect_format, unclassify
+from .classification import detect_format
 from .formats import register_format  # noqa
 
 
@@ -13,4 +13,4 @@ def format_lines(lines, mode=None):
     labeled = detect_format(numbered)
     blackened = blacken(labeled, mode=mode)
 
-    return unclassify(blackened)
+    return blackened
diff --git a/blackdoc/blacken.py b/blackdoc/blacken.py
index 64af1bf..aaecc3d 100644
--- a/blackdoc/blacken.py
+++ b/blackdoc/blacken.py
@@ -21,7 +21,7 @@ def update_line_number(message, original_number):
 def blacken(lines, mode=None):
     for original_line_range, category, line_unit in lines:
         if category == "none":
-            yield category, line_unit
+            yield line_unit
             continue
 
         indentation_depth, prompt_length, code = extract_code(line_unit, category)
@@ -44,4 +44,4 @@ def blacken(lines, mode=None):
 
         reformatted = reformat_code(blackened, category, indentation_depth)
 
-        yield category, reformatted
+        yield reformatted
diff --git a/blackdoc/classification.py b/blackdoc/classification.py
index b1de12e..715006a 100644
--- a/blackdoc/classification.py
+++ b/blackdoc/classification.py
@@ -23,8 +23,3 @@ def detect_format(lines):
             )
         else:
             yield more_itertools.one(detected.values())
-
-
-def unclassify(labelled_lines):
-    for _, line in labelled_lines:
-        yield line
diff --git a/blackdoc/tests/test_classification.py b/blackdoc/tests/test_classification.py
index 35b226e..4478743 100644
--- a/blackdoc/tests/test_classification.py
+++ b/blackdoc/tests/test_classification.py
@@ -50,11 +50,3 @@ def test_detect_format():
     )
     expected = data.line_labels
     assert expected == actual
-
-
-def test_unclassify():
-    labeled = tuple(zip(data.line_labels, data.lines))
-    actual = tuple(classification.unclassify(labeled))
-    expected = tuple(data.lines)
-
-    assert expected == actual

From b7c80f93bf36a4d939d7f9b76bdb0f3819d27afe Mon Sep 17 00:00:00 2001
From: Keewis <keewis@posteo.de>
Date: Tue, 17 Mar 2020 16:21:40 +0100
Subject: [PATCH 23/24] rewrite the message updating function to only parse
 messages

---
 blackdoc/blacken.py            | 45 ++++++++++++++++++++++------------
 blackdoc/tests/test_blacken.py | 18 ++++++++++++++
 2 files changed, 48 insertions(+), 15 deletions(-)
 create mode 100644 blackdoc/tests/test_blacken.py

diff --git a/blackdoc/blacken.py b/blackdoc/blacken.py
index aaecc3d..2ee9cb9 100644
--- a/blackdoc/blacken.py
+++ b/blackdoc/blacken.py
@@ -7,15 +7,24 @@
 from .formats import extract_code, reformat_code
 
 
-def update_line_number(message, original_number):
-    line_re = re.compile(r"(?P<line_number>\d+):(?P<column_number>\d+):")
-    match = line_re.search(message)
-    if match:
-        line_number, column_number = map(int, match.groups())
-        new_line_number = line_number + original_number - 1
+def parse_message(message):
+    line_re = re.compile(
+        r"^(?P<message>[^:]+): (?P<line_number>\d+):"
+        r"(?P<column_number>\d+): (?P<faulty_line>.+)$"
+    )
 
-        message = line_re.sub(f"{new_line_number}:{column_number}:", message)
-    return message
+    types = {
+        "message": str,
+        "line_number": int,
+        "column_number": int,
+        "faulty_line": str,
+    }
+
+    match = line_re.match(message)
+    if match is None:
+        raise ValueError(f"invalid error message: {message}")
+
+    return tuple(types[key](value) for key, value in match.groupdict().items())
 
 
 def blacken(lines, mode=None):
@@ -29,18 +38,24 @@ def blacken(lines, mode=None):
         current_mode = black.FileMode() if mode is None else copy.copy(mode)
         current_mode.line_length -= indentation_depth + prompt_length
 
+        original_line_number, _ = original_line_range
+
         try:
             blackened = black.format_str(code, mode=current_mode).rstrip()
         except TokenError as e:
-            apparent_line_num, column = e.args[1]
-            message = e.args[0]
-            lineno = original_line_range[0] + (apparent_line_num - 1)
-            faulty_line = code.split("\n")[(apparent_line_num - 1) - 1]
+            message, (apparent_line_number, column) = e.args
 
-            raise black.InvalidInput(f"{lineno}:{column}: {message}: {faulty_line}")
+            lineno = original_line_number + (apparent_line_number - 1)
+            faulty_line = code.split("\n")[(apparent_line_number - 1) - 1]
+
+            raise black.InvalidInput(
+                f"Cannot parse: {lineno}:{column}: {message}: {faulty_line}"
+            )
         except black.InvalidInput as e:
-            message = update_line_number(str(e), original_line_range[0])
-            raise black.InvalidInput(message)
+            message, apparent_line_number, column, faulty_line = parse_message(str(e))
+
+            lineno = original_line_number + (apparent_line_number - 1)
+            raise black.InvalidInput(f"{message}: {lineno}:{column}: {faulty_line}")
 
         reformatted = reformat_code(blackened, category, indentation_depth)
 
diff --git a/blackdoc/tests/test_blacken.py b/blackdoc/tests/test_blacken.py
new file mode 100644
index 0000000..f029ebc
--- /dev/null
+++ b/blackdoc/tests/test_blacken.py
@@ -0,0 +1,18 @@
+import pytest
+
+from blackdoc.blacken import parse_message
+
+
+@pytest.mark.parametrize(
+    "message,expected",
+    (
+        pytest.param(
+            'Cannot parse: 16:10: with new_open("abc) as f:',
+            ("Cannot parse", 16, 10, 'with new_open("abc) as f:'),
+            id="simple_message",
+        ),
+    ),
+)
+def test_parse_message(message, expected):
+    actual = parse_message(message)
+    assert expected == actual

From 685eceaafc9b2aac5aa581fc7c9d1ae57a0f75b2 Mon Sep 17 00:00:00 2001
From: Keewis <keewis@posteo.de>
Date: Tue, 17 Mar 2020 16:25:31 +0100
Subject: [PATCH 24/24] rename category to code_format

---
 blackdoc/blacken.py          |  8 ++++----
 blackdoc/formats/__init__.py | 12 ++++++------
 2 files changed, 10 insertions(+), 10 deletions(-)

diff --git a/blackdoc/blacken.py b/blackdoc/blacken.py
index 2ee9cb9..4a18bfe 100644
--- a/blackdoc/blacken.py
+++ b/blackdoc/blacken.py
@@ -28,12 +28,12 @@ def parse_message(message):
 
 
 def blacken(lines, mode=None):
-    for original_line_range, category, line_unit in lines:
-        if category == "none":
+    for original_line_range, code_format, line_unit in lines:
+        if code_format == "none":
             yield line_unit
             continue
 
-        indentation_depth, prompt_length, code = extract_code(line_unit, category)
+        indentation_depth, prompt_length, code = extract_code(line_unit, code_format)
 
         current_mode = black.FileMode() if mode is None else copy.copy(mode)
         current_mode.line_length -= indentation_depth + prompt_length
@@ -57,6 +57,6 @@ def blacken(lines, mode=None):
             lineno = original_line_number + (apparent_line_number - 1)
             raise black.InvalidInput(f"{message}: {lineno}:{column}: {faulty_line}")
 
-        reformatted = reformat_code(blackened, category, indentation_depth)
+        reformatted = reformat_code(blackened, code_format, indentation_depth)
 
         yield reformatted
diff --git a/blackdoc/formats/__init__.py b/blackdoc/formats/__init__.py
index 196b422..90f535d 100644
--- a/blackdoc/formats/__init__.py
+++ b/blackdoc/formats/__init__.py
@@ -5,22 +5,22 @@
 from .register import extraction_funcs, reformatting_funcs, register_format
 
 
-def extract_code(line_unit, category):
+def extract_code(line_unit, code_format):
     dedented = textwrap.dedent(line_unit)
     indentation_depth = line_unit.find(dedented[:5])
 
-    func = extraction_funcs.get(category, None)
+    func = extraction_funcs.get(code_format, None)
     if func is None:
-        raise RuntimeError(f"unknown code format: {category}")
+        raise RuntimeError(f"unknown code format: {code_format}")
 
     prompt_length, extracted = func(dedented)
     return indentation_depth, prompt_length, extracted
 
 
-def reformat_code(line_unit, category, indentation_depth):
-    func = reformatting_funcs.get(category, None)
+def reformat_code(line_unit, code_format, indentation_depth):
+    func = reformatting_funcs.get(code_format, None)
     if func is None:
-        raise RuntimeError(f"unknown code format: {category}")
+        raise RuntimeError(f"unknown code format: {code_format}")
 
     reformatted = func(line_unit)