From b65c70fb5ab9b8d2683257cfc55bfa176e3f5ff5 Mon Sep 17 00:00:00 2001 From: Keewis Date: Wed, 11 Mar 2020 13:13:05 +0100 Subject: [PATCH 01/24] convert the script and tests into a package with a __main__ and a tests module --- blackdoc.py | 450 ------------------ blackdoc/__init__.py | 200 ++++++++ blackdoc/__main__.py | 252 ++++++++++ .../tests/test_blackdoc.py | 0 4 files changed, 452 insertions(+), 450 deletions(-) delete mode 100644 blackdoc.py create mode 100644 blackdoc/__init__.py create mode 100644 blackdoc/__main__.py rename test_blackdoc.py => blackdoc/tests/test_blackdoc.py (100%) diff --git a/blackdoc.py b/blackdoc.py deleted file mode 100644 index 144e7bc..0000000 --- a/blackdoc.py +++ /dev/null @@ -1,450 +0,0 @@ -import copy -import pathlib -import sys -import textwrap - -import black -import more_itertools - -doctest_prompt = ">>> " -doctest_continuation_prompt = "... " - -prompt_categories = { - "doctest": doctest_prompt, -} -continuation_prompt_categories = { - "doctest": doctest_continuation_prompt, -} -available_prompts = set(prompt_categories.values()) | set( - continuation_prompt_categories.values() -) - - -def extract_prompt(line): - stripped = line.lstrip() - prompt_length = stripped.find(" ") + 1 - - prompt = stripped[:prompt_length] - - return prompt if prompt in available_prompts else None - - -def remove_prompt(line, prompt): - if not line.startswith(prompt): - raise RuntimeError( - f"cannot remove prompt {prompt} from line: prompt not found", line - ) - - without_prompt = line[len(prompt) :] - return without_prompt - - -def add_prompt(line, prompt): - return prompt + line - - -def remove_doctest_prompt(code_unit): - indentation_depth = code_unit.find(doctest_prompt) - code_unit = textwrap.dedent(code_unit) - - # multiline unit - if "\n" in code_unit: - prompt_line, *continuation_lines = code_unit.split("\n") - removed = "\n".join( - [ - remove_prompt(prompt_line, doctest_prompt), - *( - remove_prompt(line, doctest_continuation_prompt) - 
for line in continuation_lines - ), - ] - ) - else: - removed = remove_prompt(code_unit, doctest_prompt) - - return indentation_depth, removed - - -def add_doctest_prompt(code_unit, indentation_depth): - if "\n" in code_unit: - prompt_line, *continuation_lines = code_unit.split("\n") - reformatted = "\n".join( - [ - add_prompt(prompt_line, doctest_prompt), - *( - add_prompt(line, doctest_continuation_prompt) - for line in continuation_lines - ), - ] - ) - else: - reformatted = add_prompt(code_unit, doctest_prompt) - - return textwrap.indent(reformatted, " " * indentation_depth) - - -extraction_funcs = { - "doctest": remove_doctest_prompt, -} -reformatting_funcs = { - "doctest": add_doctest_prompt, -} - - -def classify(lines): - """ classify lines by prompt type """ - prompts = dict(zip(prompt_categories.values(), prompt_categories.keys())) - continuation_prompts = dict( - zip( - continuation_prompt_categories.values(), - continuation_prompt_categories.keys(), - ) - ) - - for line in lines: - maybe_prompt = extract_prompt(line) - category = ( - prompts.get(maybe_prompt, None) - or continuation_prompts.get(maybe_prompt, None) - or "none" - ) - - yield category, line - - -def continuation_lines(lines, continuation_prompt): - # We can't use `itertools.takewhile` because it drops the first non-match - # Instead, we peek at the iterable and only remove the element if we take it - iterable = more_itertools.peekable(lines) if not hasattr(lines, "peek") else lines - while True: - try: - category, line = iterable.peek() - except StopIteration: - break - - if extract_prompt(line) != continuation_prompt: - break - - # consume the item - next(iterable) - - yield line - - -def group_code_units(labelled_lines): - """ group together code units """ - # we need to make this peekable here since otherwise we lose an element - lines = more_itertools.peekable(labelled_lines) - while True: - try: - category, line = next(lines) - except StopIteration: - break - - if category == "none": - 
unit = line - else: - continuation_prompt = continuation_prompt_categories.get(category, None) - if continuation_prompt is None: - raise ValueError("unknown prompt category for grouping: {category}") - unit = "\n".join([line, *continuation_lines(lines, continuation_prompt)]) - yield category, unit - - -def blacken(labelled_lines, mode=None): - for category, line in labelled_lines: - if category == "none": - yield category, line - continue - - # remove the prompt and save the indentation depth for later - converter = extraction_funcs.get(category, None) - if converter is None: - raise ValueError(f"unknown prompt category for extraction: {category}") - indentation_depth, code_unit = converter(line) - - # update the line length - prompt_length = indentation_depth + len(prompt_categories[category]) - current_mode = black.FileMode() if mode is None else copy.copy(mode) - current_mode.line_length -= prompt_length - - # blacken the code - blackened = black.format_str(code_unit, mode=current_mode).rstrip() - - # add the prompt and reindent - converter = reformatting_funcs.get(category, None) - if converter is None: - raise ValueError(f"unknown prompt category for reformatting: {category}") - - reformatted = converter(blackened, indentation_depth) - yield category, reformatted - - -def unclassify(labelled_lines): - for _, line in labelled_lines: - yield line - - -def format_lines(lines, mode=None): - labeled = classify(lines) - grouped = group_code_units(labeled) - blackened = blacken(grouped, mode=mode) - - return unclassify(blackened) - - -def format_file(path): - with open(path) as f: - return "\n".join(format_lines(line.rstrip() for line in f)) + "\n" - - -def format_text(text): - return "\n".join(format_lines(text.split("\n"))) - - -def collect_files(src, include, exclude): - root = black.find_project_root(tuple(src)) - report = black.Report() - - for path in src: - if path.is_dir(): - yield from black.gen_python_files_in_dir( - path, root, include, exclude, report, 
black.get_gitignore(root), - ) - elif path.is_file() or str(path) == "-": - yield path - else: - print(f"invalid path: {path}", file=sys.stderr) - - -def format_and_overwrite(path, mode): - try: - with open(path, mode="rb") as f: - content, encoding, newline = black.decode_bytes(f.read()) - - lines = content.split("\n") - - new_content = "\n".join(format_lines(lines, mode)) - - if new_content == content: - result = "unchanged" - else: - print(f"reformatted {path}") - result = "reformatted" - - with open(path, "w", encoding=encoding, newline=newline) as f: - f.write(new_content) - except Exception as e: - print(f"error: cannot format {path.absolute()}: {e}") - result = "error" - - return result - - -def format_and_check(path, mode): - try: - with open(path, mode="rb") as f: - content, _, _ = black.decode_bytes(f.read()) - - lines = content.split("\n") - - new_content = "\n".join(format_lines(lines, mode)) - - if new_content == content: - result = "unchanged" - else: - print(f"would reformat {path}") - result = "reformatted" - except Exception as e: - print(f"error: cannot format {path.absolute()}: {e}") - result = "error" - - return result - - -def report_changes(n_reformatted, n_unchanged, n_error): - def noun(n): - return "file" if n < 2 else "files" - - reports = [] - if n_reformatted > 0: - reports.append(f"{n_reformatted} {noun(n_reformatted)} reformatted") - - if n_unchanged > 0: - reports.append(f"{n_unchanged} {noun(n_unchanged)} left unchanged") - - if n_error > 0: - reports.append(f"{n_error} {noun(n_error)} fails to reformat") - - return ", ".join(reports) + "." 
- - -def report_possible_changes(n_reformatted, n_unchanged, n_error): - def noun(n): - return "file" if n < 2 else "files" - - reports = [] - if n_reformatted > 0: - reports.append(f"{n_reformatted} {noun(n_reformatted)} would be reformatted") - - if n_unchanged > 0: - reports.append(f"{n_unchanged} {noun(n_unchanged)} would be left unchanged") - - if n_error > 0: - reports.append(f"{n_error} {noun(n_error)} would fail to reformat") - - return ", ".join(reports) + "." - - -def statistics(sources): - from collections import Counter - - statistics = Counter(sources.values()) - - n_unchanged = statistics.pop("unchanged", 0) - n_reformatted = statistics.pop("reformatted", 0) - n_error = statistics.pop("error", 0) - - if len(statistics) != 0: - raise RuntimeError(f"unknown results: {statistics.keys()}") - - return n_reformatted, n_unchanged, n_error - - -def process(args): - if not args.src: - print("No Path provided. Nothing to do 😴") - return 0 - - try: - include_regex = black.re_compile_maybe_verbose(args.include) - except black.re.error: - print( - f"Invalid regular expression for include given: {args.include!r}", - file=sys.stderr, - ) - return 2 - - try: - exclude_regex = black.re_compile_maybe_verbose(args.exclude) - except black.re.error: - print( - f"Invalid regular expression for exclude given: {args.exclude!r}", - file=sys.stderr, - ) - return 2 - - sources = set(collect_files(args.src, include_regex, exclude_regex)) - if len(sources) == 0: - print("No Python files are present to be formatted. 
Nothing to do 😴") - return 0 - - target_versions = set( - black.TargetVersion[version.upper()] - for version in getattr(args, "target_versions", ()) - ) - mode = black.FileMode( - line_length=args.line_length, target_versions=target_versions, - ) - - actions = { - "inplace": format_and_overwrite, - "check": format_and_check, - } - - action = actions.get(args.action) - - changed_sources = {source: action(source, mode) for source in sources} - n_reformatted, n_unchanged, n_error = statistics(changed_sources) - - report_formatters = { - "inplace": report_changes, - "check": report_possible_changes, - } - - report = report_formatters.get(args.action)(n_reformatted, n_unchanged, n_error) - - if args.action == "check" and n_reformatted > 0: - return_code = 1 - else: - return_code = 0 - - print("Oh no! 💥 💔 💥" if return_code else "All done! ✨ 🍰 ✨") - print(report) - return return_code - - -if __name__ == "__main__": - import argparse - - parser = argparse.ArgumentParser( - description="run black on documentation code snippets (e.g. doctest)", - formatter_class=argparse.ArgumentDefaultsHelpFormatter, - ) - parser.add_argument( - "-t", - "--target-versions", - action="append", - choices=[v.name.lower() for v in black.TargetVersion], - help=( - "Python versions that should be supported by Black's output. (default: " - "per-file auto-detection)" - ), - default=argparse.SUPPRESS, - ) - parser.add_argument( - "-l", - "--line-length", - metavar="INT", - type=int, - default=black.DEFAULT_LINE_LENGTH, - help="How many characters per line to allow.", - ) - parser.add_argument( - "--check", - dest="action", - action="store_const", - const="check", - default="inplace", - help=( - "Don't write the files back, just return the status. Return code 0 " - "means nothing would change. Return code 1 means some files would be " - "reformatted. Return code 123 means there was an internal error." 
- ), - ) - parser.add_argument( - "--include", - metavar="TEXT", - type=str, - default=black.DEFAULT_INCLUDES, - help=( - "A regular expression that matches files and directories that should be " - "included on recursive searches. An empty value means all files are " - "included regardless of the name. Use forward slashes for directories on " - "all platforms (Windows, too). Exclusions are calculated first, inclusions " - "later." - ), - ) - parser.add_argument( - "--exclude", - metavar="TEXT", - type=str, - default=black.DEFAULT_EXCLUDES, - help=( - "A regular expression that matches files and directories that should be " - "excluded on recursive searches. An empty value means no paths are excluded. " - "Use forward slashes for directories on all platforms (Windows, too). " - "Exclusions are calculated first, inclusions later." - ), - ) - parser.add_argument( - "src", - action="store", - type=pathlib.Path, - nargs="*", - default=None, - help="one or more paths to work on", - ) - - args = parser.parse_args() - sys.exit(process(args)) diff --git a/blackdoc/__init__.py b/blackdoc/__init__.py new file mode 100644 index 0000000..ac8f74a --- /dev/null +++ b/blackdoc/__init__.py @@ -0,0 +1,200 @@ +import copy +import textwrap + +import black +import more_itertools + +doctest_prompt = ">>> " +doctest_continuation_prompt = "... 
" + +prompt_categories = { + "doctest": doctest_prompt, +} +continuation_prompt_categories = { + "doctest": doctest_continuation_prompt, +} +available_prompts = set(prompt_categories.values()) | set( + continuation_prompt_categories.values() +) + + +def extract_prompt(line): + stripped = line.lstrip() + prompt_length = stripped.find(" ") + 1 + + prompt = stripped[:prompt_length] + + return prompt if prompt in available_prompts else None + + +def remove_prompt(line, prompt): + if not line.startswith(prompt): + raise RuntimeError( + f"cannot remove prompt {prompt} from line: prompt not found", line + ) + + without_prompt = line[len(prompt) :] + return without_prompt + + +def add_prompt(line, prompt): + return prompt + line + + +def remove_doctest_prompt(code_unit): + indentation_depth = code_unit.find(doctest_prompt) + code_unit = textwrap.dedent(code_unit) + + # multiline unit + if "\n" in code_unit: + prompt_line, *continuation_lines = code_unit.split("\n") + removed = "\n".join( + [ + remove_prompt(prompt_line, doctest_prompt), + *( + remove_prompt(line, doctest_continuation_prompt) + for line in continuation_lines + ), + ] + ) + else: + removed = remove_prompt(code_unit, doctest_prompt) + + return indentation_depth, removed + + +def add_doctest_prompt(code_unit, indentation_depth): + if "\n" in code_unit: + prompt_line, *continuation_lines = code_unit.split("\n") + reformatted = "\n".join( + [ + add_prompt(prompt_line, doctest_prompt), + *( + add_prompt(line, doctest_continuation_prompt) + for line in continuation_lines + ), + ] + ) + else: + reformatted = add_prompt(code_unit, doctest_prompt) + + return textwrap.indent(reformatted, " " * indentation_depth) + + +extraction_funcs = { + "doctest": remove_doctest_prompt, +} +reformatting_funcs = { + "doctest": add_doctest_prompt, +} + + +def classify(lines): + """ classify lines by prompt type """ + prompts = dict(zip(prompt_categories.values(), prompt_categories.keys())) + continuation_prompts = dict( + zip( + 
continuation_prompt_categories.values(), + continuation_prompt_categories.keys(), + ) + ) + + for line in lines: + maybe_prompt = extract_prompt(line) + category = ( + prompts.get(maybe_prompt, None) + or continuation_prompts.get(maybe_prompt, None) + or "none" + ) + + yield category, line + + +def continuation_lines(lines, continuation_prompt): + # We can't use `itertools.takewhile` because it drops the first non-match + # Instead, we peek at the iterable and only remove the element if we take it + iterable = more_itertools.peekable(lines) if not hasattr(lines, "peek") else lines + while True: + try: + category, line = iterable.peek() + except StopIteration: + break + + if extract_prompt(line) != continuation_prompt: + break + + # consume the item + next(iterable) + + yield line + + +def group_code_units(labelled_lines): + """ group together code units """ + # we need to make this peekable here since otherwise we lose an element + lines = more_itertools.peekable(labelled_lines) + while True: + try: + category, line = next(lines) + except StopIteration: + break + + if category == "none": + unit = line + else: + continuation_prompt = continuation_prompt_categories.get(category, None) + if continuation_prompt is None: + raise ValueError("unknown prompt category for grouping: {category}") + unit = "\n".join([line, *continuation_lines(lines, continuation_prompt)]) + yield category, unit + + +def blacken(labelled_lines, mode=None): + for category, line in labelled_lines: + if category == "none": + yield category, line + continue + + # remove the prompt and save the indentation depth for later + converter = extraction_funcs.get(category, None) + if converter is None: + raise ValueError(f"unknown prompt category for extraction: {category}") + indentation_depth, code_unit = converter(line) + + # update the line length + prompt_length = indentation_depth + len(prompt_categories[category]) + current_mode = black.FileMode() if mode is None else copy.copy(mode) + 
current_mode.line_length -= prompt_length + + # blacken the code + blackened = black.format_str(code_unit, mode=current_mode).rstrip() + + # add the prompt and reindent + converter = reformatting_funcs.get(category, None) + if converter is None: + raise ValueError(f"unknown prompt category for reformatting: {category}") + + reformatted = converter(blackened, indentation_depth) + yield category, reformatted + + +def unclassify(labelled_lines): + for _, line in labelled_lines: + yield line + + +def format_lines(lines, mode=None): + labeled = classify(lines) + grouped = group_code_units(labeled) + blackened = blacken(grouped, mode=mode) + + return unclassify(blackened) + + +def format_file(path): + with open(path) as f: + return "\n".join(format_lines(line.rstrip() for line in f)) + "\n" + + +def format_text(text): + return "\n".join(format_lines(text.split("\n"))) diff --git a/blackdoc/__main__.py b/blackdoc/__main__.py new file mode 100644 index 0000000..78f919b --- /dev/null +++ b/blackdoc/__main__.py @@ -0,0 +1,252 @@ +import argparse +import pathlib +import sys + +import black + +from . 
import format_lines + + +def collect_files(src, include, exclude): + root = black.find_project_root(tuple(src)) + report = black.Report() + + for path in src: + if path.is_dir(): + yield from black.gen_python_files_in_dir( + path, root, include, exclude, report, black.get_gitignore(root), + ) + elif path.is_file() or str(path) == "-": + yield path + else: + print(f"invalid path: {path}", file=sys.stderr) + + +def format_and_overwrite(path, mode): + try: + with open(path, mode="rb") as f: + content, encoding, newline = black.decode_bytes(f.read()) + + lines = content.split("\n") + + new_content = "\n".join(format_lines(lines, mode)) + + if new_content == content: + result = "unchanged" + else: + print(f"reformatted {path}") + result = "reformatted" + + with open(path, "w", encoding=encoding, newline=newline) as f: + f.write(new_content) + except Exception as e: + print(f"error: cannot format {path.absolute()}: {e}") + result = "error" + + return result + + +def format_and_check(path, mode): + try: + with open(path, mode="rb") as f: + content, _, _ = black.decode_bytes(f.read()) + + lines = content.split("\n") + + new_content = "\n".join(format_lines(lines, mode)) + + if new_content == content: + result = "unchanged" + else: + print(f"would reformat {path}") + result = "reformatted" + except Exception as e: + print(f"error: cannot format {path.absolute()}: {e}") + result = "error" + + return result + + +def report_changes(n_reformatted, n_unchanged, n_error): + def noun(n): + return "file" if n < 2 else "files" + + reports = [] + if n_reformatted > 0: + reports.append(f"{n_reformatted} {noun(n_reformatted)} reformatted") + + if n_unchanged > 0: + reports.append(f"{n_unchanged} {noun(n_unchanged)} left unchanged") + + if n_error > 0: + reports.append(f"{n_error} {noun(n_error)} fails to reformat") + + return ", ".join(reports) + "." 
+ + +def report_possible_changes(n_reformatted, n_unchanged, n_error): + def noun(n): + return "file" if n < 2 else "files" + + reports = [] + if n_reformatted > 0: + reports.append(f"{n_reformatted} {noun(n_reformatted)} would be reformatted") + + if n_unchanged > 0: + reports.append(f"{n_unchanged} {noun(n_unchanged)} would be left unchanged") + + if n_error > 0: + reports.append(f"{n_error} {noun(n_error)} would fail to reformat") + + return ", ".join(reports) + "." + + +def statistics(sources): + from collections import Counter + + statistics = Counter(sources.values()) + + n_unchanged = statistics.pop("unchanged", 0) + n_reformatted = statistics.pop("reformatted", 0) + n_error = statistics.pop("error", 0) + + if len(statistics) != 0: + raise RuntimeError(f"unknown results: {statistics.keys()}") + + return n_reformatted, n_unchanged, n_error + + +def process(args): + if not args.src: + print("No Path provided. Nothing to do 😴") + return 0 + + try: + include_regex = black.re_compile_maybe_verbose(args.include) + except black.re.error: + print( + f"Invalid regular expression for include given: {args.include!r}", + file=sys.stderr, + ) + return 2 + + try: + exclude_regex = black.re_compile_maybe_verbose(args.exclude) + except black.re.error: + print( + f"Invalid regular expression for exclude given: {args.exclude!r}", + file=sys.stderr, + ) + return 2 + + sources = set(collect_files(args.src, include_regex, exclude_regex)) + if len(sources) == 0: + print("No Python files are present to be formatted. 
Nothing to do 😴") + return 0 + + target_versions = set( + black.TargetVersion[version.upper()] + for version in getattr(args, "target_versions", ()) + ) + mode = black.FileMode( + line_length=args.line_length, target_versions=target_versions, + ) + + actions = { + "inplace": format_and_overwrite, + "check": format_and_check, + } + + action = actions.get(args.action) + + changed_sources = {source: action(source, mode) for source in sources} + n_reformatted, n_unchanged, n_error = statistics(changed_sources) + + report_formatters = { + "inplace": report_changes, + "check": report_possible_changes, + } + + report = report_formatters.get(args.action)(n_reformatted, n_unchanged, n_error) + + if args.action == "check" and n_reformatted > 0: + return_code = 1 + else: + return_code = 0 + + print("Oh no! 💥 💔 💥" if return_code else "All done! ✨ 🍰 ✨") + print(report) + return return_code + + +parser = argparse.ArgumentParser( + description="run black on documentation code snippets (e.g. doctest)", + formatter_class=argparse.ArgumentDefaultsHelpFormatter, +) +parser.add_argument( + "-t", + "--target-versions", + action="append", + choices=[v.name.lower() for v in black.TargetVersion], + help=( + "Python versions that should be supported by Black's output. (default: " + "per-file auto-detection)" + ), + default=argparse.SUPPRESS, +) +parser.add_argument( + "-l", + "--line-length", + metavar="INT", + type=int, + default=black.DEFAULT_LINE_LENGTH, + help="How many characters per line to allow.", +) +parser.add_argument( + "--check", + dest="action", + action="store_const", + const="check", + default="inplace", + help=( + "Don't write the files back, just return the status. Return code 0 " + "means nothing would change. Return code 1 means some files would be " + "reformatted. Return code 123 means there was an internal error." 
+ ), +) +parser.add_argument( + "--include", + metavar="TEXT", + type=str, + default=black.DEFAULT_INCLUDES, + help=( + "A regular expression that matches files and directories that should be " + "included on recursive searches. An empty value means all files are " + "included regardless of the name. Use forward slashes for directories on " + "all platforms (Windows, too). Exclusions are calculated first, inclusions " + "later." + ), +) +parser.add_argument( + "--exclude", + metavar="TEXT", + type=str, + default=black.DEFAULT_EXCLUDES, + help=( + "A regular expression that matches files and directories that should be " + "excluded on recursive searches. An empty value means no paths are excluded. " + "Use forward slashes for directories on all platforms (Windows, too). " + "Exclusions are calculated first, inclusions later." + ), +) +parser.add_argument( + "src", + action="store", + type=pathlib.Path, + nargs="*", + default=None, + help="one or more paths to work on", +) + +args = parser.parse_args() +sys.exit(process(args)) diff --git a/test_blackdoc.py b/blackdoc/tests/test_blackdoc.py similarity index 100% rename from test_blackdoc.py rename to blackdoc/tests/test_blackdoc.py From e98940ab488252ec4f4d5e19c72afaeb29c2cf18 Mon Sep 17 00:00:00 2001 From: Keewis Date: Mon, 16 Mar 2020 13:24:43 +0100 Subject: [PATCH 02/24] add a module to make adding new formats easier --- blackdoc/formats/__init__.py | 25 +++++++++++++++++++++++++ blackdoc/formats/register.py | 15 +++++++++++++++ 2 files changed, 40 insertions(+) create mode 100644 blackdoc/formats/__init__.py create mode 100644 blackdoc/formats/register.py diff --git a/blackdoc/formats/__init__.py b/blackdoc/formats/__init__.py new file mode 100644 index 0000000..baf9182 --- /dev/null +++ b/blackdoc/formats/__init__.py @@ -0,0 +1,25 @@ +import textwrap + +from .register import detection_funcs # noqa +from .register import extraction_funcs, reformatting_funcs + + +def extract_code(line_unit, category): + dedented = 
textwrap.dedent(line_unit) + indentation_level = line_unit.find(dedented[:5]) + + func = extraction_funcs.get(category, None) + if func is None: + raise RuntimeError(f"unknown code format: {category}") + + return indentation_level, func(dedented) + + +def reformat_code(line_unit, category, indentation_depth): + func = reformatting_funcs.get(category, None) + if func is None: + raise RuntimeError(f"unknown code format: {category}") + + reformatted = func(line_unit) + + return textwrap.indent(reformatted, " " * indentation_depth) diff --git a/blackdoc/formats/register.py b/blackdoc/formats/register.py new file mode 100644 index 0000000..1e68c7a --- /dev/null +++ b/blackdoc/formats/register.py @@ -0,0 +1,15 @@ +import warnings + +detection_funcs = {} +extraction_funcs = {} +reformatting_funcs = {} + + +def register_format(name, detection_func, extraction_func, reformatting_func): + """ register a new format """ + if name in detection_funcs: + warnings.warn(f"{name} already registered", RuntimeWarning) + + detection_funcs[name] = detection_func + extraction_funcs[name] = extraction_func + reformatting_funcs[name] = reformatting_func From 12ca7c315df1c895f1dc3302bd2b27d584368d1f Mon Sep 17 00:00:00 2001 From: Keewis Date: Mon, 16 Mar 2020 13:29:45 +0100 Subject: [PATCH 03/24] rewrite the register func to take format objects and call it in __init__ --- blackdoc/formats/__init__.py | 6 +++++- blackdoc/formats/register.py | 6 +++++- 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/blackdoc/formats/__init__.py b/blackdoc/formats/__init__.py index baf9182..94a2362 100644 --- a/blackdoc/formats/__init__.py +++ b/blackdoc/formats/__init__.py @@ -1,7 +1,7 @@ import textwrap from .register import detection_funcs # noqa -from .register import extraction_funcs, reformatting_funcs +from .register import extraction_funcs, reformatting_funcs, register_format def extract_code(line_unit, category): @@ -23,3 +23,7 @@ def reformat_code(line_unit, category, 
indentation_depth): reformatted = func(line_unit) return textwrap.indent(reformatted, " " * indentation_depth) + + +for module in (): + register_format(module.__name__, module) diff --git a/blackdoc/formats/register.py b/blackdoc/formats/register.py index 1e68c7a..b191074 100644 --- a/blackdoc/formats/register.py +++ b/blackdoc/formats/register.py @@ -5,11 +5,15 @@ reformatting_funcs = {} -def register_format(name, detection_func, extraction_func, reformatting_func): +def register_format(name, obj): """ register a new format """ if name in detection_funcs: warnings.warn(f"{name} already registered", RuntimeWarning) + detection_func = getattr(obj, "detection_func") + extraction_func = getattr(obj, "extraction_func") + reformatting_func = getattr(obj, "reformatting_func") + detection_funcs[name] = detection_func extraction_funcs[name] = extraction_func reformatting_funcs[name] = reformatting_func From 09fada66f4fd2b3b33a05023af99241e8294b41b Mon Sep 17 00:00:00 2001 From: Keewis Date: Mon, 16 Mar 2020 13:31:41 +0100 Subject: [PATCH 04/24] add a dummy format that detects every line and reformats none --- blackdoc/formats/no_code.py | 14 ++++++++++++++ blackdoc/tests/__init__.py | 0 blackdoc/tests/data.py | 17 +++++++++++++++++ blackdoc/tests/test_no_code.py | 27 +++++++++++++++++++++++++++ 4 files changed, 58 insertions(+) create mode 100644 blackdoc/formats/no_code.py create mode 100644 blackdoc/tests/__init__.py create mode 100644 blackdoc/tests/data.py create mode 100644 blackdoc/tests/test_no_code.py diff --git a/blackdoc/formats/no_code.py b/blackdoc/formats/no_code.py new file mode 100644 index 0000000..f1e1a41 --- /dev/null +++ b/blackdoc/formats/no_code.py @@ -0,0 +1,14 @@ +import more_itertools + + +def detection_func(lines): + number, line = more_itertools.first(lines) + return (number, number + 1), line + + +def extraction_func(line): + return 0, line + + +def reformatting_func(line, indentation_depth): + return line diff --git a/blackdoc/tests/__init__.py 
b/blackdoc/tests/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/blackdoc/tests/data.py b/blackdoc/tests/data.py new file mode 100644 index 0000000..e3d4365 --- /dev/null +++ b/blackdoc/tests/data.py @@ -0,0 +1,17 @@ +docstring = """ a function to open files + + with a very long description + + >>> file = open( + ... "very_long_filepath", + ... mode="a", + ... ) + >>> file + <_io.TextIOWrapper name='very_long_filepath' mode='w' encoding='UTF-8'> + + text after the first example, spanning + multiple lines + + >>> file.closed + False +""" diff --git a/blackdoc/tests/test_no_code.py b/blackdoc/tests/test_no_code.py new file mode 100644 index 0000000..c6828c7 --- /dev/null +++ b/blackdoc/tests/test_no_code.py @@ -0,0 +1,27 @@ +from blackdoc.formats import no_code + +from .data import docstring + + +def test_detection_func(): + lines = docstring.split("\n") + + line_range = (1, 2) + line = lines[0] + + assert no_code.detection_func(enumerate(lines, start=1)) == (line_range, line) + + +def test_extraction_func(): + lines = docstring.split("\n") + depth = 0 + line = lines[0] + + assert no_code.extraction_func(line) == (depth, line) + + +def test_reformatting_func(): + lines = docstring.split("\n") + line = lines[0] + + assert no_code.reformatting_func(line, indentation_depth=0) == line From 063acae7814447443021be8cda5170b05f418031 Mon Sep 17 00:00:00 2001 From: Keewis Date: Mon, 16 Mar 2020 13:37:58 +0100 Subject: [PATCH 05/24] rename the dummy format and register it --- blackdoc/formats/__init__.py | 6 ++++-- blackdoc/formats/{no_code.py => none.py} | 0 blackdoc/tests/{test_no_code.py => test_none.py} | 8 ++++---- 3 files changed, 8 insertions(+), 6 deletions(-) rename blackdoc/formats/{no_code.py => none.py} (100%) rename blackdoc/tests/{test_no_code.py => test_none.py} (56%) diff --git a/blackdoc/formats/__init__.py b/blackdoc/formats/__init__.py index 94a2362..632aa25 100644 --- a/blackdoc/formats/__init__.py +++ b/blackdoc/formats/__init__.py @@ 
-1,5 +1,6 @@ import textwrap +from . import none from .register import detection_funcs # noqa from .register import extraction_funcs, reformatting_funcs, register_format @@ -25,5 +26,6 @@ def reformat_code(line_unit, category, indentation_depth): return textwrap.indent(reformatted, " " * indentation_depth) -for module in (): - register_format(module.__name__, module) +for module in (none,): + name = module.__name__.split(".")[-1] + register_format(name, module) diff --git a/blackdoc/formats/no_code.py b/blackdoc/formats/none.py similarity index 100% rename from blackdoc/formats/no_code.py rename to blackdoc/formats/none.py diff --git a/blackdoc/tests/test_no_code.py b/blackdoc/tests/test_none.py similarity index 56% rename from blackdoc/tests/test_no_code.py rename to blackdoc/tests/test_none.py index c6828c7..3c957c6 100644 --- a/blackdoc/tests/test_no_code.py +++ b/blackdoc/tests/test_none.py @@ -1,4 +1,4 @@ -from blackdoc.formats import no_code +from blackdoc.formats import none from .data import docstring @@ -9,7 +9,7 @@ def test_detection_func(): line_range = (1, 2) line = lines[0] - assert no_code.detection_func(enumerate(lines, start=1)) == (line_range, line) + assert none.detection_func(enumerate(lines, start=1)) == (line_range, line) def test_extraction_func(): @@ -17,11 +17,11 @@ def test_extraction_func(): depth = 0 line = lines[0] - assert no_code.extraction_func(line) == (depth, line) + assert none.extraction_func(line) == (depth, line) def test_reformatting_func(): lines = docstring.split("\n") line = lines[0] - assert no_code.reformatting_func(line, indentation_depth=0) == line + assert none.reformatting_func(line, indentation_depth=0) == line From 41d1f4819ecd4acdb6716683011cc71294deeda5 Mon Sep 17 00:00:00 2001 From: Keewis Date: Mon, 16 Mar 2020 14:02:02 +0100 Subject: [PATCH 06/24] only work with the line data when extracting or reformatting --- blackdoc/formats/none.py | 4 ++-- blackdoc/tests/test_none.py | 5 ++--- 2 files changed, 4 
insertions(+), 5 deletions(-) diff --git a/blackdoc/formats/none.py b/blackdoc/formats/none.py index f1e1a41..8d0ec01 100644 --- a/blackdoc/formats/none.py +++ b/blackdoc/formats/none.py @@ -7,8 +7,8 @@ def detection_func(lines): def extraction_func(line): - return 0, line + return line -def reformatting_func(line, indentation_depth): +def reformatting_func(line): return line diff --git a/blackdoc/tests/test_none.py b/blackdoc/tests/test_none.py index 3c957c6..d882e62 100644 --- a/blackdoc/tests/test_none.py +++ b/blackdoc/tests/test_none.py @@ -14,14 +14,13 @@ def test_detection_func(): def test_extraction_func(): lines = docstring.split("\n") - depth = 0 line = lines[0] - assert none.extraction_func(line) == (depth, line) + assert none.extraction_func(line) == line def test_reformatting_func(): lines = docstring.split("\n") line = lines[0] - assert none.reformatting_func(line, indentation_depth=0) == line + assert none.reformatting_func(line) == line From 7d2e575e2f88bad15a44456ad14f349632f55c0c Mon Sep 17 00:00:00 2001 From: Keewis Date: Mon, 16 Mar 2020 14:18:23 +0100 Subject: [PATCH 07/24] add a split-by-lines version of the test docstring --- blackdoc/tests/data.py | 1 + 1 file changed, 1 insertion(+) diff --git a/blackdoc/tests/data.py b/blackdoc/tests/data.py index e3d4365..76b7d05 100644 --- a/blackdoc/tests/data.py +++ b/blackdoc/tests/data.py @@ -15,3 +15,4 @@ >>> file.closed False """ +lines = docstring.split("\n") From e292545035c200b81c224913f2c7f798234dd602 Mon Sep 17 00:00:00 2001 From: Keewis Date: Mon, 16 Mar 2020 14:22:31 +0100 Subject: [PATCH 08/24] add a doctest format module --- blackdoc/formats/__init__.py | 4 +-- blackdoc/formats/doctest.py | 65 ++++++++++++++++++++++++++++++++++ blackdoc/tests/test_doctest.py | 53 +++++++++++++++++++++++++++ 3 files changed, 120 insertions(+), 2 deletions(-) create mode 100644 blackdoc/formats/doctest.py create mode 100644 blackdoc/tests/test_doctest.py diff --git a/blackdoc/formats/__init__.py 
b/blackdoc/formats/__init__.py index 632aa25..6c4d7cc 100644 --- a/blackdoc/formats/__init__.py +++ b/blackdoc/formats/__init__.py @@ -1,6 +1,6 @@ import textwrap -from . import none +from . import doctest, none from .register import detection_funcs # noqa from .register import extraction_funcs, reformatting_funcs, register_format @@ -26,6 +26,6 @@ def reformat_code(line_unit, category, indentation_depth): return textwrap.indent(reformatted, " " * indentation_depth) -for module in (none,): +for module in (none, doctest): name = module.__name__.split(".")[-1] register_format(name, module) diff --git a/blackdoc/formats/doctest.py b/blackdoc/formats/doctest.py new file mode 100644 index 0000000..a3c0309 --- /dev/null +++ b/blackdoc/formats/doctest.py @@ -0,0 +1,65 @@ +import itertools + +import more_itertools + +prompt = ">>> " +continuation_prompt = "... " + + +def continuation_lines(lines): + while True: + try: + line_number, line = lines.peek() + except StopIteration: + line_number = -1 + line = "" + + if not line.lstrip().startswith(continuation_prompt): + break + + # consume the line + next(lines) + yield line_number, line + + +def detection_func(lines): + try: + _, line = lines.peek() + except StopIteration: + line = "" + + if not line.lstrip().startswith(prompt): + return None + + detected_lines = list( + itertools.chain([more_itertools.first(lines)], continuation_lines(lines)) + ) + line_numbers, lines = map(tuple, more_itertools.unzip(detected_lines)) + + line_range = min(line_numbers), max(line_numbers) + 1 + if set(line_numbers) != set(range(line_range[0], line_range[1])): + raise RuntimeError("line numbers are not contiguous") + + return line_range, "\n".join(lines) + + +def extraction_func(line): + lines = line.split("\n") + if any(line[:4] not in (prompt, continuation_prompt) for line in lines): + raise RuntimeError(f"misformatted code unit: {line}") + + extracted_line = "\n".join(line[4:] for line in lines) + + return extracted_line + + +def 
reformatting_func(line): + lines = iter(line.split("\n")) + + reformatted = "\n".join( + itertools.chain( + more_itertools.always_iterable(prompt + more_itertools.first(lines)), + (continuation_prompt + line for line in lines), + ) + ) + return reformatted diff --git a/blackdoc/tests/test_doctest.py b/blackdoc/tests/test_doctest.py new file mode 100644 index 0000000..13e750b --- /dev/null +++ b/blackdoc/tests/test_doctest.py @@ -0,0 +1,53 @@ +import textwrap + +import more_itertools +import pytest + +from blackdoc.formats import doctest + +from .data import lines + + +@pytest.mark.parametrize( + "lines,expected", + ( + pytest.param(lines[0], None, id="no_doctest"), + pytest.param(lines[8], ((1, 2), lines[8]), id="single_line"), + pytest.param(lines[4:8], ((1, 5), "\n".join(lines[4:8])), id="multiple_lines"), + ), +) +def test_detection_func(lines, expected): + lines = more_itertools.peekable( + enumerate(more_itertools.always_iterable(lines), start=1) + ) + + actual = doctest.detection_func(lines) + assert actual == expected + + +@pytest.mark.parametrize( + "line", + ( + pytest.param(textwrap.dedent(lines[8]), id="single_line"), + pytest.param(textwrap.dedent("\n".join(lines[4:8])), id="multiple_lines"), + ), +) +def test_extraction_func(line): + expected = "\n".join(line.lstrip()[4:] for line in line.split("\n")) + actual = doctest.extraction_func(line) + + assert expected == actual + + +@pytest.mark.parametrize( + "expected", + ( + pytest.param(textwrap.dedent(lines[8]), id="single_line"), + pytest.param(textwrap.dedent("\n".join(lines[4:8])), id="multiple_lines"), + ), +) +def test_reformatting_func(expected): + line = "\n".join(line.lstrip()[4:] for line in expected.split("\n")) + + actual = doctest.reformatting_func(line) + assert expected == actual From cb97bceab86c98843a23e0155ae29d3713cd0123 Mon Sep 17 00:00:00 2001 From: Keewis Date: Mon, 16 Mar 2020 16:30:56 +0100 Subject: [PATCH 09/24] return the category along with the processed lines --- 
blackdoc/formats/doctest.py | 3 ++- blackdoc/formats/none.py | 4 +++- blackdoc/tests/test_doctest.py | 8 ++++++-- blackdoc/tests/test_none.py | 3 ++- 4 files changed, 13 insertions(+), 5 deletions(-) diff --git a/blackdoc/formats/doctest.py b/blackdoc/formats/doctest.py index a3c0309..bf19413 100644 --- a/blackdoc/formats/doctest.py +++ b/blackdoc/formats/doctest.py @@ -2,6 +2,7 @@ import more_itertools +name = "doctest" prompt = ">>> " continuation_prompt = "... " @@ -40,7 +41,7 @@ def detection_func(lines): if set(line_numbers) != set(range(line_range[0], line_range[1])): raise RuntimeError("line numbers are not contiguous") - return line_range, "\n".join(lines) + return line_range, name, "\n".join(lines) def extraction_func(line): diff --git a/blackdoc/formats/none.py b/blackdoc/formats/none.py index 8d0ec01..e935680 100644 --- a/blackdoc/formats/none.py +++ b/blackdoc/formats/none.py @@ -1,9 +1,11 @@ import more_itertools +name = "none" + def detection_func(lines): number, line = more_itertools.first(lines) - return (number, number + 1), line + return (number, number + 1), name, line def extraction_func(line): diff --git a/blackdoc/tests/test_doctest.py b/blackdoc/tests/test_doctest.py index 13e750b..224c4fe 100644 --- a/blackdoc/tests/test_doctest.py +++ b/blackdoc/tests/test_doctest.py @@ -12,8 +12,12 @@ "lines,expected", ( pytest.param(lines[0], None, id="no_doctest"), - pytest.param(lines[8], ((1, 2), lines[8]), id="single_line"), - pytest.param(lines[4:8], ((1, 5), "\n".join(lines[4:8])), id="multiple_lines"), + pytest.param(lines[8], ((1, 2), doctest.name, lines[8]), id="single_line"), + pytest.param( + lines[4:8], + ((1, 5), doctest.name, "\n".join(lines[4:8])), + id="multiple_lines", + ), ), ) def test_detection_func(lines, expected): diff --git a/blackdoc/tests/test_none.py b/blackdoc/tests/test_none.py index d882e62..c864bea 100644 --- a/blackdoc/tests/test_none.py +++ b/blackdoc/tests/test_none.py @@ -8,8 +8,9 @@ def test_detection_func(): line_range 
= (1, 2) line = lines[0] + name = none.name - assert none.detection_func(enumerate(lines, start=1)) == (line_range, line) + assert none.detection_func(enumerate(lines, start=1)) == (line_range, name, line) def test_extraction_func(): From 7e0ebd013956ede59af2a8a409c2295d61f754bb Mon Sep 17 00:00:00 2001 From: Keewis Date: Mon, 16 Mar 2020 16:33:58 +0100 Subject: [PATCH 10/24] also return the prompt length --- blackdoc/formats/__init__.py | 5 +++-- blackdoc/formats/doctest.py | 2 +- blackdoc/formats/none.py | 2 +- blackdoc/tests/test_doctest.py | 6 +++++- blackdoc/tests/test_none.py | 3 ++- 5 files changed, 12 insertions(+), 6 deletions(-) diff --git a/blackdoc/formats/__init__.py b/blackdoc/formats/__init__.py index 6c4d7cc..1c81632 100644 --- a/blackdoc/formats/__init__.py +++ b/blackdoc/formats/__init__.py @@ -7,13 +7,14 @@ def extract_code(line_unit, category): dedented = textwrap.dedent(line_unit) - indentation_level = line_unit.find(dedented[:5]) + indentation_depth = line_unit.find(dedented[:5]) func = extraction_funcs.get(category, None) if func is None: raise RuntimeError(f"unknown code format: {category}") - return indentation_level, func(dedented) + prompt_length, extracted = func(dedented) + return indentation_depth, prompt_length, extracted def reformat_code(line_unit, category, indentation_depth): diff --git a/blackdoc/formats/doctest.py b/blackdoc/formats/doctest.py index bf19413..2736eeb 100644 --- a/blackdoc/formats/doctest.py +++ b/blackdoc/formats/doctest.py @@ -51,7 +51,7 @@ def extraction_func(line): extracted_line = "\n".join(line[4:] for line in lines) - return extracted_line + return len(prompt), extracted_line def reformatting_func(line): diff --git a/blackdoc/formats/none.py b/blackdoc/formats/none.py index e935680..0f761f3 100644 --- a/blackdoc/formats/none.py +++ b/blackdoc/formats/none.py @@ -9,7 +9,7 @@ def detection_func(lines): def extraction_func(line): - return line + return 0, line def reformatting_func(line): diff --git 
a/blackdoc/tests/test_doctest.py b/blackdoc/tests/test_doctest.py index 224c4fe..0df07db 100644 --- a/blackdoc/tests/test_doctest.py +++ b/blackdoc/tests/test_doctest.py @@ -37,7 +37,11 @@ def test_detection_func(lines, expected): ), ) def test_extraction_func(line): - expected = "\n".join(line.lstrip()[4:] for line in line.split("\n")) + prompt_length = len(doctest.prompt) + expected = ( + prompt_length, + "\n".join(line.lstrip()[4:] for line in line.split("\n")), + ) actual = doctest.extraction_func(line) assert expected == actual diff --git a/blackdoc/tests/test_none.py b/blackdoc/tests/test_none.py index c864bea..6418acc 100644 --- a/blackdoc/tests/test_none.py +++ b/blackdoc/tests/test_none.py @@ -15,9 +15,10 @@ def test_detection_func(): def test_extraction_func(): lines = docstring.split("\n") + prompt_length = 0 line = lines[0] - assert none.extraction_func(line) == line + assert none.extraction_func(line) == (prompt_length, line) def test_reformatting_func(): From 0d40f28007c4382830c8a8340c1528fefe2b8478 Mon Sep 17 00:00:00 2001 From: Keewis Date: Mon, 16 Mar 2020 16:34:23 +0100 Subject: [PATCH 11/24] register using the format object's name attribute --- blackdoc/formats/__init__.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/blackdoc/formats/__init__.py b/blackdoc/formats/__init__.py index 1c81632..196b422 100644 --- a/blackdoc/formats/__init__.py +++ b/blackdoc/formats/__init__.py @@ -28,5 +28,4 @@ def reformat_code(line_unit, category, indentation_depth): for module in (none, doctest): - name = module.__name__.split(".")[-1] - register_format(name, module) + register_format(module.name, module) From 8a11ca758ac1bf2cdc057e38c4b630bb41c3c7d5 Mon Sep 17 00:00:00 2001 From: Keewis Date: Mon, 16 Mar 2020 16:35:01 +0100 Subject: [PATCH 12/24] limit the caught exceptions to TokenErrors --- blackdoc/__main__.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/blackdoc/__main__.py b/blackdoc/__main__.py index 
78f919b..94dd5d4 100644 --- a/blackdoc/__main__.py +++ b/blackdoc/__main__.py @@ -3,6 +3,7 @@ import sys import black +from blib2to3.pgen2.tokenize import TokenError from . import format_lines @@ -39,7 +40,7 @@ def format_and_overwrite(path, mode): with open(path, "w", encoding=encoding, newline=newline) as f: f.write(new_content) - except Exception as e: + except TokenError as e: print(f"error: cannot format {path.absolute()}: {e}") result = "error" @@ -60,7 +61,7 @@ def format_and_check(path, mode): else: print(f"would reformat {path}") result = "reformatted" - except Exception as e: + except TokenError as e: print(f"error: cannot format {path.absolute()}: {e}") result = "error" From 3c5520cd6906e7b20056b9df740c2259da1dec24 Mon Sep 17 00:00:00 2001 From: Keewis Date: Mon, 16 Mar 2020 17:45:14 +0100 Subject: [PATCH 13/24] rewrite the main logic and provide more informative error messages --- blackdoc/__init__.py | 217 +++++++++++-------------------------------- blackdoc/__main__.py | 5 +- 2 files changed, 54 insertions(+), 168 deletions(-) diff --git a/blackdoc/__init__.py b/blackdoc/__init__.py index ac8f74a..4696c1c 100644 --- a/blackdoc/__init__.py +++ b/blackdoc/__init__.py @@ -1,180 +1,75 @@ import copy -import textwrap +import re import black import more_itertools +from blib2to3.pgen2.tokenize import TokenError -doctest_prompt = ">>> " -doctest_continuation_prompt = "... 
" +from .formats import detection_funcs, extract_code, reformat_code -prompt_categories = { - "doctest": doctest_prompt, -} -continuation_prompt_categories = { - "doctest": doctest_continuation_prompt, -} -available_prompts = set(prompt_categories.values()) | set( - continuation_prompt_categories.values() -) +def update_line_number(message, original_number): + line_re = re.compile(r"(?P\d+):(?P\d+):") + match = line_re.search(message) + if match: + line_number, column_number = map(int, match.groups()) + new_line_number = line_number + original_number - 1 -def extract_prompt(line): - stripped = line.lstrip() - prompt_length = stripped.find(" ") + 1 + message = line_re.sub(f"{new_line_number}:{column_number}:", message) + print(message, tuple(map(int, match.groups()))) + return message - prompt = stripped[:prompt_length] - return prompt if prompt in available_prompts else None +def line_numbers(lines): + yield from enumerate(lines, start=1) -def remove_prompt(line, prompt): - if not line.startswith(prompt): - raise RuntimeError( - f"cannot remove prompt {prompt} from line: prompt not found", line +def classify_lines(lines): + lines = more_itertools.peekable(lines) + while lines: + maybe_detected = ( + (name, func(lines)) + for name, func in detection_funcs.items() + if name != "none" ) - - without_prompt = line[len(prompt) :] - return without_prompt - - -def add_prompt(line, prompt): - return prompt + line - - -def remove_doctest_prompt(code_unit): - indentation_depth = code_unit.find(doctest_prompt) - code_unit = textwrap.dedent(code_unit) - - # multiline unit - if "\n" in code_unit: - prompt_line, *continuation_lines = code_unit.split("\n") - removed = "\n".join( - [ - remove_prompt(prompt_line, doctest_prompt), - *( - remove_prompt(line, doctest_continuation_prompt) - for line in continuation_lines - ), - ] - ) - else: - removed = remove_prompt(code_unit, doctest_prompt) - - return indentation_depth, removed - - -def add_doctest_prompt(code_unit, 
indentation_depth): - if "\n" in code_unit: - prompt_line, *continuation_lines = code_unit.split("\n") - reformatted = "\n".join( - [ - add_prompt(prompt_line, doctest_prompt), - *( - add_prompt(line, doctest_continuation_prompt) - for line in continuation_lines - ), - ] - ) - else: - reformatted = add_prompt(code_unit, doctest_prompt) - - return textwrap.indent(reformatted, " " * indentation_depth) - - -extraction_funcs = { - "doctest": remove_doctest_prompt, -} -reformatting_funcs = { - "doctest": add_doctest_prompt, -} - - -def classify(lines): - """ classify lines by prompt type """ - prompts = dict(zip(prompt_categories.values(), prompt_categories.keys())) - continuation_prompts = dict( - zip( - continuation_prompt_categories.values(), - continuation_prompt_categories.keys(), - ) - ) - - for line in lines: - maybe_prompt = extract_prompt(line) - category = ( - prompts.get(maybe_prompt, None) - or continuation_prompts.get(maybe_prompt, None) - or "none" - ) - - yield category, line - - -def continuation_lines(lines, continuation_prompt): - # We can't use `itertools.takewhile` because it drops the first non-match - # Instead, we peek at the iterable and only remove the element if we take it - iterable = more_itertools.peekable(lines) if not hasattr(lines, "peek") else lines - while True: - try: - category, line = iterable.peek() - except StopIteration: - break - - if extract_prompt(line) != continuation_prompt: - break - - # consume the item - next(iterable) - - yield line - - -def group_code_units(labelled_lines): - """ group together code units """ - # we need to make this peekable here since otherwise we lose an element - lines = more_itertools.peekable(labelled_lines) - while True: - try: - category, line = next(lines) - except StopIteration: - break - - if category == "none": - unit = line + detected = {name: value for name, value in maybe_detected if value is not None} + + if not detected: + yield detection_funcs["none"](lines) + elif len(detected) > 1: + 
raise RuntimeError( + f"cannot classify line: {', '.join(detected.values())} claim it: {lines.peek()}" + ) else: - continuation_prompt = continuation_prompt_categories.get(category, None) - if continuation_prompt is None: - raise ValueError("unknown prompt category for grouping: {category}") - unit = "\n".join([line, *continuation_lines(lines, continuation_prompt)]) - yield category, unit + yield more_itertools.one(detected.values()) -def blacken(labelled_lines, mode=None): - for category, line in labelled_lines: +def blacken(lines, mode=None): + for original_line_range, category, line_unit in lines: if category == "none": - yield category, line + yield category, line_unit continue - # remove the prompt and save the indentation depth for later - converter = extraction_funcs.get(category, None) - if converter is None: - raise ValueError(f"unknown prompt category for extraction: {category}") - indentation_depth, code_unit = converter(line) + indentation_depth, prompt_length, code = extract_code(line_unit, category) - # update the line length - prompt_length = indentation_depth + len(prompt_categories[category]) current_mode = black.FileMode() if mode is None else copy.copy(mode) - current_mode.line_length -= prompt_length + current_mode.line_length -= indentation_depth + prompt_length + + try: + blackened = black.format_str(code, mode=current_mode).rstrip() + except TokenError as e: + apparent_line_num, column = e.args[1] + message = e.args[0] + lineno = original_line_range[0] + (apparent_line_num - 1) + faulty_line = code.split("\n")[(apparent_line_num - 1) - 1] - # blacken the code - blackened = black.format_str(code_unit, mode=current_mode).rstrip() + raise black.InvalidInput(f"{lineno}:{column}: {message}: {faulty_line}") + except black.InvalidInput as e: + message = update_line_number(str(e), original_line_range[0]) + raise black.InvalidInput(message) - # add the prompt and reindent - converter = reformatting_funcs.get(category, None) - if converter is None: - 
raise ValueError(f"unknown prompt category for reformatting: {category}") + reformatted = reformat_code(blackened, category, indentation_depth) - reformatted = converter(blackened, indentation_depth) yield category, reformatted @@ -184,17 +79,9 @@ def unclassify(labelled_lines): def format_lines(lines, mode=None): - labeled = classify(lines) - grouped = group_code_units(labeled) - blackened = blacken(grouped, mode=mode) - - return unclassify(blackened) - + numbered = line_numbers(lines) -def format_file(path): - with open(path) as f: - return "\n".join(format_lines(line.rstrip() for line in f)) + "\n" + labeled = classify_lines(numbered) + blackened = blacken(labeled, mode=mode) - -def format_text(text): - return "\n".join(format_lines(text.split("\n"))) + return unclassify(blackened) diff --git a/blackdoc/__main__.py b/blackdoc/__main__.py index 94dd5d4..f5fbaf2 100644 --- a/blackdoc/__main__.py +++ b/blackdoc/__main__.py @@ -3,7 +3,6 @@ import sys import black -from blib2to3.pgen2.tokenize import TokenError from . 
import format_lines @@ -40,7 +39,7 @@ def format_and_overwrite(path, mode): with open(path, "w", encoding=encoding, newline=newline) as f: f.write(new_content) - except TokenError as e: + except black.InvalidInput as e: print(f"error: cannot format {path.absolute()}: {e}") result = "error" @@ -61,7 +60,7 @@ def format_and_check(path, mode): else: print(f"would reformat {path}") result = "reformatted" - except TokenError as e: + except black.InvalidInput as e: print(f"error: cannot format {path.absolute()}: {e}") result = "error" From 4d70da9a25e3a72766e0be476c66b6e07c7450d9 Mon Sep 17 00:00:00 2001 From: Keewis Date: Mon, 16 Mar 2020 17:59:50 +0100 Subject: [PATCH 14/24] move the functions into different modules --- blackdoc/__init__.py | 78 ++------------------------------------ blackdoc/blacken.py | 47 +++++++++++++++++++++++ blackdoc/classification.py | 28 ++++++++++++++ 3 files changed, 78 insertions(+), 75 deletions(-) create mode 100644 blackdoc/blacken.py create mode 100644 blackdoc/classification.py diff --git a/blackdoc/__init__.py b/blackdoc/__init__.py index 4696c1c..a3daa90 100644 --- a/blackdoc/__init__.py +++ b/blackdoc/__init__.py @@ -1,87 +1,15 @@ -import copy -import re - -import black -import more_itertools -from blib2to3.pgen2.tokenize import TokenError - -from .formats import detection_funcs, extract_code, reformat_code - - -def update_line_number(message, original_number): - line_re = re.compile(r"(?P\d+):(?P\d+):") - match = line_re.search(message) - if match: - line_number, column_number = map(int, match.groups()) - new_line_number = line_number + original_number - 1 - - message = line_re.sub(f"{new_line_number}:{column_number}:", message) - print(message, tuple(map(int, match.groups()))) - return message +from .blacken import blacken +from .classification import classify, unclassify def line_numbers(lines): yield from enumerate(lines, start=1) -def classify_lines(lines): - lines = more_itertools.peekable(lines) - while lines: - 
maybe_detected = ( - (name, func(lines)) - for name, func in detection_funcs.items() - if name != "none" - ) - detected = {name: value for name, value in maybe_detected if value is not None} - - if not detected: - yield detection_funcs["none"](lines) - elif len(detected) > 1: - raise RuntimeError( - f"cannot classify line: {', '.join(detected.values())} claim it: {lines.peek()}" - ) - else: - yield more_itertools.one(detected.values()) - - -def blacken(lines, mode=None): - for original_line_range, category, line_unit in lines: - if category == "none": - yield category, line_unit - continue - - indentation_depth, prompt_length, code = extract_code(line_unit, category) - - current_mode = black.FileMode() if mode is None else copy.copy(mode) - current_mode.line_length -= indentation_depth + prompt_length - - try: - blackened = black.format_str(code, mode=current_mode).rstrip() - except TokenError as e: - apparent_line_num, column = e.args[1] - message = e.args[0] - lineno = original_line_range[0] + (apparent_line_num - 1) - faulty_line = code.split("\n")[(apparent_line_num - 1) - 1] - - raise black.InvalidInput(f"{lineno}:{column}: {message}: {faulty_line}") - except black.InvalidInput as e: - message = update_line_number(str(e), original_line_range[0]) - raise black.InvalidInput(message) - - reformatted = reformat_code(blackened, category, indentation_depth) - - yield category, reformatted - - -def unclassify(labelled_lines): - for _, line in labelled_lines: - yield line - - def format_lines(lines, mode=None): numbered = line_numbers(lines) - labeled = classify_lines(numbered) + labeled = classify(numbered) blackened = blacken(labeled, mode=mode) return unclassify(blackened) diff --git a/blackdoc/blacken.py b/blackdoc/blacken.py new file mode 100644 index 0000000..64af1bf --- /dev/null +++ b/blackdoc/blacken.py @@ -0,0 +1,47 @@ +import copy +import re + +import black +from blib2to3.pgen2.tokenize import TokenError + +from .formats import extract_code, reformat_code + 
+ +def update_line_number(message, original_number): + line_re = re.compile(r"(?P\d+):(?P\d+):") + match = line_re.search(message) + if match: + line_number, column_number = map(int, match.groups()) + new_line_number = line_number + original_number - 1 + + message = line_re.sub(f"{new_line_number}:{column_number}:", message) + return message + + +def blacken(lines, mode=None): + for original_line_range, category, line_unit in lines: + if category == "none": + yield category, line_unit + continue + + indentation_depth, prompt_length, code = extract_code(line_unit, category) + + current_mode = black.FileMode() if mode is None else copy.copy(mode) + current_mode.line_length -= indentation_depth + prompt_length + + try: + blackened = black.format_str(code, mode=current_mode).rstrip() + except TokenError as e: + apparent_line_num, column = e.args[1] + message = e.args[0] + lineno = original_line_range[0] + (apparent_line_num - 1) + faulty_line = code.split("\n")[(apparent_line_num - 1) - 1] + + raise black.InvalidInput(f"{lineno}:{column}: {message}: {faulty_line}") + except black.InvalidInput as e: + message = update_line_number(str(e), original_line_range[0]) + raise black.InvalidInput(message) + + reformatted = reformat_code(blackened, category, indentation_depth) + + yield category, reformatted diff --git a/blackdoc/classification.py b/blackdoc/classification.py new file mode 100644 index 0000000..19b06b6 --- /dev/null +++ b/blackdoc/classification.py @@ -0,0 +1,28 @@ +import more_itertools + +from .formats import detection_funcs + + +def classify(lines): + lines = more_itertools.peekable(lines) + while lines: + maybe_detected = ( + (name, func(lines)) + for name, func in detection_funcs.items() + if name != "none" + ) + detected = {name: value for name, value in maybe_detected if value is not None} + + if not detected: + yield detection_funcs["none"](lines) + elif len(detected) > 1: + raise RuntimeError( + f"cannot classify line: {', '.join(detected.values())} 
claim it: {lines.peek()}" + ) + else: + yield more_itertools.one(detected.values()) + + +def unclassify(labelled_lines): + for _, line in labelled_lines: + yield line From 535325af4628f54e2ac096d02a7975a23ab0e1c5 Mon Sep 17 00:00:00 2001 From: Keewis Date: Mon, 16 Mar 2020 18:00:26 +0100 Subject: [PATCH 15/24] import the register function into the main package --- blackdoc/__init__.py | 1 + 1 file changed, 1 insertion(+) diff --git a/blackdoc/__init__.py b/blackdoc/__init__.py index a3daa90..ccd3dca 100644 --- a/blackdoc/__init__.py +++ b/blackdoc/__init__.py @@ -1,5 +1,6 @@ from .blacken import blacken from .classification import classify, unclassify +from .formats import register_format # noqa def line_numbers(lines): From 913d2fd1db525e5223458934a15e5b7b65bee630 Mon Sep 17 00:00:00 2001 From: Keewis Date: Mon, 16 Mar 2020 18:22:22 +0100 Subject: [PATCH 16/24] add tests for the classification function --- blackdoc/tests/data.py | 20 +++++++++ blackdoc/tests/test_classification.py | 60 +++++++++++++++++++++++++++ 2 files changed, 80 insertions(+) create mode 100644 blackdoc/tests/test_classification.py diff --git a/blackdoc/tests/data.py b/blackdoc/tests/data.py index 76b7d05..5db8fe6 100644 --- a/blackdoc/tests/data.py +++ b/blackdoc/tests/data.py @@ -16,3 +16,23 @@ False """ lines = docstring.split("\n") +code_units = (1, 1, 1, 1, 4, 1, 1, 1, 1, 1, 1, 1, 1, 1) +line_labels = ( + "none", + "none", + "none", + "none", + "doctest", + "doctest", + "doctest", + "doctest", + "doctest", + "none", + "none", + "none", + "none", + "none", + "doctest", + "none", + "none", +) diff --git a/blackdoc/tests/test_classification.py b/blackdoc/tests/test_classification.py new file mode 100644 index 0000000..6d74bf6 --- /dev/null +++ b/blackdoc/tests/test_classification.py @@ -0,0 +1,60 @@ +import more_itertools + +from blackdoc import classification + +from . 
import data + + +def print_line_with_range(name, range_, unit): + min_, max_ = range_ + line_numbers = range(min_, max_) + + no_group = " " + start_group = "┐" + mid_group = "│" + end_group = "┘" + + for index, (lineno, line) in enumerate(zip(line_numbers, unit.split("\n"))): + if max_ - min_ == 1: + classifier = no_group + elif index == 0: + classifier = start_group + elif index == max_ - min_ - 1: + classifier = end_group + else: + classifier = mid_group + + print(f"{name:>8s} {classifier} → {index:02d}: {line}") + + +def print_classification(labeled): + for range, name, unit in labeled: + print_line_with_range(name, range, unit) + + +def test_classify(): + lines = enumerate(data.lines, start=1) + + classified = tuple(classification.classify(lines)) + + print_classification(classified) + + actual = tuple(max_ - min_ for (min_, max_), _, _ in classified) + expected = data.code_units + assert expected == actual + + actual = tuple( + more_itertools.collapse( + [name] * len(lines.split("\n")) for _, name, lines in classified + ) + ) + expected = data.line_labels + assert expected == actual + + +def test_unclassify(): + labeled = tuple(zip(data.line_labels, data.lines)) + actual = tuple(classification.unclassify(labeled)) + expected = tuple(data.lines) + + assert expected == actual From 2f12c5766588df19727cc7739e30e0318038096a Mon Sep 17 00:00:00 2001 From: keewis Date: Tue, 17 Mar 2020 00:11:31 +0100 Subject: [PATCH 17/24] Apply suggestions from code review --- blackdoc/formats/doctest.py | 2 +- blackdoc/tests/test_doctest.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/blackdoc/formats/doctest.py b/blackdoc/formats/doctest.py index 2736eeb..9505c9f 100644 --- a/blackdoc/formats/doctest.py +++ b/blackdoc/formats/doctest.py @@ -38,7 +38,7 @@ def detection_func(lines): line_numbers, lines = map(tuple, more_itertools.unzip(detected_lines)) line_range = min(line_numbers), max(line_numbers) + 1 - if set(line_numbers) != set(range(line_range[0], 
line_range[1])): + if line_numbers != tuple(range(line_range[0], line_range[1])): raise RuntimeError("line numbers are not contiguous") return line_range, name, "\n".join(lines) diff --git a/blackdoc/tests/test_doctest.py b/blackdoc/tests/test_doctest.py index 0df07db..b2d884a 100644 --- a/blackdoc/tests/test_doctest.py +++ b/blackdoc/tests/test_doctest.py @@ -11,7 +11,7 @@ @pytest.mark.parametrize( "lines,expected", ( - pytest.param(lines[0], None, id="no_doctest"), + pytest.param(lines[0], None, id="no_line"), pytest.param(lines[8], ((1, 2), doctest.name, lines[8]), id="single_line"), pytest.param( lines[4:8], From d7b3cb2a6c51aceed96eec7c6033a99c5844a1f2 Mon Sep 17 00:00:00 2001 From: Keewis Date: Tue, 17 Mar 2020 01:10:26 +0100 Subject: [PATCH 18/24] use consume(iterable, n=1) instead of next --- blackdoc/formats/doctest.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/blackdoc/formats/doctest.py b/blackdoc/formats/doctest.py index 9505c9f..60d735e 100644 --- a/blackdoc/formats/doctest.py +++ b/blackdoc/formats/doctest.py @@ -18,8 +18,9 @@ def continuation_lines(lines): if not line.lstrip().startswith(continuation_prompt): break - # consume the line - next(lines) + # actually consume the item + more_itertools.consume(lines, n=1) + yield line_number, line From 71c77f288472d239fa9cb2781c3f48879e3771b2 Mon Sep 17 00:00:00 2001 From: Keewis Date: Tue, 17 Mar 2020 01:12:57 +0100 Subject: [PATCH 19/24] shorten the error message a bit --- blackdoc/classification.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/blackdoc/classification.py b/blackdoc/classification.py index 19b06b6..863e957 100644 --- a/blackdoc/classification.py +++ b/blackdoc/classification.py @@ -16,8 +16,9 @@ def classify(lines): if not detected: yield detection_funcs["none"](lines) elif len(detected) > 1: + formatted_match_names = ", ".join(sorted(detected.keys())) raise RuntimeError( - f"cannot classify line: {', '.join(detected.values())} claim it: 
{lines.peek()}" + f"cannot classify line: {formatted_match_names} claim it: {lines.peek()}" ) else: yield more_itertools.one(detected.values()) From 4c8f09a58a86d99619b6f8afeb2ce76624a28594 Mon Sep 17 00:00:00 2001 From: Keewis Date: Tue, 17 Mar 2020 15:51:11 +0100 Subject: [PATCH 20/24] remove the obsolete blackdoc test file --- blackdoc/tests/test_blackdoc.py | 122 -------------------------------- 1 file changed, 122 deletions(-) delete mode 100644 blackdoc/tests/test_blackdoc.py diff --git a/blackdoc/tests/test_blackdoc.py b/blackdoc/tests/test_blackdoc.py deleted file mode 100644 index 286bab9..0000000 --- a/blackdoc/tests/test_blackdoc.py +++ /dev/null @@ -1,122 +0,0 @@ -import more_itertools - -import blackdoc - -raw_docstring = """ a function to open files - - with a very long description - - >>> file = open( - ... "very_long_filepath", - ... mode="a", - ... ) - >>> file - <_io.TextIOWrapper name='very_long_filepath' mode='w' encoding='UTF-8'> - - text after the first example, spanning - multiple lines - - >>> file.closed - False -""" -line_labels = ( - "none", - "none", - "none", - "none", - "doctest", - "doctest", - "doctest", - "doctest", - "doctest", - "none", - "none", - "none", - "none", - "none", - "doctest", - "none", - "none", -) -code_units = (1, 1, 1, 1, 4, 1, 1, 1, 1, 1, 1, 1, 1, 1) -docstring = """ a function to open files - - with a very long description - - >>> file = open("very_long_filepath", mode="a",) - >>> file - <_io.TextIOWrapper name='very_long_filepath' mode='w' encoding='UTF-8'> - - text after the first example, spanning - multiple lines - - >>> file.closed - False -""" - -prompts = ( - None, - None, - None, - None, - ">>> ", - "... ", - "... ", - "... 
", - ">>> ", - None, - None, - None, - None, - None, - ">>> ", - None, - None, -) - - -def test_extract_prompt(): - extracted = tuple( - blackdoc.extract_prompt(line) for line in raw_docstring.split("\n") - ) - assert extracted == prompts - - -def test_classify(): - categories, _ = more_itertools.unzip(blackdoc.classify(raw_docstring.split("\n"))) - - assert tuple(categories) == line_labels - - -def test_unclassify(): - labelled_lines = zip(line_labels, raw_docstring.split("\n")) - lines = blackdoc.unclassify(labelled_lines) - - assert "\n".join(lines) == raw_docstring - - -def test_group_code_units(): - labelled_lines = list(zip(line_labels, raw_docstring.split("\n"))) - grouped = list(blackdoc.group_code_units(labelled_lines)) - - assert tuple(len(unit.split("\n")) for _, unit in grouped) == code_units - - -def test_blacken(): - def join(group): - if len(group) == 1: - return group - - categories, lines = more_itertools.unzip(group) - return more_itertools.first(categories), "\n".join(lines) - - labelled_lines = zip(line_labels, raw_docstring.split("\n")) - grouped = ( - tuple(more_itertools.collapse(join(group))) - for group in more_itertools.split_into(labelled_lines, code_units) - ) - - formatted = blackdoc.blacken(grouped) - formatted_docstring = "\n".join(unit for _, unit in formatted) - - assert formatted_docstring == docstring From a341470dd07fe1a6970af8023a55df6e6c4936a2 Mon Sep 17 00:00:00 2001 From: Keewis Date: Tue, 17 Mar 2020 15:51:39 +0100 Subject: [PATCH 21/24] rename classify to detect_format --- blackdoc/__init__.py | 4 ++-- blackdoc/classification.py | 5 +++-- blackdoc/tests/test_classification.py | 10 +++++----- 3 files changed, 10 insertions(+), 9 deletions(-) diff --git a/blackdoc/__init__.py b/blackdoc/__init__.py index ccd3dca..d7a2fb3 100644 --- a/blackdoc/__init__.py +++ b/blackdoc/__init__.py @@ -1,5 +1,5 @@ from .blacken import blacken -from .classification import classify, unclassify +from .classification import detect_format, 
unclassify from .formats import register_format # noqa @@ -10,7 +10,7 @@ def line_numbers(lines): def format_lines(lines, mode=None): numbered = line_numbers(lines) - labeled = classify(numbered) + labeled = detect_format(numbered) blackened = blacken(labeled, mode=mode) return unclassify(blackened) diff --git a/blackdoc/classification.py b/blackdoc/classification.py index 863e957..b1de12e 100644 --- a/blackdoc/classification.py +++ b/blackdoc/classification.py @@ -3,7 +3,7 @@ from .formats import detection_funcs -def classify(lines): +def detect_format(lines): lines = more_itertools.peekable(lines) while lines: maybe_detected = ( @@ -18,7 +18,8 @@ def classify(lines): elif len(detected) > 1: formatted_match_names = ", ".join(sorted(detected.keys())) raise RuntimeError( - f"cannot classify line: {formatted_match_names} claim it: {lines.peek()}" + "cannot detect code format for line:" + f" it is claimed by {formatted_match_names}: {lines.peek()}" ) else: yield more_itertools.one(detected.values()) diff --git a/blackdoc/tests/test_classification.py b/blackdoc/tests/test_classification.py index 6d74bf6..35b226e 100644 --- a/blackdoc/tests/test_classification.py +++ b/blackdoc/tests/test_classification.py @@ -32,20 +32,20 @@ def print_classification(labeled): print_line_with_range(name, range, unit) -def test_classify(): +def test_detect_format(): lines = enumerate(data.lines, start=1) - classified = tuple(classification.classify(lines)) + labeled = tuple(classification.detect_format(lines)) - print_classification(classified) + print_classification(labeled) - actual = tuple(max_ - min_ for (min_, max_), _, _ in classified) + actual = tuple(max_ - min_ for (min_, max_), _, _ in labeled) expected = data.code_units assert expected == actual actual = tuple( more_itertools.collapse( - [name] * len(lines.split("\n")) for _, name, lines in classified + [name] * len(lines.split("\n")) for _, name, lines in labeled ) ) expected = data.line_labels From 
6cb15c2efc999710172ff1334dde14c0289da63f Mon Sep 17 00:00:00 2001 From: Keewis Date: Tue, 17 Mar 2020 15:53:35 +0100 Subject: [PATCH 22/24] remove unclassify --- blackdoc/__init__.py | 4 ++-- blackdoc/blacken.py | 4 ++-- blackdoc/classification.py | 5 ----- blackdoc/tests/test_classification.py | 8 -------- 4 files changed, 4 insertions(+), 17 deletions(-) diff --git a/blackdoc/__init__.py b/blackdoc/__init__.py index d7a2fb3..74ecaa7 100644 --- a/blackdoc/__init__.py +++ b/blackdoc/__init__.py @@ -1,5 +1,5 @@ from .blacken import blacken -from .classification import detect_format, unclassify +from .classification import detect_format from .formats import register_format # noqa @@ -13,4 +13,4 @@ def format_lines(lines, mode=None): labeled = detect_format(numbered) blackened = blacken(labeled, mode=mode) - return unclassify(blackened) + return blackened diff --git a/blackdoc/blacken.py b/blackdoc/blacken.py index 64af1bf..aaecc3d 100644 --- a/blackdoc/blacken.py +++ b/blackdoc/blacken.py @@ -21,7 +21,7 @@ def update_line_number(message, original_number): def blacken(lines, mode=None): for original_line_range, category, line_unit in lines: if category == "none": - yield category, line_unit + yield line_unit continue indentation_depth, prompt_length, code = extract_code(line_unit, category) @@ -44,4 +44,4 @@ def blacken(lines, mode=None): reformatted = reformat_code(blackened, category, indentation_depth) - yield category, reformatted + yield reformatted diff --git a/blackdoc/classification.py b/blackdoc/classification.py index b1de12e..715006a 100644 --- a/blackdoc/classification.py +++ b/blackdoc/classification.py @@ -23,8 +23,3 @@ def detect_format(lines): ) else: yield more_itertools.one(detected.values()) - - -def unclassify(labelled_lines): - for _, line in labelled_lines: - yield line diff --git a/blackdoc/tests/test_classification.py b/blackdoc/tests/test_classification.py index 35b226e..4478743 100644 --- a/blackdoc/tests/test_classification.py +++ 
b/blackdoc/tests/test_classification.py @@ -50,11 +50,3 @@ def test_detect_format(): ) expected = data.line_labels assert expected == actual - - -def test_unclassify(): - labeled = tuple(zip(data.line_labels, data.lines)) - actual = tuple(classification.unclassify(labeled)) - expected = tuple(data.lines) - - assert expected == actual From b7c80f93bf36a4d939d7f9b76bdb0f3819d27afe Mon Sep 17 00:00:00 2001 From: Keewis Date: Tue, 17 Mar 2020 16:21:40 +0100 Subject: [PATCH 23/24] rewrite the message updating function to only parse messages --- blackdoc/blacken.py | 45 ++++++++++++++++++++++------------ blackdoc/tests/test_blacken.py | 18 ++++++++++++++ 2 files changed, 48 insertions(+), 15 deletions(-) create mode 100644 blackdoc/tests/test_blacken.py diff --git a/blackdoc/blacken.py b/blackdoc/blacken.py index aaecc3d..2ee9cb9 100644 --- a/blackdoc/blacken.py +++ b/blackdoc/blacken.py @@ -7,15 +7,24 @@ from .formats import extract_code, reformat_code -def update_line_number(message, original_number): - line_re = re.compile(r"(?P\d+):(?P\d+):") - match = line_re.search(message) - if match: - line_number, column_number = map(int, match.groups()) - new_line_number = line_number + original_number - 1 +def parse_message(message): + line_re = re.compile( + r"^(?P<message>[^:]+): (?P<line_number>\d+):" + r"(?P<column_number>\d+): (?P<faulty_line>.+)$" + ) - message = line_re.sub(f"{new_line_number}:{column_number}:", message) - return message + types = { + "message": str, + "line_number": int, + "column_number": int, + "faulty_line": str, + } + + match = line_re.match(message) + if match is None: + raise ValueError(f"invalid error message: {message}") + + return tuple(types[key](value) for key, value in match.groupdict().items()) def blacken(lines, mode=None): @@ -29,18 +38,24 @@ def blacken(lines, mode=None): current_mode = black.FileMode() if mode is None else copy.copy(mode) current_mode.line_length -= indentation_depth + prompt_length + original_line_number, _ = original_line_range + try: blackened = 
black.format_str(code, mode=current_mode).rstrip() except TokenError as e: - apparent_line_num, column = e.args[1] - message = e.args[0] - lineno = original_line_range[0] + (apparent_line_num - 1) - faulty_line = code.split("\n")[(apparent_line_num - 1) - 1] + message, (apparent_line_number, column) = e.args - raise black.InvalidInput(f"{lineno}:{column}: {message}: {faulty_line}") + lineno = original_line_number + (apparent_line_number - 1) + faulty_line = code.split("\n")[(apparent_line_number - 1) - 1] + + raise black.InvalidInput( + f"Cannot parse: {lineno}:{column}: {message}: {faulty_line}" + ) except black.InvalidInput as e: - message = update_line_number(str(e), original_line_range[0]) - raise black.InvalidInput(message) + message, apparent_line_number, column, faulty_line = parse_message(str(e)) + + lineno = original_line_number + (apparent_line_number - 1) + raise black.InvalidInput(f"{message}: {lineno}:{column}: {faulty_line}") reformatted = reformat_code(blackened, category, indentation_depth) diff --git a/blackdoc/tests/test_blacken.py b/blackdoc/tests/test_blacken.py new file mode 100644 index 0000000..f029ebc --- /dev/null +++ b/blackdoc/tests/test_blacken.py @@ -0,0 +1,18 @@ +import pytest + +from blackdoc.blacken import parse_message + + +@pytest.mark.parametrize( + "message,expected", + ( + pytest.param( + 'Cannot parse: 16:10: with new_open("abc) as f:', + ("Cannot parse", 16, 10, 'with new_open("abc) as f:'), + id="simple_message", + ), + ), +) +def test_parse_message(message, expected): + actual = parse_message(message) + assert expected == actual From 685eceaafc9b2aac5aa581fc7c9d1ae57a0f75b2 Mon Sep 17 00:00:00 2001 From: Keewis Date: Tue, 17 Mar 2020 16:25:31 +0100 Subject: [PATCH 24/24] rename category to code_format --- blackdoc/blacken.py | 8 ++++---- blackdoc/formats/__init__.py | 12 ++++++------ 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/blackdoc/blacken.py b/blackdoc/blacken.py index 2ee9cb9..4a18bfe 100644 --- 
a/blackdoc/blacken.py +++ b/blackdoc/blacken.py @@ -28,12 +28,12 @@ def parse_message(message): def blacken(lines, mode=None): - for original_line_range, category, line_unit in lines: - if category == "none": + for original_line_range, code_format, line_unit in lines: + if code_format == "none": yield line_unit continue - indentation_depth, prompt_length, code = extract_code(line_unit, category) + indentation_depth, prompt_length, code = extract_code(line_unit, code_format) current_mode = black.FileMode() if mode is None else copy.copy(mode) current_mode.line_length -= indentation_depth + prompt_length @@ -57,6 +57,6 @@ def blacken(lines, mode=None): lineno = original_line_number + (apparent_line_number - 1) raise black.InvalidInput(f"{message}: {lineno}:{column}: {faulty_line}") - reformatted = reformat_code(blackened, category, indentation_depth) + reformatted = reformat_code(blackened, code_format, indentation_depth) yield reformatted diff --git a/blackdoc/formats/__init__.py b/blackdoc/formats/__init__.py index 196b422..90f535d 100644 --- a/blackdoc/formats/__init__.py +++ b/blackdoc/formats/__init__.py @@ -5,22 +5,22 @@ from .register import extraction_funcs, reformatting_funcs, register_format -def extract_code(line_unit, category): +def extract_code(line_unit, code_format): dedented = textwrap.dedent(line_unit) indentation_depth = line_unit.find(dedented[:5]) - func = extraction_funcs.get(category, None) + func = extraction_funcs.get(code_format, None) if func is None: - raise RuntimeError(f"unknown code format: {category}") + raise RuntimeError(f"unknown code format: {code_format}") prompt_length, extracted = func(dedented) return indentation_depth, prompt_length, extracted -def reformat_code(line_unit, category, indentation_depth): - func = reformatting_funcs.get(category, None) +def reformat_code(line_unit, code_format, indentation_depth): + func = reformatting_funcs.get(code_format, None) if func is None: - raise RuntimeError(f"unknown code format: 
{category}") + raise RuntimeError(f"unknown code format: {code_format}") reformatted = func(line_unit)