Skip to content

Commit

Permalink
fix: rework exclusion parsing to fix #1779
Browse files Browse the repository at this point in the history
  • Loading branch information
nedbat committed May 15, 2024
1 parent 75f9d51 commit 96bd930
Show file tree
Hide file tree
Showing 6 changed files with 103 additions and 102 deletions.
4 changes: 4 additions & 0 deletions CHANGES.rst
Original file line number Diff line number Diff line change
Expand Up @@ -23,13 +23,17 @@ upgrading your version of coverage.py.
Unreleased
----------

- Fix: nested matches of exclude patterns could exclude too much code, as
reported in `issue 1779`_. This is now fixed.

- In the HTML report, the filter term and "hide covered" checkbox settings are
remembered between viewings, thanks to `Daniel Diniz <pull 1776_>`_.

- Python 3.13.0b1 is supported.


.. _pull 1776: https://github.com/nedbat/coveragepy/pull/1776
.. _issue 1779: https://github.com/nedbat/coveragepy/issues/1779


.. scriv-start-here
Expand Down
118 changes: 51 additions & 67 deletions coverage/parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@
from coverage.bytecode import code_objects
from coverage.debug import short_stack
from coverage.exceptions import NoSource, NotPython
from coverage.misc import join_regex, nice_pair
from coverage.misc import nice_pair
from coverage.phystokens import generate_tokens
from coverage.types import TArc, TLineNo

Expand Down Expand Up @@ -62,8 +62,8 @@ def __init__(

self.exclude = exclude

# The text lines of the parsed code.
self.lines: list[str] = self.text.split("\n")
# The parsed AST of the text.
self._ast_root: ast.AST | None = None

# The normalized line numbers of the statements in the code. Exclusions
# are taken into account, and statements are adjusted to their first
Expand Down Expand Up @@ -101,19 +101,16 @@ def __init__(
self._all_arcs: set[TArc] | None = None
self._missing_arc_fragments: TArcFragments | None = None

@functools.lru_cache()
def lines_matching(self, *regexes: str) -> set[TLineNo]:
"""Find the lines matching one of a list of regexes.
def lines_matching(self, regex: str) -> set[TLineNo]:
"""Find the lines matching a regex.
Returns a set of line numbers, the lines that contain a match for one
of the regexes in `regexes`. The entire line needn't match, just a
part of it.
Returns a set of line numbers, the lines that contain a match for
`regex`. The entire line needn't match, just a part of it.
"""
combined = join_regex(regexes)
regex_c = re.compile(combined)
regex_c = re.compile(regex)
matches = set()
for i, ltext in enumerate(self.lines, start=1):
for i, ltext in enumerate(self.text.split("\n"), start=1):
if regex_c.search(ltext):
matches.add(self._multiline.get(i, i))
return matches
Expand All @@ -127,26 +124,18 @@ def _raw_parse(self) -> None:
# Find lines which match an exclusion pattern.
if self.exclude:
self.raw_excluded = self.lines_matching(self.exclude)
self.excluded = set(self.raw_excluded)

# Tokenize, to find excluded suites, to find docstrings, and to find
# multi-line statements.

# The last token seen. Start with INDENT to get module docstrings
prev_toktype: int = token.INDENT
# The current number of indents.
indent: int = 0
# An exclusion comment will exclude an entire clause at this indent.
exclude_indent: int = 0
# Are we currently excluding lines?
excluding: bool = False
# Are we excluding decorators now?
excluding_decorators: bool = False
# The line number of the first line in a multi-line statement.
first_line: int = 0
# Is the file empty?
empty: bool = True
# Is this the first token on a line?
first_on_line: bool = True
# Parenthesis (and bracket) nesting level.
nesting: int = 0

Expand All @@ -162,42 +151,22 @@ def _raw_parse(self) -> None:
indent += 1
elif toktype == token.DEDENT:
indent -= 1
elif toktype == token.NAME:
if ttext == "class":
# Class definitions look like branches in the bytecode, so
# we need to exclude them. The simplest way is to note the
# lines with the "class" keyword.
self.raw_classdefs.add(slineno)
elif toktype == token.OP:
if ttext == ":" and nesting == 0:
should_exclude = (
self.raw_excluded.intersection(range(first_line, elineno + 1))
or excluding_decorators
self.excluded.intersection(range(first_line, elineno + 1))
)
if not excluding and should_exclude:
# Start excluding a suite. We trigger off of the colon
# token so that the #pragma comment will be recognized on
# the same line as the colon.
self.raw_excluded.add(elineno)
self.excluded.add(elineno)
exclude_indent = indent
excluding = True
excluding_decorators = False
elif ttext == "@" and first_on_line:
# A decorator.
if elineno in self.raw_excluded:
excluding_decorators = True
if excluding_decorators:
self.raw_excluded.add(elineno)
elif ttext in "([{":
nesting += 1
elif ttext in ")]}":
nesting -= 1
elif toktype == token.STRING:
if prev_toktype == token.INDENT:
# Strings that are first on an indented line are docstrings.
# (a trick from trace.py in the stdlib.) This works for
# 99.9999% of cases.
self.raw_docstrings.update(range(slineno, elineno+1))
elif toktype == token.NEWLINE:
if first_line and elineno != first_line:
# We're at the end of a line, and we've ended on a
Expand All @@ -206,7 +175,6 @@ def _raw_parse(self) -> None:
for l in range(first_line, elineno+1):
self._multiline[l] = first_line
first_line = 0
first_on_line = True

if ttext.strip() and toktype != tokenize.COMMENT:
# A non-white-space token.
Expand All @@ -218,10 +186,7 @@ def _raw_parse(self) -> None:
if excluding and indent <= exclude_indent:
excluding = False
if excluding:
self.raw_excluded.add(elineno)
first_on_line = False

prev_toktype = toktype
self.excluded.add(elineno)

# Find the starts of the executable statements.
if not empty:
Expand All @@ -234,6 +199,34 @@ def _raw_parse(self) -> None:
if env.PYBEHAVIOR.module_firstline_1 and self._multiline:
self._multiline[1] = min(self.raw_statements)

self.excluded = self.first_lines(self.excluded)

# AST lets us find classes, docstrings, and decorator-affected
# functions and classes.
assert self._ast_root is not None
for node in ast.walk(self._ast_root):
# Find class definitions.
if isinstance(node, ast.ClassDef):
self.raw_classdefs.add(node.lineno)
# Find docstrings.
if isinstance(node, (ast.ClassDef, ast.FunctionDef, ast.AsyncFunctionDef, ast.Module)):
if node.body:
first = node.body[0]
if (
isinstance(first, ast.Expr)
and isinstance(first.value, ast.Constant)
and isinstance(first.value.value, str)
):
self.raw_docstrings.update(
range(first.lineno, cast(int, first.end_lineno) + 1)
)
# Exclusions carry from decorators and signatures to the bodies of
# functions and classes.
if isinstance(node, (ast.ClassDef, ast.FunctionDef, ast.AsyncFunctionDef)):
first_line = min((d.lineno for d in node.decorator_list), default=node.lineno)
if self.excluded.intersection(range(first_line, node.lineno + 1)):
self.excluded.update(range(first_line, cast(int, node.end_lineno) + 1))

@functools.lru_cache(maxsize=1000)
def first_line(self, lineno: TLineNo) -> TLineNo:
"""Return the first line number of the statement including `lineno`."""
Expand Down Expand Up @@ -268,19 +261,14 @@ def parse_source(self) -> None:
"""
try:
self._ast_root = ast.parse(self.text)
self._raw_parse()
except (tokenize.TokenError, IndentationError, SyntaxError) as err:
if hasattr(err, "lineno"):
lineno = err.lineno # IndentationError
else:
lineno = err.args[1][0] # TokenError
except (IndentationError, SyntaxError) as err:
raise NotPython(
f"Couldn't parse '{self.filename}' as Python source: " +
f"{err.args[0]!r} at line {lineno}",
f"{err.args[0]!r} at line {err.lineno}",
) from err

self.excluded = self.first_lines(self.raw_excluded)

ignore = self.excluded | self.raw_docstrings
starts = self.raw_statements - ignore
self.statements = self.first_lines(starts) - ignore
Expand All @@ -303,7 +291,8 @@ def _analyze_ast(self) -> None:
`_all_arcs` is the set of arcs in the code.
"""
aaa = AstArcAnalyzer(self.text, self.raw_statements, self._multiline)
assert self._ast_root is not None
aaa = AstArcAnalyzer(self._ast_root, self.raw_statements, self._multiline)
aaa.analyze()

self._all_arcs = set()
Expand Down Expand Up @@ -403,14 +392,9 @@ def __init__(
self.code = code
else:
assert filename is not None
try:
self.code = compile(text, filename, "exec", dont_inherit=True)
except SyntaxError as synerr:
raise NotPython(
"Couldn't parse '%s' as Python source: '%s' at line %d" % (
filename, synerr.msg, synerr.lineno or 0,
),
) from synerr
# We only get here if earlier ast parsing succeeded, so no need to
# catch errors.
self.code = compile(text, filename, "exec", dont_inherit=True)

def child_parsers(self) -> Iterable[ByteParser]:
"""Iterate over all the code objects nested within this one.
Expand Down Expand Up @@ -685,11 +669,11 @@ class AstArcAnalyzer:

def __init__(
self,
text: str,
root_node: ast.AST,
statements: set[TLineNo],
multiline: dict[TLineNo, TLineNo],
) -> None:
self.root_node = ast.parse(text)
self.root_node = root_node
# TODO: I think this is happening in too many places.
self.statements = {multiline.get(l, l) for l in statements}
self.multiline = multiline
Expand Down
6 changes: 4 additions & 2 deletions coverage/python.py
Original file line number Diff line number Diff line change
Expand Up @@ -206,8 +206,10 @@ def translate_arcs(self, arcs: Iterable[TArc]) -> set[TArc]:
def no_branch_lines(self) -> set[TLineNo]:
assert self.coverage is not None
no_branch = self.parser.lines_matching(
join_regex(self.coverage.config.partial_list),
join_regex(self.coverage.config.partial_always_list),
join_regex(
self.coverage.config.partial_list
+ self.coverage.config.partial_always_list
)
)
return no_branch

Expand Down
22 changes: 14 additions & 8 deletions lab/parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,7 @@ def one_file(self, options, filename):

if options.dis:
print("Main code:")
disassemble(pyparser)
disassemble(pyparser.text)

arcs = pyparser.arcs()

Expand All @@ -95,8 +95,8 @@ def one_file(self, options, filename):

exit_counts = pyparser.exit_counts()

for lineno, ltext in enumerate(pyparser.lines, start=1):
marks = [' ', ' ', ' ', ' ', ' ']
for lineno, ltext in enumerate(pyparser.text.splitlines(), start=1):
marks = [' '] * 6
a = ' '
if lineno in pyparser.raw_statements:
marks[0] = '-'
Expand All @@ -110,7 +110,13 @@ def one_file(self, options, filename):
if lineno in pyparser.raw_classdefs:
marks[3] = 'C'
if lineno in pyparser.raw_excluded:
marks[4] = 'x'
marks[4] = 'X'
elif lineno in pyparser.excluded:
marks[4] = '×'
if lineno in pyparser._multiline.values():
marks[5] = 'o'
elif lineno in pyparser._multiline.keys():
marks[5] = '.'

if arc_chars:
a = arc_chars[lineno].ljust(arc_width)
Expand Down Expand Up @@ -173,13 +179,13 @@ def all_code_objects(code):
yield code


def disassemble(pyparser):
def disassemble(text):
"""Disassemble code, for ad-hoc experimenting."""

code = compile(pyparser.text, "", "exec", dont_inherit=True)
code = compile(text, "", "exec", dont_inherit=True)
for code_obj in all_code_objects(code):
if pyparser.text:
srclines = pyparser.text.splitlines()
if text:
srclines = text.splitlines()
else:
srclines = None
print("\n%s: " % code_obj)
Expand Down
4 changes: 2 additions & 2 deletions tests/test_coverage.py
Original file line number Diff line number Diff line change
Expand Up @@ -664,12 +664,12 @@ def test_module_docstring(self) -> None:
[2, 3],
)
self.check_coverage("""\
# Start with a comment, because it changes the behavior(!?)
# Start with a comment, even though it doesn't change the behavior.
'''I am a module docstring.'''
a = 3
b = 4
""",
[2, 3, 4],
[3, 4],
)


Expand Down
Loading

0 comments on commit 96bd930

Please sign in to comment.