Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

cylc lint S007: fix regex catastrophic backtracking #6078

Merged
merged 1 commit into from
Apr 22, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions changes.d/6078.fix.md
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Fixed bug where `cylc lint` could hang when checking `inherit` settings in `flow.cylc`.
86 changes: 33 additions & 53 deletions cylc/flow/scripts/lint.py
Original file line number Diff line number Diff line change
Expand Up @@ -265,6 +265,38 @@ def check_indentation(line: str) -> bool:
return bool(len(match[0]) % 4 != 0)


INHERIT_REGEX = re.compile(r'\s*inherit\s*=\s*(.*)')
FAM_NAME_IGNORE_REGEX = re.compile(
# Stuff we want to ignore when checking for lowercase in family names
r'''
# comments
(?<!{)\#.*
# or Cylc parameters
| <[^>]+>
# or Jinja2
| {{.*?}} | {%.*?%} | {\#.*?\#}
# or EmPy
| (@[\[{\(]).*([\]\}\)])
''',
re.X
)
LOWERCASE_REGEX = re.compile(r'[a-z]')


def check_lowercase_family_names(line: str) -> bool:
"""Check for lowercase in family names."""
match = INHERIT_REGEX.match(line)
if not match:
return False
# Replace stuff we want to ignore with a neutral char (tilde will do):
content = FAM_NAME_IGNORE_REGEX.sub('~', match.group(1))
return any(
LOWERCASE_REGEX.search(i)
for i in content.split(',')
if i.strip(' \'"') not in {'None', 'none', 'root'}
)


FUNCTION = 'function'

STYLE_GUIDE = (
Expand Down Expand Up @@ -351,62 +383,10 @@ def check_indentation(line: str) -> bool:
'url': STYLE_GUIDE + 'trailing-whitespace',
FUNCTION: re.compile(r'[ \t]$').findall
},
# Look for families both from inherit=FAMILY and FAMILY:trigger-all/any.
# Do not match inherit lines with `None` at the start.
"S007": {
'short': 'Family name contains lowercase characters.',
'url': STYLE_GUIDE + 'task-naming-conventions',
FUNCTION: re.compile(
r'''
# match all inherit statements
^\s*inherit\s*=
# filtering out those which match only valid family names
(?!
\s*
# none, None and root are valid family names
# and `inherit =` or `inherit = # x` are valid too
(['"]?(none|None|root|\#.*|$)['"]?|
(
# as are families named with capital letters
[A-Z0-9_-]+
# and optional quotes
| [\'\"]
# which may include Cylc parameters
| (<[^>]+>)
# or Jinja2
| ({[{%].*[%}]})
# or EmPy
| (@[\[{\(]).*([\]\}\)])
)+
)
# this can be a comma separated list
(
\s*,\s*
# none, None and root are valid family names
(['"]?(none|None|root)['"]?|
(
# as are families named with capital letters
[A-Z0-9_-]+
# and optional quotes
| [\'\"]
# which may include Cylc parameters
| (<[^>]+>)
# or Jinja2
| ({[{%].*[%}]})
# or EmPy
| (@[\[{\(]).*([\]\}\)])
)+
)
)*
# allow trailing commas and whitespace
\s*,?\s*
# allow trailing comments
(\#.*)?
$
)
''',
re.X
).findall,
FUNCTION: check_lowercase_family_names,
},
"S008": {
'short': JINJA2_FOUND_WITHOUT_SHEBANG,
Expand Down
131 changes: 68 additions & 63 deletions tests/unit/scripts/test_lint.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@

from cylc.flow.scripts.lint import (
MANUAL_DEPRECATIONS,
check_lowercase_family_names,
get_cylc_files,
get_pyproject_toml,
get_reference_rst,
Expand Down Expand Up @@ -240,77 +241,81 @@ def test_check_cylc_file_line_no():


@pytest.mark.parametrize(
'inherit_line',
(
param(item, id=str(ind))
for ind, item in enumerate([
Comment on lines -245 to -246
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Just using the index was making it kind of hard to tell which test cases were failing

# lowercase family names are not permitted
'inherit = g',
'inherit = foo, b, a, r',
'inherit = FOO, bar',
'inherit = None, bar',
'inherit = A, b, C',
'inherit = "A", "b"',
"inherit = 'A', 'b'",
# parameters, jinja2 and empy should be ignored
# but any lowercase chars before or after should not
'inherit = a<x>z',
'inherit = a{{ x }}z',
'inherit = a@( x )z',
])
)
'line',
[
# lowercase family names are not permitted
'inherit = g',
'inherit = FOO, bar',
'inherit = None, bar',
'inherit = A, b, C',
'inherit = "A", "b"',
"inherit = 'A', 'b'",
'inherit = FOO_BAr',
# whitespace & trailing commas
' inherit = a , ',
# parameters, jinja2 and empy should be ignored
# but any lowercase chars before or after should not
'inherit = A<x>z',
'inherit = A{{ x }}z',
'inherit = N{# #}one',
'inherit = A@( x )z',
]
)
def test_inherit_lowercase_matches(inherit_line):
lint = lint_text(inherit_line, ['style'])
assert any('S007' in msg for msg in lint.messages)
def test_check_lowercase_family_names__true(line):
assert check_lowercase_family_names(line) is True


@pytest.mark.parametrize(
'inherit_line',
(
param(item, id=str(ind))
for ind, item in enumerate([
# undefined values are ok
'inherit =',
'inherit = ',
# none, None and root are ok
'inherit = none',
'inherit = None',
'inherit = root',
# trailing commas and whitespace are ok
'inherit = None,',
'inherit = None, ',
'inherit = None , ',
# uppercase family names are ok
'inherit = None, FOO, BAR',
'inherit = FOO',
'inherit = BAZ',
'inherit = root',
'inherit = FOO_BAR_0',
# parameters should be ignored
'inherit = A<a>Z',
'inherit = <a=1, b-1, c+1>',
# jinja2 should be ignored
'line',
[
# undefined values are ok
'inherit =',
'inherit = ',
# none, None and root are ok
'inherit = none',
'inherit = None',
'inherit = root',
# whitespace & trailing commas
'inherit = None,',
'inherit = None, ',
' inherit = None , ',
# uppercase family names are ok
'inherit = None, FOO, BAR',
'inherit = FOO',
'inherit = FOO_BAR_0',
# parameters should be ignored
'inherit = A<a>Z',
'inherit = <a=1, b-1, c+1>',
# jinja2 should be ignored
param(
'inherit = A{{ a }}Z, {% for x in range(5) %}'
'A{{ x }}, {% endfor %}',
# empy should be ignored
'inherit = A@( a )Z',
# trailing comments should be ignored
'inherit = A, B # no, comment',
'inherit = # a',
# quotes are ok
'inherit = "A", "B"',
"inherit = 'A', 'B'",
'inherit = "None", B',
'inherit = <a = 1, b - 1>',
# one really awkward, but valid example
id='jinja2-long'
),
# empy should be ignored
'inherit = A@( a )Z',
# trailing comments should be ignored
'inherit = A, B # no, comment',
'inherit = # a',
# quotes are ok
'inherit = "A", "B"',
"inherit = 'A', 'B'",
'inherit = "None", B',
'inherit = <a = 1, b - 1>',
# one really awkward, but valid example
param(
'inherit = none, FOO_BAR_0, "<a - 1>", A<a>Z, A{{a}}Z, A@(a)Z',
])
)
id='awkward'
),
]
)
def test_inherit_lowercase_not_match_none(inherit_line):
lint = lint_text(inherit_line, ['style'])
assert not any('S007' in msg for msg in lint.messages)
def test_check_lowercase_family_names__false(line):
assert check_lowercase_family_names(line) is False


def test_inherit_lowercase_matches():
lint = lint_text('inherit = a', ['style'])
assert any('S007' in msg for msg in lint.messages)


@pytest.mark.parametrize(
Expand Down
Loading