Skip to content

Commit

Permalink
Update tests for trailing parens
Browse files (browse the repository at this point in the history)
Also adopt black and isort

Signed-off-by: Philippe Ombredanne <[email protected]>
  • Loading branch information
pombredanne committed Sep 4, 2024
1 parent f1a9bc1 commit 66f26f9
Show file tree
Hide file tree
Showing 8 changed files with 103 additions and 109 deletions.
1 change: 1 addition & 0 deletions setup.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,7 @@ testing =
pytest-xdist >= 2
aboutcode-toolkit >= 6.0.0
black
isort

docs =
Sphinx >= 3.3.1
Expand Down
3 changes: 1 addition & 2 deletions src/pygmars/lex.py
Original file line number Diff line number Diff line change
Expand Up @@ -113,8 +113,7 @@ def __init__(self, matchers, re_flags=0):
)

except (Exception, FutureWarning) as e:
raise InvalidLexerMatcher(
f"Invalid Lexer matcher: {m!r}, label: {label}") from e
raise InvalidLexerMatcher(f"Invalid Lexer matcher: {m!r}, label: {label}") from e

def tokenize(self, string, splitter=str.split):
"""
Expand Down
26 changes: 11 additions & 15 deletions src/pygmars/parse.py
Original file line number Diff line number Diff line change
Expand Up @@ -162,7 +162,7 @@ def parse(self, tree):

for i in range(self._loop):
if trace:
print(f'\nparse: loop# {i}')
print(f"\nparse: loop# {i}")
for parse_rule in self.rules:
tree = parse_rule.parse(tree=tree, trace=trace)
return tree
Expand Down Expand Up @@ -201,9 +201,10 @@ class ParseString:
parse Tree from the backing pieces.
"""

# Anything that's not a delimiter such as <> or {}
LABEL_CHARS = r"[^\{\}<>]"
LABEL = fr"(<{LABEL_CHARS}+>)"
LABEL = rf"(<{LABEL_CHARS}+>)"

# return a True'ish value if the parse results look valid
is_valid = re.compile(r"^(\{?%s\}?)*?$" % LABEL).match
Expand Down Expand Up @@ -271,7 +272,7 @@ def to_tree(self, label="GROUP", pieces_splitter=re.compile(r"[{}]").split):

# Find the list of tokens contained in this piece.
length = piece.count("<")
subsequence = tree[index:index + length]
subsequence = tree[index : index + length]

# Add this list of tokens to our tree.
if matched:
Expand Down Expand Up @@ -387,10 +388,7 @@ def has_balanced_non_nested_curly_braces(string):
# this should probably be made more strict than it is -- e.g., it
# currently accepts 'foo'.
is_label_pattern = re.compile(
r"^((%s|<%s>)*)$" % (
r"([^{}<>]|{\d+,?}|{\d*,\d+})+",
r"[^{}<>]+"
)
r"^((%s|<%s>)*)$" % (r"([^{}<>]|{\d+,?}|{\d*,\d+})+", r"[^{}<>]+")
).match

remove_spaces = re.compile(r"\s").sub
Expand Down Expand Up @@ -432,11 +430,7 @@ def label_pattern_to_regex(label_pattern):
should not contain nested or mismatched angle-brackets.
"""
# Clean up the regular expression
label_pattern = (
remove_spaces("", label_pattern)
.replace("<", "(?:<(?:")
.replace(">", ")>)")
)
label_pattern = remove_spaces("", label_pattern).replace("<", "(?:<(?:").replace(">", ")>)")

# Check the regular expression
if not is_label_pattern(label_pattern):
Expand Down Expand Up @@ -475,7 +469,7 @@ def __init__(
self._root_label = root_label

regexp = label_pattern_to_regex(pattern)
regexp = fr"(?P<group>{regexp})"
regexp = rf"(?P<group>{regexp})"
self._regexp = regexp
# the replacement wraps matched tokens in curly braces
self._repl = "{\\g<group>}"
Expand Down Expand Up @@ -533,7 +527,7 @@ def parse(self, tree, trace=0):
if trace:
updated = re.sub(r"\{[^\{]+\}", f" <{self.label}> ", after_parse)
trace_elements.append("-------------------------------------")
trace_elements.append(f'Rule.parse: applied rule: {self!r}')
trace_elements.append(f"Rule.parse: applied rule: {self!r}")
trace_elements.append(f" Rule regex: {self._regexp}")
trace_elements.append(f" Input parsed to label: {self.label}")
trace_elements.append(f" before : {before_parse}")
Expand All @@ -542,7 +536,9 @@ def parse(self, tree, trace=0):
if trace > 1:
trace_elements.append(". . . . . . . . .. ")
trace_elements.append(tree.pformat())
trace_elements.append(f" with pattern: {self.description} ( {self.pattern!r} )")
trace_elements.append(
f" with pattern: {self.description} ( {self.pattern!r} )"
)

tree = parse_string.to_tree(self.label)

Expand Down
59 changes: 28 additions & 31 deletions src/pygmars/tree.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,17 +21,17 @@
(label='s', children=(
(label='dp', children=(
(label='d', children=(
the)
the))
(label='np', children=(
dog))
dog))))
(label='vp', children=(
(label='v', children=(
chased)
chased))
(label='dp', children=(
(label='d', children=(
the)
the))
(label='np', children=(
cat))))
cat))))))))
The node label is accessed using the `label` attribute:
>>> dp1.label, dp2.label, vp.label, tree.label
Expand Down Expand Up @@ -76,55 +76,55 @@ class Tree(list):
<BLANKLINE>
2
<BLANKLINE>
4)
5)
4
5
>>> vp = Tree('VP', [Tree('V', ['saw']), Tree('NP', ['him'])])
>>> s = Tree('S', [Tree('NP', ['I']), vp])
>>> print(s)
(label='S', children=(
(label='NP', children=(
I)
I))
(label='VP', children=(
(label='V', children=(
saw)
saw))
(label='NP', children=(
him)))
him))))))
>>> print(s[1])
(label='VP', children=(
(label='V', children=(
saw)
saw))
(label='NP', children=(
him))
him))))
>>> print(s[1,1])
(label='NP', children=(
him)
him))
>>> t = Tree.from_string("(S (NP I) (VP (V saw) (NP him)))")
>>> s == t
True
>>> print(t)
(label='S', children=(
(label='NP', children=(
I)
I))
(label='VP', children=(
(label='V', children=(
saw)
saw))
(label='NP', children=(
him)))
him))))))
>>> t[0], t[1,1] = t[1,1], t[0]
>>> print(t)
(label='S', children=(
(label='NP', children=(
him)
him))
(label='VP', children=(
(label='V', children=(
saw)
saw))
(label='NP', children=(
I)))
I))))))
The length of a tree is the number of children it has.
Expand Down Expand Up @@ -170,25 +170,22 @@ def __getitem__(self, index):
return self[index[0]][index[1:]]
else:
raise TypeError(
"%s indices must be integers, not %s" % (
type(self).__name__, type(index).__name__)
"%s indices must be integers, not %s" % (type(self).__name__, type(index).__name__)
)

def __setitem__(self, index, value):
if isinstance(index, (int, slice)):
return list.__setitem__(self, index, value)
elif isinstance(index, (list, tuple)):
if len(index) == 0:
raise IndexError(
"The tree position () may not be " "assigned to.")
raise IndexError("The tree position () may not be " "assigned to.")
elif len(index) == 1:
self[index[0]] = value
else:
self[index[0]][index[1:]] = value
else:
raise TypeError(
"%s indices must be integers, not %s" % (
type(self).__name__, type(index).__name__)
"%s indices must be integers, not %s" % (type(self).__name__, type(index).__name__)
)

def leaves(self):
Expand Down Expand Up @@ -275,8 +272,7 @@ def from_string(
if leaf_pattern is None:
leaf_pattern = r"[^\s%s%s]+" % (open_pattern, close_pattern)
token_re = re.compile(
r"%s\s*(%s)?|%s|(%s)" % (open_pattern,
node_pattern, close_pattern, leaf_pattern)
r"%s\s*(%s)?|%s|(%s)" % (open_pattern, node_pattern, close_pattern, leaf_pattern)
)
# Walk through each token, updating a stack of trees.
stack = [(None, [])] # list of (node, children) tuples
Expand Down Expand Up @@ -345,7 +341,7 @@ def _parse_error(cls, s, match, expecting):
if len(s) > pos + 10:
s = s[: pos + 10] + "..."
if pos > 10:
s = "..." + s[pos - 10:]
s = "..." + s[pos - 10 :]
offset = 13
msg += '\n%s"%s"\n%s^' % (" " * 16, s, " " * (17 + offset))
raise ValueError(msg)
Expand All @@ -371,19 +367,20 @@ def pformat(self, indent=0, *args, **kwargs):
subsequent lines.
:type indent: int
"""
closings = 0
if isinstance(self.label, str):
s = f"(label={self.label!r}, children=("
closings = 2
else:
s = ""

for child in self:
if isinstance(child, Tree):
s += "\n" + " " * (indent + 2) + \
child.pformat(indent=indent + 2)
s += "\n" + " " * (indent + 2) + child.pformat(indent=indent + 2)
elif isinstance(child, tuple):
s += "\n" + " " * (indent + 2) + "/".join(child)
elif isinstance(child, str):
s += "\n" + " " * (indent + 2) + f"{child}"
else:
s += "\n" + " " * (indent + 2) + repr(child)
return f"{s})"
return f"{s}" + (")" * closings)
2 changes: 1 addition & 1 deletion tests/test_lex.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,8 @@

import unittest

from pygmars.lex import Lexer
from pygmars.lex import InvalidLexerMatcher
from pygmars.lex import Lexer


class TestLexer(unittest.TestCase):
Expand Down
6 changes: 3 additions & 3 deletions tests/test_parse.py
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,7 @@ def test_can_use_label_patterns_with_quantifiers(self):
(label='NN-TL', value='Court')
(label='NN-TL', value='Judge')
(label='NP', value='Durwood')
(label='NP', value='Pye'))
(label='NP', value='Pye')))
(label='TO', value='to')
(label='VB', value='investigate')
(label='NNS', value='reports')
Expand All @@ -103,7 +103,7 @@ def test_can_use_label_patterns_with_quantifiers(self):
(label='NN-TL', value='Mayor-nominate')
(label='NP', value='Ivan')
(label='NP', value='Allen')
(label='NP', value='Jr.'))
(label='DOT', value='.'))"""
(label='NP', value='Jr.')))
(label='DOT', value='.')))"""

assert tree.pformat() == expected
Loading

0 comments on commit 66f26f9

Please sign in to comment.