Skip to content

Commit

Permalink
Fix #372 quote simplemrs preds with reserved chars
Browse files Browse the repository at this point in the history
  • Loading branch information
goodmami committed Dec 12, 2023
1 parent ec01639 commit 07e0aac
Show file tree
Hide file tree
Showing 3 changed files with 31 additions and 1 deletion.
2 changes: 2 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
* Stop using Python's undocumented parse_template re function ([#378])
* Resolved newline and encoding issues for running tests on Windows ([#379])
* Capture legacy single-quoted predicates longer than 1 character ([#373])
* Quote SimpleMRS predicates with reserved characters ([#372])


## [v1.8.1]
Expand Down Expand Up @@ -1621,6 +1622,7 @@ information about changes, except for
[#360]: https://github.com/delph-in/pydelphin/issues/360
[#364]: https://github.com/delph-in/pydelphin/issues/364
[#367]: https://github.com/delph-in/pydelphin/issues/367
[#372]: https://github.com/delph-in/pydelphin/issues/372
[#373]: https://github.com/delph-in/pydelphin/issues/373
[#374]: https://github.com/delph-in/pydelphin/issues/374
[#375]: https://github.com/delph-in/pydelphin/issues/375
Expand Down
9 changes: 8 additions & 1 deletion delphin/codecs/simplemrs.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@

from pathlib import Path
from typing import Optional
import re

from delphin.util import Lexer
from delphin import predicate
Expand Down Expand Up @@ -361,7 +362,7 @@ def _encode_rels(rels, varprops, lnk, indent):
delim = ('\n ' + ' ' * len('RELS: < ')) if indent else ' '
tokens = []
for rel in rels:
pred = rel.predicate
pred = _encode_predicate(rel.predicate)
if lnk:
pred += str(rel.lnk)
reltoks = ['[', pred]
Expand All @@ -382,6 +383,12 @@ def _encode_rels(rels, varprops, lnk, indent):
return tokens


def _encode_predicate(predicate: str) -> str:
    """Return *predicate* in the form used when serializing a relation.

    If the predicate contains whitespace, a double or single quote, a
    colon, an angle bracket, or a square bracket, it is passed through
    ``_escape`` and wrapped in double quotes. Any other predicate is
    returned unchanged.
    """
    needs_quoting = re.search(r"[\s\"':<>[\]]", predicate) is not None
    if not needs_quoting:
        return predicate
    escaped = _escape(predicate)
    return f'"{escaped}"'


def _encode_hcons(hcons):
tokens = ['{} {} {}'.format(hc.hi, hc.relation, hc.lo)
for hc in hcons]
Expand Down
21 changes: 21 additions & 0 deletions tests/codecs/simplemrs_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -86,3 +86,24 @@ def test_legacy_single_quote_predicates_issue_373():
# https://github.com/delph-in/pydelphin/issues/373
m = simplemrs.decode("[ RELS: < [ 'single+quoted LBL: h0 ] > ]")
assert m.rels[0].predicate == "single+quoted"


def test_quote_reserved_characters_issue_372():
    """Check that predicates with reserved characters are encoded quoted.

    Each case is decoded from a double-quoted predicate, compared with
    the decoded value, re-encoded, checked for the quoted form in the
    output, and decoded once more.
    """
    # https://github.com/delph-in/pydelphin/issues/372

    # (predicate text as written in the input, whether the decoded
    # predicate must have its double quotes re-escaped before comparing)
    cases = [
        ("a space", False),
        ("a:colon", False),
        (r'double\"quotes', True),
        ("single'quotes", False),
        ("left<angle", False),
        ("right>angle", False),
        ("left[bracket", False),
        ("right]bracket", False),
    ]

    for text, escape in cases:
        mrs = simplemrs.decode(f'[ RELS: < [ "{text}"<1:2> LBL: h0 ] > ]')
        decoded = mrs.rels[0].predicate
        if escape:
            decoded = decoded.replace('"', r'\"')
        assert decoded == text
        encoded = simplemrs.encode(mrs)
        assert f'"{text}"' in encoded
        simplemrs.decode(encoded)  # confirm it roundtrips without error

0 comments on commit 07e0aac

Please sign in to comment.