Skip to content

Commit

Permalink
[htm8] Fix AttrLexer bug with Missing value
Browse files Browse the repository at this point in the history
The lexer state (the `next_value_is_missing` flag) wasn't reset between ReadName() iterations.  Gah.

Stateful APIs need extra care.  I could probably combine ReadName() and
ReadValue(), although we would then return a 6-tuple!

And the function would be long.
  • Loading branch information
Andy C committed Jan 17, 2025
1 parent 41a63a9 commit c668460
Show file tree
Hide file tree
Showing 3 changed files with 10 additions and 4 deletions.
7 changes: 4 additions & 3 deletions data_lang/htm8.py
Original file line number Diff line number Diff line change
Expand Up @@ -591,12 +591,13 @@ def ReadName(self):
self.next_value_is_missing = True
# HACK: REWIND, since we don't want to consume whitespace
self.pos = self.name_end
else:
self.next_value_is_missing = False
return attr_name.Ok, self.name_start, self.name_end
else:
# Reset state - e.g. you must call AttrNameEquals
self.name_start = -1
self.name_end = -1
self.next_value_is_missing = False

if a == attr_name.Done:
return attr_name.Done, -1, -1
Expand Down Expand Up @@ -746,8 +747,8 @@ def AllAttrsRaw(attr_lx):
while True:
n, name_start, name_end = attr_lx.ReadName()
if 0:
log(' AllAttrsRaw ==> ReadName %s %d %d', attr_name_str(n),
name_start, name_end)
log(' AllAttrsRaw ==> ReadName %s %d %d %r', attr_name_str(n),
name_start, name_end, attr_lx.s[attr_lx.pos:attr_lx.pos + 10])
if n == attr_name.Ok:
name = attr_lx.s[name_start:name_end]
#log(' Name %r', name)
Expand Down
3 changes: 2 additions & 1 deletion data_lang/htm8_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -391,7 +391,8 @@ def ValidTokenList(s, no_special_tags=False):
if tok_id == h8_id.EndOfStream:
break
if tok_id == h8_id.Invalid:
raise htm8.LexError(s, start_pos)
raise htm8.LexError('ValidTokenList() got invalid token', s,
start_pos)
start_pos = end_pos
return tokens

Expand Down
4 changes: 4 additions & 0 deletions lazylex/html_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -245,6 +245,10 @@ def testValid(self):
('<STYLE><</STYLE>', ''),
#'<SCRipt><</script>',

# Regression test from blog
('<script async src="https://platform.twitter.com/widgets.js" charset="utf-8"></script>',
'')

# Note: Python HTMLParser.py does DYNAMIC compilation of regex with re.I
# flag to handle this! Gah I want something faster.
#'<script><</SCRIPT>',
Expand Down

0 comments on commit c668460

Please sign in to comment.