Skip to content

Commit a0c7a17

Browse files
committed
Fix an edge case with br attribute sanitization
Since we're depending on lxml's cleaner, this isn't a security issue, just unexpected behavior. Thanks to Sean Gilligan for the report!
1 parent b84099f commit a0c7a17

File tree

3 files changed

+17
-2
lines changed

3 files changed

+17
-2
lines changed

CHANGELOG.rst

+3
Original file line number | Diff line number | Diff line change
@@ -5,6 +5,9 @@ Change log
55
Next version
66
============
77

8+
- Fixed an edge case where ``br`` tag attributes weren't removed if the br tag
9+
appears first.
10+
811

912
2.3 (2024-02-07)
1013
================

html_sanitizer/sanitizer.py

-1
Original file line number | Diff line number | Diff line change
@@ -337,7 +337,6 @@ def sanitize(self, html):
337337
)
338338
):
339339
nx.drop_tag()
340-
continue
341340

342341
if not element.text:
343342
# No text before first child and first child is a <br>: Drop it

html_sanitizer/tests.py

+14-1
Original file line number | Diff line number | Diff line change
@@ -73,6 +73,7 @@ def test_01_sanitize(self):
7373
("<a> </a>", "<a> </a>"),
7474
# ...but breaks without any additional content are still removed
7575
("<a><br /> </a>", "<a> </a>"),
76+
("<p>blab<br hello='world' />blub<p>", "<p>blab<br>blub</p>"),
7677
]
7778

7879
self.run_tests(entries)
@@ -104,7 +105,9 @@ def test_03_merge(self):
104105
self.run_tests(entries)
105106

106107
def test_no_space_between_same_tags(self):
107-
entries = [("<strong>Hel</strong><strong>lo</strong>", "<strong>Hello</strong>")]
108+
entries = [
109+
("<strong>Hel</strong><strong>lo</strong>", "<strong>Hello</strong>")
110+
]
108111
self.run_tests(entries)
109112

110113
def test_04_p_in_li(self):
@@ -642,3 +645,13 @@ def test_code_whitespace(self):
642645
"""
643646

644647
self.run_tests([(html, html)], sanitizer=sanitizer)
648+
649+
def test_br_attribute_sanitization(self):
650+
"""Attributes which aren't allowlisted are removed from br tags"""
651+
self.run_tests(
652+
[
653+
("<p><br hello=\"alert('world');\"/><br></p>", ""),
654+
('<p hello="world"></p>', ""),
655+
("<br hello=\"alert('world');\"/><br>", "<br>"),
656+
]
657+
)

0 commit comments

Comments
 (0)