Skip to content

Commit a9c44e2

Browse files
committed
add regex to paragraph selector
1 parent 40fd1a8 commit a9c44e2

File tree

1 file changed

+1
-1
lines changed
  • src/fundus/publishers/de

1 file changed

+1
-1
lines changed

src/fundus/publishers/de/dw.py

+1 -1
Original file line number | Diff line number | Diff line change
@@ -23,7 +23,7 @@ class V2(BaseParser):
23 23
# https://regex101.com/r/uZLwyb/1
24 24
_author_regex = r"^([a-z]{2,3}\/|[A-Z]{2,3}\/)*([a-z]{2,3}|[A-Z]{2,3})\s\(([a-z]{2,3}, )*([a-z]{2,3})\)$"
25 25
_paragraph_selector = XPath(
26-
f"//div[contains(@class, 'rich-text')] /p[not(em) or text() and not(re:test(text(), '{_author_regex}'))]",
26+
f"//div[contains(@class, 'rich-text')] /p[text() and not(re:test(text(), '{_author_regex}'))]",
27 27
namespaces={"re": "http://exslt.org/regular-expressions"},
28 28
)
29 29
_summary_selector = CSSSelector("header > p")

0 commit comments

Comments
 (0)