Commit 2659bee (1 parent: e76a236)
File tree: 1 file changed, +14 -3 lines changed
Original file line number | Diff line number | Diff line change
5
5
from fundus .logging import basic_logger
6
6
from fundus .parser import ParserProxy
7
7
from fundus .scraping .article import Article
8
- from fundus .scraping .filter import ExtractionFilter , Requires , URLFilter
8
+ from fundus .scraping .filter import (
9
+ ExtractionFilter ,
10
+ FilterResultWithMissingAttributes ,
11
+ Requires ,
12
+ URLFilter ,
13
+ )
9
14
from fundus .scraping .html import FundusSource
10
15
11
16
@@ -66,8 +71,14 @@ async def scrape(
66
71
else :
67
72
raise ValueError (f"Unknown value '{ error_handling } ' for parameter <error_handling>'" )
68
73
69
- if extraction_filter and extraction_filter (extraction ):
70
- basic_logger .debug (f"Skipped article at '{ html .requested_url } ' because of extraction filter" )
74
+ if extraction_filter and (filter_result := extraction_filter (extraction )):
75
+ if isinstance (filter_result , FilterResultWithMissingAttributes ):
76
+ basic_logger .debug (
77
+ f"Skipped article at '{ html .requested_url } ' because attribute(s) "
78
+ f"{ ', ' .join (filter_result .missing_attributes )!r} is(are) missing"
79
+ )
80
+ else :
81
+ basic_logger .debug (f"Skipped article at '{ html .requested_url } ' because of extraction filter" )
71
82
yield None
72
83
else :
73
84
article = Article .from_extracted (html = html , extracted = extraction )
You can’t perform that action at this time.
0 commit comments