Skip to content

Commit 61d2c9a

Browse files
authored (author name lost in page extraction)
Merge pull request #346 from flairNLP/bug-fixes
Two major bug fixes
2 parents d86d52c + 9db290e commit 61d2c9a

File tree

2 files changed

+3
-3
lines changed

2 files changed

+3
-3
lines changed

src/fundus/scraping/pipeline.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -65,7 +65,7 @@ async def crawl_async(
6565
self,
6666
max_articles: Optional[int] = None,
6767
error_handling: Literal["suppress", "catch", "raise"] = "suppress",
68-
only_complete: Union[bool, ExtractionFilter] = Requires("title, body", "publishing_date"),
68+
only_complete: Union[bool, ExtractionFilter] = Requires("title", "body", "publishing_date"),
6969
delay: Optional[Union[float, Delay]] = None,
7070
url_filter: Optional[URLFilter] = None,
7171
only_unique: bool = True,
@@ -232,7 +232,7 @@ def __init__(
232232
233233
Args:
234234
*publishers (Union[PublisherEnum, Type[PublisherEnum]]): The publishers to crawl.
235-
restrict_sources_to (Optional[List[Type[URLSource]]]): Let's you restrict
235+
restrict_sources_to (Optional[List[Type[URLSource]]]): Lets you restrict
236236
sources defined in the publisher specs. If set, only articles from given source types
237237
will be yielded.
238238
"""

src/fundus/scraping/scraper.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -62,7 +62,7 @@ async def scrape(
6262
continue
6363
elif error_handling == "suppress":
6464
basic_logger.info(f"Skipped article at '{html.requested_url}' because of: {err!r}")
65-
continue
65+
yield None
6666
else:
6767
raise ValueError(f"Unknown value '{error_handling}' for parameter <error_handling>'")
6868

0 commit comments

Comments (0)