Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add utility to retrieve test articles #355

Merged
merged 1 commit into from
Feb 18, 2024
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 20 additions & 1 deletion tests/utility.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,19 +3,38 @@
import json
from dataclasses import dataclass
from pathlib import Path
from typing import Any, Callable, Dict, Generic, Optional, Type, TypeVar
from typing import Any, Callable, Dict, Generic, List, Optional, Type, TypeVar

from typing_extensions import Self

from fundus import PublisherCollection
from fundus.parser import BaseParser
from fundus.publishers.base_objects import PublisherEnum
from fundus.scraping.article import Article
from fundus.scraping.html import HTML, HTMLSource
from scripts.generate_tables import supported_publishers_markdown_path
from tests.resources.parser.test_data import __module_path__ as test_resource_path

_T = TypeVar("_T")


def get_test_articles(publisher: PublisherEnum) -> List[Article]:
    """Build one ``Article`` per HTML test file stored for ``publisher``.

    The test files are looked up with ``load_html_test_file_mapping``. For each
    file, the parser returned by ``publisher.parser(<crawl date>)`` parses the
    file's content, and the result is combined with an ``HTML`` object built
    from the same file via ``Article.from_extracted``.

    Args:
        publisher: The publisher whose test files should be converted.

    Returns:
        The created articles, in the iteration order of the mapping's values.
    """

    def _build_article(test_file) -> Article:
        # Parse first, then wrap the raw file, mirroring the order of operations
        # a failure would surface in.
        parser = publisher.parser(test_file.crawl_date)
        extracted = parser.parse(test_file.content)
        # The test file carries a single URL, so it is used for both the
        # requested and the responded URL.
        page = HTML(
            content=test_file.content,
            crawl_date=test_file.crawl_date,
            requested_url=test_file.url,
            responded_url=test_file.url,
            source=HTMLSource(publisher.publisher_name),
        )
        return Article.from_extracted(extracted=extracted, html=page)

    test_files = load_html_test_file_mapping(publisher)
    return [_build_article(test_file) for test_file in test_files.values()]


@dataclass
class JSONFile(Generic[_T]):
"""Generic file class representing a JSON file.
Expand Down
Loading