Skip to content

Commit 9f8cf6a

Browse files
committed
add -u parameter to scripts.generate_parser_test_files to specify URL(s) for generated test cases
1 parent 69c9984 commit 9f8cf6a

File tree

1 file changed

+31
-9
lines changed

1 file changed

+31
-9
lines changed

scripts/generate_parser_test_files.py

+31 -9
Original file line numberDiff line numberDiff line change
@@ -5,16 +5,24 @@
 
 from tqdm import tqdm
 
-from fundus import Crawler, PublisherCollection
+from fundus import BaseCrawler, Crawler, PublisherCollection
 from fundus.logging import basic_logger
 from fundus.publishers.base_objects import PublisherEnum
 from fundus.scraping.article import Article
+from fundus.scraping.html import FundusSource
+from fundus.scraping.scraper import Scraper
 from tests.test_parser import attributes_required_to_cover
 from tests.utility import HTMLTestFile, get_test_case_json, load_html_test_file_mapping
 
 
-def get_test_article(enum: PublisherEnum) -> Optional[Article]:
-    crawler = Crawler(enum)
+def get_test_article(enum: PublisherEnum, url: Optional[str] = None) -> Optional[Article]:
+    crawler: BaseCrawler
+    if url is None:
+        crawler = Crawler(enum)
+    else:
+        source = FundusSource([url], publisher=enum.publisher_name)
+        scraper = Scraper(source, parser=enum.parser)
+        crawler = BaseCrawler(scraper)
     return next(crawler.crawl(max_articles=1, error_handling="suppress", only_complete=True), None)
 
 
@@ -31,9 +39,17 @@ def get_test_article(enum: PublisherEnum) -> Optional[Article]:
3139
"attributes",
3240
metavar="A",
3341
nargs="*",
34-
help=f"the attributes which should be used to create test cases. default: {', '.join(attributes_required_to_cover)}",
42+
help=f"the attributes which should be used to create test cases. "
43+
f"default: {', '.join(attributes_required_to_cover)}",
3544
)
3645
parser.add_argument("-p", dest="publishers", metavar="P", nargs="+", help="only consider given publishers")
46+
parser.add_argument(
47+
"-u",
48+
dest="urls",
49+
metavar="U",
50+
nargs="+",
51+
help="use given URL instead of searching for an article. if set the urls will be mapped to the order of -p",
52+
)
3753
group = parser.add_mutually_exclusive_group()
3854
group.add_argument(
3955
"-o",
@@ -50,19 +66,25 @@ def get_test_article(enum: PublisherEnum) -> Optional[Article]:
5066

5167
args = parser.parse_args()
5268

69+
if args.urls is not None:
70+
if args.publishers is None:
71+
parser.error("-u requires -p. you can only specify URLs when also specifying publishers.")
72+
if len(args.urls) != len(args.publishers):
73+
parser.error("-u and -p do not have the same argument length")
74+
5375
# sort args.attributes for consistency
5476
args.attributes = list(sorted(args.attributes)) or attributes_required_to_cover
5577

5678
basic_logger.setLevel(WARN)
5779

5880
publishers: List[PublisherEnum] = (
59-
list(PublisherCollection)
60-
if args.publishers is None
61-
else [pub for pub in PublisherCollection if pub.name in args.publishers]
81+
list(PublisherCollection) if args.publishers is None else [PublisherCollection[pub] for pub in args.publishers]
6282
)
6383

84+
urls = args.urls if args.urls is not None else [None] * len(publishers)
85+
6486
with tqdm(total=len(publishers)) as bar:
65-
for publisher in publishers:
87+
for url, publisher in zip(urls, publishers):
6688
bar.set_description(desc=publisher.name, refresh=True)
6789

6890
# load json
@@ -73,7 +95,7 @@ def get_test_article(enum: PublisherEnum) -> Optional[Article]:
7395
html_mapping = load_html_test_file_mapping(publisher) if not args.overwrite else {}
7496

7597
if args.overwrite or not html_mapping.get(publisher.parser.latest_version):
76-
if not (article := get_test_article(publisher)):
98+
if not (article := get_test_article(publisher, url)):
7799
basic_logger.warning(f"Couldn't get article for {publisher.name}. Skipping")
78100
continue
79101
html = HTMLTestFile(

0 commit comments

Comments
 (0)