Skip to content

Commit a378007

Browse files
committed
add images to TaipeiTimes
1 parent 86cdcf4 commit a378007

File tree

2 files changed

+32
-1
lines changed

2 files changed

+32
-1
lines changed

src/fundus/publishers/tw/TaipeiTimes.py

+13-1
Original file line numberDiff line numberDiff line change
@@ -4,11 +4,12 @@
44

55
from lxml.etree import XPath
66

7-
from fundus.parser import ArticleBody, BaseParser, ParserProxy, attribute
7+
from fundus.parser import ArticleBody, BaseParser, Image, ParserProxy, attribute
88
from fundus.parser.utility import (
99
extract_article_body_with_selector,
1010
generic_author_parsing,
1111
generic_date_parsing,
12+
image_extraction,
1213
)
1314

1415

@@ -47,3 +48,14 @@ def publishing_date(self) -> Optional[datetime.datetime]:
4748
@attribute
4849
def title(self) -> Optional[str]:
4950
return self.precomputed.ld.bf_search("headline")
51+
52+
@attribute
53+
def images(self) -> List[Image]:
54+
return image_extraction(
55+
doc=self.precomputed.doc,
56+
paragraph_selector=self._paragraph_selector,
57+
upper_boundary_selector=XPath("//div[@class='archives']"),
58+
image_selector=XPath("//div[@class='imgboxa']//img"),
59+
caption_selector=XPath("./ancestor::div[@class='imgboxa']//h1"),
60+
author_selector=XPath("./ancestor::div[@class='imgboxa']//p"),
61+
)

tests/resources/parser/test_data/tw/TaipeiTimes.json

+19
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,25 @@
3333
}
3434
]
3535
},
36+
"images": [
37+
{
38+
"versions": [
39+
{
40+
"url": "https://www.taipeitimes.com/images/2024/10/16/P01-241016-317.jpg",
41+
"query_width": null,
42+
"size": null,
43+
"type": "image/jpeg"
44+
}
45+
],
46+
"is_cover": false,
47+
"description": null,
48+
"caption": "Philippine Marine Corps Commandant Major-General Arturo Roja, second left, US Marines exercise representative Colonel Stuart Glenn, left, and Philippine Marine Corps and exercise director Brigadier General Vicente Blanco, second right, take part in the opening ceremony of the annual Kamandag joint military exercises at the Philippine Marines officers’ club at Fort Bonifacio, Metro Manila, yesterday.",
49+
"authors": [
50+
"AFP"
51+
],
52+
"position": 170
53+
}
54+
],
3655
"publishing_date": "2024-10-16 00:00:00+08:00",
3756
"title": "US, Manila launch war games after PRC’s Taiwan drills"
3857
}

0 commit comments

Comments
 (0)