Skip to content

Commit c0254db

Browse files
authored
Merge pull request #693 from flairNLP/update-inews
Add parser `v1_1` - `iNews`
2 parents 7c5d78c + 29dc940 commit c0254db

File tree

4 files changed

+130
-0
lines changed

4 files changed

+130
-0
lines changed

src/fundus/publishers/uk/i_news.py

+16
Original file line number | Diff line number | Diff line change
@@ -16,6 +16,7 @@
1616

1717
class INewsParser(ParserProxy):
1818
class V1(BaseParser):
19+
VALID_UNTIL = datetime.date(2025, 1, 1)
1920
_summary_selector = CSSSelector("article > h2")
2021
_paragraph_selector = CSSSelector("article div.article-content p")
2122

@@ -53,3 +54,18 @@ def images(self) -> List[Image]:
5354
image_selector=CSSSelector("figure:has(> figcaption) img"),
5455
author_selector=re.compile(r"\((?P<credits>.*?)\)$"),
5556
)
57+
58+
class V1_1(V1):
    """Variant of ``V1`` that swaps the summary selector and re-declares ``images``.

    Anything not overridden here is inherited from ``V1``.
    """

    # Evaluated once, when this class body executes (i.e. at import time).
    VALID_UNTIL = datetime.date.today()

    # Replaces the ``article > h2`` summary selector used by ``V1``.
    _summary_selector = CSSSelector("article p.inews__post-excerpt")

    @attribute
    def images(self) -> List[Image]:
        """Return the article's images as produced by ``image_extraction``.

        The call is fed the precomputed document, the paragraph selector
        inherited from ``V1``, and three matchers built on each invocation.
        """
        document = self.precomputed.doc
        paragraphs = self._paragraph_selector

        upper_boundary = CSSSelector("div.article-wrapper")
        # Only <img> elements inside a <figure> that has a direct <figcaption> child.
        captioned_images = CSSSelector("figure:has(> figcaption) img")
        # Trailing "(...)" at the end of the string; its content is the named group ``credits``.
        trailing_credits = re.compile(r"\((?P<credits>.*?)\)$")

        return image_extraction(
            doc=document,
            paragraph_selector=paragraphs,
            upper_boundary_selector=upper_boundary,
            image_selector=captioned_images,
            author_selector=trailing_credits,
        )

tests/resources/parser/test_data/uk/iNews.json

+110
Original file line number | Diff line number | Diff line change
@@ -138,5 +138,115 @@
138138
"Katie Boulter",
139139
"US Open"
140140
]
141+
},
142+
"V1_1": {
143+
"authors": [
144+
"Jacqui Housden"
145+
],
146+
"body": {
147+
"summary": [
148+
"Hollywood legend escorted off stage after protesters climb up holding sign saying 'over 1.5 degrees is a global shipwreck'"
149+
],
150+
"sections": [
151+
{
152+
"headline": [],
153+
"paragraphs": [
154+
"Two people have been arrested after Just Stop Oil protesters disrupted a West End performance of The Tempest starring Sigourney Weaver on Monday night.",
155+
"A 42-year-old woman and a 60-year-old man have been arrested, the Metropolitan Police said.",
156+
"A video posted on social media by the group showed two activists climbing on stage holding a sign that read “over 1.5 degrees is a global shipwreck”, a reference to the recent announcement that 2024 had been the warmest on record globally and the first full year when the average temperature exceeded 1.5°C above pre-industrial levels.",
157+
"The pair also launched a confetti cannon just after Weaver uttered the lines: “Upon thy wicked dam, come forth!”",
158+
"A voice is then heard saying: “We’ll have to stop the show ladies and gentlemen, sorry”.",
159+
"Hollywood legend Weaver, who had been sitting on a chair, was escorted off stage at the Theatre Royal in Drury Lane on Monday, while the two protesters faced boos and a few cheers from the audience.",
160+
"One of the protesters, Hayley Walsh, 42, a lecturer from Nottingham, said: “Years of writing to MPs, going on marches and teaching my students to be more sustainable hasn’t seen the urgent change needed.",
161+
"“I am scared for my children, I can’t sleepwalk them into a future of food shortages, life-threatening storms and wars for resources.",
162+
"“1.5 degrees is a global shipwreck we can’t ignore. Wildfires in California, deadly floods in Valencia and hundreds of thousands without power in the UK this weekend.",
163+
"“This isn’t a distant, future problem. We need a global treaty to stop fossil fuel burning and a global emergency response.”",
164+
"Fellow protester, mechanical engineer Richard Weir, 60, from Tynemouth, North Tyneside, said: “I started my career in the shipyards of Tyneside and I watched management inaction lead to the collapse of UK manufacturing.",
165+
"“Now I see similar failures of leadership as politicians refuse to take action to protect us and our loved ones.",
166+
"“We’re already seeing the damage this crisis is doing to crops, homes and entire neighbourhoods. Unless we come together and demand a move away from fossil fuels by 2030, we will go the same way as manufacturing in the UK.”",
167+
"Bafta-award winning actress Weaver plays the storm-creating magician Prospero in the new staging of the Shakespeare classic, in a role typically played by a man.",
168+
"The production opened in December and will run until February 1."
169+
]
170+
}
171+
]
172+
},
173+
"images": [
174+
{
175+
"versions": [
176+
{
177+
"url": "https://inews.co.uk/wp-content/uploads/2025/01/01JJMXM1T3Z99HXM3DJ5QB8YQF.jpg?crop=0px%2C33px%2C1198px%2C677px&resize=640%2C360",
178+
"query_width": null,
179+
"size": {
180+
"width": 16,
181+
"height": 9
182+
},
183+
"type": "image/jpeg"
184+
}
185+
],
186+
"is_cover": true,
187+
"description": "Article thumbnail image",
188+
"caption": "The protesters came on stage during Sigourney Weaver’s performance as Prospero in Shakespeare’s ‘The Tempest’",
189+
"authors": [
190+
"Just Stop Oil/PA Wire"
191+
],
192+
"position": 576
193+
},
194+
{
195+
"versions": [
196+
{
197+
"url": "https://inews.co.uk/wp-content/uploads/2025/01/01JJMTKRSAHE83RF1X0TJSKAPX.jpg?resize=300,175",
198+
"query_width": null,
199+
"size": {
200+
"width": 300,
201+
"height": 175
202+
},
203+
"type": "image/jpeg"
204+
},
205+
{
206+
"url": "https://inews.co.uk/wp-content/uploads/2025/01/01JJMTKRSAHE83RF1X0TJSKAPX.jpg?resize=380,222",
207+
"query_width": null,
208+
"size": {
209+
"width": 380,
210+
"height": 222
211+
},
212+
"type": "image/jpeg"
213+
},
214+
{
215+
"url": "https://inews.co.uk/wp-content/uploads/2025/01/01JJMTKRSAHE83RF1X0TJSKAPX.jpg?resize=760,444",
216+
"query_width": null,
217+
"size": {
218+
"width": 760,
219+
"height": 444
220+
},
221+
"type": "image/jpeg"
222+
},
223+
{
224+
"url": "https://inews.co.uk/wp-content/uploads/2025/01/01JJMTKRSAHE83RF1X0TJSKAPX.jpg",
225+
"query_width": null,
226+
"size": {
227+
"width": 1200,
228+
"height": 701
229+
},
230+
"type": "image/jpeg"
231+
}
232+
],
233+
"is_cover": false,
234+
"description": null,
235+
"caption": "One of the protesters said she wanted to see a global treaty to stop fossil fuel burning",
236+
"authors": [
237+
"Just Stop Oil/PA Wire"
238+
],
239+
"position": 650
240+
}
241+
],
242+
"publishing_date": "2025-01-28 11:30:00+00:00",
243+
"title": "Two arrested after Just Stop Oil protest at Sigourney Weaver West End play",
244+
"topics": [
245+
"Climate Change",
246+
"Global Warming",
247+
"Just Stop Oil",
248+
"Protests",
249+
"West End Theatre"
250+
]
141251
}
142252
}
Binary file not shown.

tests/resources/parser/test_data/uk/meta.info

+4
Original file line number | Diff line number | Diff line change
@@ -66,5 +66,9 @@
6666
"iNews_2023_08_30.html.gz": {
6767
"url": "https://inews.co.uk/sport/tennis/us-open-2023-british-players-results-andy-murray-katie-boulter-cam-norrie-evans-burrage-draper-2580837",
6868
"crawl_date": "2023-08-30 18:39:34.320432"
69+
},
70+
"iNews_2025_01_28.html.gz": {
71+
"url": "https://inews.co.uk/news/environment/just-stop-oil-activists-disrupt-west-end-play-as-sigourney-weaver-performs-3504901",
72+
"crawl_date": "2025-01-28 15:40:34.688064"
6973
}
7074
}

0 commit comments

Comments
 (0)