Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add parser v1_1 - iNews #693

Merged
merged 1 commit into from
Feb 4, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 16 additions & 0 deletions src/fundus/publishers/uk/i_news.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@

class INewsParser(ParserProxy):
class V1(BaseParser):
VALID_UNTIL = datetime.date(2025, 1, 1)
_summary_selector = CSSSelector("article > h2")
_paragraph_selector = CSSSelector("article div.article-content p")

Expand Down Expand Up @@ -53,3 +54,18 @@ def images(self) -> List[Image]:
image_selector=CSSSelector("figure:has(> figcaption) img"),
author_selector=re.compile(r"\((?P<credits>.*?)\)$"),
)

class V1_1(V1):
    # Parser revision layered on top of V1: it swaps the summary selector and
    # supplies its own image extraction; everything else is inherited from V1.

    # Evaluated once, when this class body is executed.
    # NOTE(review): presumably marks this version as the currently valid one;
    # confirm against how ParserProxy interprets VALID_UNTIL.
    VALID_UNTIL = datetime.date.today()

    # The summary is read from the excerpt paragraph inside the article.
    _summary_selector = CSSSelector("article p.inews__post-excerpt")

    @attribute
    def images(self) -> List[Image]:
        # Only <img> elements whose <figure> has a direct <figcaption> child
        # are selected.
        captioned_figure_images = CSSSelector("figure:has(> figcaption) img")
        # Selector handed to image_extraction as the upper boundary.
        wrapper_boundary = CSSSelector("div.article-wrapper")
        # Captures the text inside a trailing "(...)" as the `credits` group.
        trailing_credit_pattern = re.compile(r"\((?P<credits>.*?)\)$")

        return image_extraction(
            doc=self.precomputed.doc,
            paragraph_selector=self._paragraph_selector,
            upper_boundary_selector=wrapper_boundary,
            image_selector=captioned_figure_images,
            author_selector=trailing_credit_pattern,
        )
110 changes: 110 additions & 0 deletions tests/resources/parser/test_data/uk/iNews.json
Original file line number Diff line number Diff line change
Expand Up @@ -138,5 +138,115 @@
"Katie Boulter",
"US Open"
]
},
"V1_1": {
"authors": [
"Jacqui Housden"
],
"body": {
"summary": [
"Hollywood legend escorted off stage after protesters climb up holding sign saying 'over 1.5 degrees is a global shipwreck'"
],
"sections": [
{
"headline": [],
"paragraphs": [
"Two people have been arrested after Just Stop Oil protesters disrupted a West End performance of The Tempest starring Sigourney Weaver on Monday night.",
"A 42-year-old woman and a 60-year-old man have been arrested, the Metropolitan Police said.",
"A video posted on social media by the group showed two activists climbing on stage holding a sign that read “over 1.5 degrees is a global shipwreck”, a reference to the recent announcement that 2024 had been the warmest on record globally and the first full year when the average temperature exceeded 1.5°C above pre-industrial levels.",
"The pair also launched a confetti cannon just after Weaver uttered the lines: “Upon thy wicked dam, come forth!”",
"A voice is then heard saying: “We’ll have to stop the show ladies and gentlemen, sorry”.",
"Hollywood legend Weaver, who had been sitting on a chair, was escorted off stage at the Theatre Royal in Drury Lane on Monday, while the two protesters faced boos and a few cheers from the audience.",
"One of the protesters, Hayley Walsh, 42, a lecturer from Nottingham, said: “Years of writing to MPs, going on marches and teaching my students to be more sustainable hasn’t seen the urgent change needed.",
"“I am scared for my children, I can’t sleepwalk them into a future of food shortages, life-threatening storms and wars for resources.",
"“1.5 degrees is a global shipwreck we can’t ignore. Wildfires in California, deadly floods in Valencia and hundreds of thousands without power in the UK this weekend.",
"“This isn’t a distant, future problem. We need a global treaty to stop fossil fuel burning and a global emergency response.”",
"Fellow protester, mechanical engineer Richard Weir, 60, from Tynemouth, North Tyneside, said: “I started my career in the shipyards of Tyneside and I watched management inaction lead to the collapse of UK manufacturing.",
"“Now I see similar failures of leadership as politicians refuse to take action to protect us and our loved ones.",
"“We’re already seeing the damage this crisis is doing to crops, homes and entire neighbourhoods. Unless we come together and demand a move away from fossil fuels by 2030, we will go the same way as manufacturing in the UK.”",
"Bafta-award winning actress Weaver plays the storm-creating magician Prospero in the new staging of the Shakespeare classic, in a role typically played by a man.",
"The production opened in December and will run until February 1."
]
}
]
},
"images": [
{
"versions": [
{
"url": "https://inews.co.uk/wp-content/uploads/2025/01/01JJMXM1T3Z99HXM3DJ5QB8YQF.jpg?crop=0px%2C33px%2C1198px%2C677px&resize=640%2C360",
"query_width": null,
"size": {
"width": 16,
"height": 9
},
"type": "image/jpeg"
}
],
"is_cover": true,
"description": "Article thumbnail image",
"caption": "The protesters came on stage during Sigourney Weaver’s performance as Prospero in Shakespeare’s ‘The Tempest’",
"authors": [
"Just Stop Oil/PA Wire"
],
"position": 576
},
{
"versions": [
{
"url": "https://inews.co.uk/wp-content/uploads/2025/01/01JJMTKRSAHE83RF1X0TJSKAPX.jpg?resize=300,175",
"query_width": null,
"size": {
"width": 300,
"height": 175
},
"type": "image/jpeg"
},
{
"url": "https://inews.co.uk/wp-content/uploads/2025/01/01JJMTKRSAHE83RF1X0TJSKAPX.jpg?resize=380,222",
"query_width": null,
"size": {
"width": 380,
"height": 222
},
"type": "image/jpeg"
},
{
"url": "https://inews.co.uk/wp-content/uploads/2025/01/01JJMTKRSAHE83RF1X0TJSKAPX.jpg?resize=760,444",
"query_width": null,
"size": {
"width": 760,
"height": 444
},
"type": "image/jpeg"
},
{
"url": "https://inews.co.uk/wp-content/uploads/2025/01/01JJMTKRSAHE83RF1X0TJSKAPX.jpg",
"query_width": null,
"size": {
"width": 1200,
"height": 701
},
"type": "image/jpeg"
}
],
"is_cover": false,
"description": null,
"caption": "One of the protesters said she wanted to see a global treaty to stop fossil fuel burning",
"authors": [
"Just Stop Oil/PA Wire"
],
"position": 650
}
],
"publishing_date": "2025-01-28 11:30:00+00:00",
"title": "Two arrested after Just Stop Oil protest at Sigourney Weaver West End play",
"topics": [
"Climate Change",
"Global Warming",
"Just Stop Oil",
"Protests",
"West End Theatre"
]
}
}
Binary file not shown.
4 changes: 4 additions & 0 deletions tests/resources/parser/test_data/uk/meta.info
Original file line number Diff line number Diff line change
Expand Up @@ -66,5 +66,9 @@
"iNews_2023_08_30.html.gz": {
"url": "https://inews.co.uk/sport/tennis/us-open-2023-british-players-results-andy-murray-katie-boulter-cam-norrie-evans-burrage-draper-2580837",
"crawl_date": "2023-08-30 18:39:34.320432"
},
"iNews_2025_01_28.html.gz": {
"url": "https://inews.co.uk/news/environment/just-stop-oil-activists-disrupt-west-end-play-as-sigourney-weaver-performs-3504901",
"crawl_date": "2025-01-28 15:40:34.688064"
}
}