Skip to content

Commit d3f9cc0

Browse files
authored
Merge pull request #699 from flairNLP/add-pt
Add `PT`
2 parents 39cf171 + be797a3 commit d3f9cc0

File tree

7 files changed

+248
-0
lines changed

7 files changed

+248
-0
lines changed

docs/supported_publishers.md

+34
Original file line numberDiff line numberDiff line change
@@ -1683,6 +1683,40 @@
16831683
</table>
16841684

16851685

1686+
## PT-Publishers
1687+
1688+
<table class="publishers pt">
1689+
<thead>
1690+
<tr>
1691+
<th>Class&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;</th>
1692+
<th>Name&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;</th>
1693+
<th>URL&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;</th>
1694+
<th>Missing&#160;Attributes</th>
1695+
<th>Additional&#160;Attributes&#160;&#160;&#160;&#160;</th>
1696+
</tr>
1697+
</thead>
1698+
<tbody>
1699+
<tr>
1700+
<td>
1701+
<code>ThePortugalNews</code>
1702+
</td>
1703+
<td>
1704+
<div>The Portugal News</div>
1705+
</td>
1706+
<td>
1707+
<a href="https://www.theportugalnews.com/">
1708+
<span>www.theportugalnews.com</span>
1709+
</a>
1710+
</td>
1711+
<td>
1712+
<code>topics</code>
1713+
</td>
1714+
<td>&#160;</td>
1715+
</tr>
1716+
</tbody>
1717+
</table>
1718+
1719+
16861720
## TR-Publishers
16871721

16881722
<table class="publishers tr">

src/fundus/publishers/__init__.py

+2
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020
from fundus.publishers.na import NA
2121
from fundus.publishers.no import NO
2222
from fundus.publishers.pl import PL
23+
from fundus.publishers.pt import PT
2324
from fundus.publishers.tr import TR
2425
from fundus.publishers.tw import TW
2526
from fundus.publishers.tz import TZ
@@ -71,6 +72,7 @@ class PublisherCollection(metaclass=PublisherCollectionMeta):
7172
be = BE
7273
tr = TR
7374
my = MY
75+
pt = PT
7476
pl = PL
7577
ind = IND
7678
no = NO

src/fundus/publishers/pt/__init__.py

+16
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
from fundus.publishers.base_objects import Publisher, PublisherGroup
2+
from fundus.scraping.filter import regex_filter
3+
from fundus.scraping.url import Sitemap
4+
5+
from .the_portugal_news import ThePortugalNewsParser
6+
7+
8+
# Publisher group for Portugal (PT); each class attribute is one supported publisher.
class PT(metaclass=PublisherGroup):
    # The Portugal News. Name and domain must describe the same site the parser
    # and the sitemap below belong to (they were previously copied from the
    # Australian "Nine News" entry).
    ThePortugalNews = Publisher(
        name="The Portugal News",
        domain="https://www.theportugalnews.com/",
        parser=ThePortugalNewsParser,
        sources=[
            # NOTE(review): the regex filter on "category-pages" presumably keeps
            # category listing sitemaps out of the crawl -- confirm against the
            # semantics of `sitemap_filter`.
            Sitemap(
                "https://www.theportugalnews.com/sitemap.xml",
                sitemap_filter=regex_filter("category-pages"),
            ),
        ],
    )
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,54 @@
1+
import datetime
2+
import re
3+
from typing import List, Optional
4+
5+
from lxml.etree import XPath
6+
7+
from fundus.parser import ArticleBody, BaseParser, Image, ParserProxy, attribute
8+
from fundus.parser.utility import (
9+
extract_article_body_with_selector,
10+
generic_author_parsing,
11+
generic_date_parsing,
12+
image_extraction,
13+
)
14+
15+
16+
# Parser for articles published on theportugalnews.com.
class ThePortugalNewsParser(ParserProxy):
    class V1(BaseParser):
        # Paragraphs: <p> elements anywhere below the article body whose first
        # text node is longer than one character.
        _paragraph_selector = XPath("//div[@class='article-body']//p[string-length(text())>1]")
        # Subheadlines: <b> elements directly inside a top-level <p> of the
        # article body, excluding those that contain a <u> child.
        _subheadline_selector = XPath("//div[@class='article-body']/p/b[not(u)]")
        _summary_selector = XPath("//div[@class='fs-4 font-semibold mb-3']")

        # Text nodes of the byline paragraph; `authors` parses the first one.
        _author_selector = XPath("//div[@class='col-lg-10 order-lg-1']/p//text()")

        # Captures the names between "By" and the trailing ", in" of the byline.
        # Only whitespace (including line breaks) is allowed between the comma
        # and "in"; the previous character class also accepted a literal "r".
        _author_pattern = re.compile(r"(?i)by\s*(?P<authors>.*),\s*in")

        @attribute
        def body(self) -> Optional[ArticleBody]:
            # Build the article body from the summary, subheadline and paragraph selectors.
            return extract_article_body_with_selector(
                self.precomputed.doc,
                paragraph_selector=self._paragraph_selector,
                subheadline_selector=self._subheadline_selector,
                summary_selector=self._summary_selector,
            )

        @attribute
        def publishing_date(self) -> Optional[datetime.datetime]:
            # The date is read from the "datePublished" entry of the linked data.
            return generic_date_parsing(self.precomputed.ld.bf_search("datePublished"))

        @attribute
        def title(self) -> Optional[str]:
            # The title is read from the "headline" entry of the linked data.
            return self.precomputed.ld.bf_search("headline")

        @attribute
        def authors(self) -> List[str]:
            # Returns an empty list when there is no byline text or when the
            # first text node does not match the byline pattern.
            author_objects = self._author_selector(self.precomputed.doc)
            if author_objects and (author := self._author_pattern.search(author_objects[0])):
                return generic_author_parsing(author.group("authors"))
            return []

        @attribute
        def images(self) -> List[Image]:
            # Image authors are taken from text following "Credits:" (case-insensitive).
            return image_extraction(
                doc=self.precomputed.doc,
                paragraph_selector=self._paragraph_selector,
                author_selector=re.compile(r"(?i)credits:\s*(?P<credits>.*)"),
            )
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,136 @@
1+
{
2+
"V1": {
3+
"authors": [
4+
"TPN"
5+
],
6+
"body": {
7+
"summary": [
8+
"In 2024, Athena Advisers recorded a 40% increase in demand for properties in the French Alps from various nationalities based in Portugal, namely Americans, British, French and Portuguese."
9+
],
10+
"sections": [
11+
{
12+
"headline": [],
13+
"paragraphs": [
14+
"While Serra da Estrela offers stunning landscapes and a light skiing experience, it is far from being sufficient for the level of demand of the growing community of skiers that is settling in Portugal, particularly in Lisbon. Which is why renowned ski destinations such as the French Alps, just a short flight away, are emerging as interesting options for property investors based in Portugal.",
15+
"In 2024, Athena Advisers - a real estate consultancy specializing in the French Alps market since its foundation over 20 years ago - recorded a 40% growth in demand for properties in the French Alps by the population residing in Portugal, considering a group of nationalities that includes mainly Americans, British, French and Portuguese. According to data from the European Central Bank, the net worth of Portuguese families reached 855 billion euros in 2023, an increase of 6% compared to 2022 and 28% compared to 2019, which explains why the Portuguese with greater financial capacity are creating new investment trends.",
16+
"“Lisbon’s growing prosperity is redefining the map of real estate investment in Europe,” comments David Moura-George, Managing Director of Athena Advisers Portugal. \"The sun and the snow may seem like different worlds, but alpine life and surfing, for example, have a lot in common: the connection with nature, the thrill of adventure, the desire to create memories, the passion for gastronomy, among others. This trend reflects, on the one hand, the aspiration to a certain lifestyle and, on the other, strategic financial planning, reinforcing Portugal's role as a central player in the European investment landscape\", highlights David Moura-George.",
17+
"To deepen the growing connection between Portugal and European real estate markets, such as the French Alps, Athena Advisers organizes an annual presentation for real estate investors based in Portugal and this year's edition takes place on January 30th, in Lisbon."
18+
]
19+
},
20+
{
21+
"headline": [
22+
"American skiers in Portugal seek European resort prices"
23+
],
24+
"paragraphs": [
25+
"In recent years, Portugal has stood out as a preferred destination for the wave of North American emigration to Europe, whose influx registered an increase of 239% between 2017 and 2022, resulting in a population of approximately 10,000 residents, according to data from Global Citizen Solutions. Although Lisbon's sunny charm is a strong draw, many of these expats want to continue skiing and French resorts offer this possibility at a much more competitive price.",
26+
"\"A day ski pass at major North American resorts can cost more than $300, while in France, even at major resorts, a day pass rarely exceeds 70 euros,\" adds Moura-George. \"The quality and diversity of European gastronomy is also a relevant factor\", he emphasizes.",
27+
"Property prices in the Alpine regions, whether in France or North America, often exceed 40,000 euros per square meter, but the greater diversity of ski resorts and regions means that the French offer is much broader when it comes to investment opportunities. “Each resort is a micro-market, where prices can vary greatly from street to street,” adds Moura-George. \"We have helped people invest up to €60 million in a chalet in Val d'Isère and this price level is quite common across the French Alps. However, there is also great value and strong investment potential in other resorts where Prices for new, well-located properties in highly regarded villages can start at around €7,000 per square meter.\""
28+
]
29+
},
30+
{
31+
"headline": [
32+
"Athena Advisers sales in the French Alps grow by 110%"
33+
],
34+
"paragraphs": [
35+
"In 2024, Athena Advisers recorded a sales volume of over 140 million euros in the French Alps, which represents a growth of 110% compared to 2023.",
36+
"Globally, winter tourism generates approximately 180 billion euros per year, and in France alone, this industry was valued at 71 billion euros in 2024, and agreement with the French Ministry of Tourism.",
37+
"The number of transactions also increased, growing 30% compared to the previous year. This increase in transaction volume was accompanied by an increase in the average property value, reflecting a greater demand for luxury chalets and apartments in the region.",
38+
"The average price per unit rose from approximately €2.2 million in 2023 to €3.6 million in 2024, a substantial growth that demonstrates a trend towards higher value investments in the French Alps, further consolidating the region's positioning as a hub for premium properties.",
39+
"Athena Advisers currently has properties for sale in the French Alps priced from €500,000 to over €30 million."
40+
]
41+
}
42+
]
43+
},
44+
"images": [
45+
{
46+
"versions": [
47+
{
48+
"url": "https://d1mnxluw9mpf9w.cloudfront.net/media/1738253380/16x9/1200.jpg?format=webp&width=490&height=275",
49+
"query_width": "min-width:992",
50+
"size": {
51+
"width": 490,
52+
"height": 275
53+
},
54+
"type": "image/jpeg"
55+
},
56+
{
57+
"url": "https://d1mnxluw9mpf9w.cloudfront.net/media/1738253380/4x3/1200.jpg?format=webp&width=520&height=390",
58+
"query_width": null,
59+
"size": {
60+
"width": 520,
61+
"height": 390
62+
},
63+
"type": "image/jpeg"
64+
},
65+
{
66+
"url": "https://d1mnxluw9mpf9w.cloudfront.net/media/1738253380/16x9/1200.jpg?format=webp&width=667&height=375",
67+
"query_width": "min-width:1200",
68+
"size": {
69+
"width": 667,
70+
"height": 375
71+
},
72+
"type": "image/jpeg"
73+
},
74+
{
75+
"url": "https://d1mnxluw9mpf9w.cloudfront.net/media/1738253380/16x9/1200.jpg?format=webp&width=780&height=438",
76+
"query_width": "min-width:1400",
77+
"size": {
78+
"width": 780,
79+
"height": 438
80+
},
81+
"type": "image/jpeg"
82+
}
83+
],
84+
"is_cover": true,
85+
"description": null,
86+
"caption": null,
87+
"authors": [
88+
"Supplied Image"
89+
],
90+
"position": 399
91+
},
92+
{
93+
"versions": [
94+
{
95+
"url": "https://d1mnxluw9mpf9w.cloudfront.net/media/1738253400/Val-d--039-Isere---Le-Fornet---Les-Jardins-de-Juliette--1-.jpg",
96+
"query_width": null,
97+
"size": {
98+
"width": 800,
99+
"height": 0
100+
},
101+
"type": "image/jpeg"
102+
}
103+
],
104+
"is_cover": false,
105+
"description": null,
106+
"caption": null,
107+
"authors": [
108+
"Supplied Image"
109+
],
110+
"position": 411
111+
},
112+
{
113+
"versions": [
114+
{
115+
"url": "https://d1mnxluw9mpf9w.cloudfront.net/media/1738253416/Les-Deux-Alpes---Le-Telemark.jpg",
116+
"query_width": null,
117+
"size": {
118+
"width": 800,
119+
"height": 0
120+
},
121+
"type": "image/jpeg"
122+
}
123+
],
124+
"is_cover": false,
125+
"description": null,
126+
"caption": null,
127+
"authors": [
128+
"Supplied Image"
129+
],
130+
"position": 427
131+
}
132+
],
133+
"publishing_date": "2025-01-31 20:02:00+00:00",
134+
"title": "Portugal emerges as a French Alps real estate investment"
135+
}
136+
}
Binary file not shown.
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
{
2+
"ThePortugalNews_2025_02_03.html.gz": {
3+
"url": "https://www.theportugalnews.com/news/2025-01-31/portugal-emerges-as-a-french-alps-real-estate-investment/95254",
4+
"crawl_date": "2025-02-03 01:18:28.617595"
5+
}
6+
}

0 commit comments

Comments
 (0)