3
3
from dateutil .rrule import MONTHLY , rrule
4
4
5
5
from fundus .publishers .base_objects import Publisher , PublisherGroup
6
+ from fundus .publishers .it .corriere_della_sera import CorriereDellaSeraParser
7
+ from fundus .publishers .it .il_giornale import IlGiornaleParser
6
8
from fundus .publishers .it .la_repubblica import LaRepubblicaParser
7
9
from fundus .scraping .url import RSSFeed , Sitemap
8
10
@@ -22,3 +24,90 @@ class IT(metaclass=PublisherGroup):
22
24
)
23
25
],
24
26
)
27
+
28
+ CorriereDellaSera = Publisher (
29
+ name = "Corriere Della Sera" ,
30
+ domain = "https://www.corriere.it" ,
31
+ parser = CorriereDellaSeraParser ,
32
+ sources = [
33
+ # Main RSS feeds
34
+ RSSFeed ("https://www.corriere.it/feed-hp/homepage.xml" ),
35
+ RSSFeed ("https://www.corriere.it/rss/ultimora.xml" ),
36
+ RSSFeed ("https://www.corriere.it/dynamic-feed/rss/section/Dataroom.xml" ),
37
+ RSSFeed ("https://www.corriere.it/dynamic-feed/rss/section/lettere-al-direttore.xml" ),
38
+ RSSFeed ("https://www.corriere.it/dynamic-feed/rss/section/lo-dico-al-corriere.xml" ),
39
+ RSSFeed ("https://www.corriere.it/dynamic-feed/rss/section/frammenti-di-ferruccio-de-bortoli.xml" ),
40
+ # Main sitemaps
41
+ Sitemap ("https://www.corriere.it/rss/sitemap_v2.xml" ),
42
+ Sitemap ("https://www.corriere.it/salute/sitemap-dizionario-corriere-salute.xml" ),
43
+ # Dynamic sitemaps - Last 100 articles
44
+ Sitemap ("https://www.corriere.it/dynamic-sitemap/sitemap-last-100/video/Corriere.xml" ),
45
+ Sitemap ("https://www.corriere.it/dynamic-sitemap/sitemap-last-100/Economia.xml" ),
46
+ Sitemap ("https://www.corriere.it/dynamic-sitemap/sitemap-last-100/Salute.xml" ),
47
+ Sitemap ("https://www.corriere.it/dynamic-sitemap/sitemap-last-100/Scienze.xml" ),
48
+ Sitemap ("https://www.corriere.it/dynamic-sitemap/sitemap-last-100/Interni.xml" ),
49
+ Sitemap ("https://www.corriere.it/dynamic-sitemap/sitemap-last-100/Esteri.xml" ),
50
+ Sitemap ("https://www.corriere.it/dynamic-sitemap/sitemap-last-100/Sport.xml" ),
51
+ Sitemap ("https://www.corriere.it/dynamic-sitemap/sitemap-last-100/Politica.xml" ),
52
+ Sitemap ("https://www.corriere.it/dynamic-sitemap/sitemap-last-100/Salute__Figli__e__Genitori.xml" ),
53
+ Sitemap ("https://www.corriere.it/dynamic-sitemap/sitemap-last-100/Salute__Sportello__Cancro.xml" ),
54
+ Sitemap ("https://www.corriere.it/dynamic-sitemap/sitemap-last-100/Elezioni.xml" ),
55
+ Sitemap ("https://www.corriere.it/dynamic-sitemap/sitemap-last-100/Tecnologia.xml" ),
56
+ Sitemap ("https://www.corriere.it/dynamic-sitemap/sitemap-last-100/Offerte__recensioni.xml" ),
57
+ Sitemap ("https://www.corriere.it/dynamic-sitemap/sitemap-last-100/Lotterie.xml" ),
58
+ Sitemap ("https://www.corriere.it/dynamic-sitemap/sitemap-last-100/Spettacoli.xml" ),
59
+ Sitemap ("https://www.corriere.it/dynamic-sitemap/sitemap-last-100/Scuola.xml" ),
60
+ Sitemap ("https://www.corriere.it/dynamic-sitemap/sitemap-last-100/Animali.xml" ),
61
+ Sitemap ("https://www.corriere.it/dynamic-sitemap/sitemap-last-100/Opinioni.xml" ),
62
+ Sitemap ("https://www.corriere.it/dynamic-sitemap/sitemap-last-100/Caffe-gramellini.xml" ),
63
+ Sitemap ("https://www.corriere.it/dynamic-sitemap/sitemap-last-100/Ultimo-banco.xml" ),
64
+ Sitemap ("https://www.corriere.it/dynamic-sitemap/sitemap-last-100/Letti-da-rifarei.xml" ),
65
+ Sitemap ("https://www.corriere.it/dynamic-sitemap/sitemap-last-100/Piccole-dosi.xml" ),
66
+ Sitemap ("https://www.corriere.it/dynamic-sitemap/sitemap-last-100/L-angolo.xml" ),
67
+ Sitemap ("https://www.corriere.it/dynamic-sitemap/sitemap-last-100/Padiglione-italia.xml" ),
68
+ Sitemap ("https://www.corriere.it/dynamic-sitemap/sitemap-last-100/Facce-nuove.xml" ),
69
+ Sitemap ("https://www.corriere.it/dynamic-sitemap/sitemap-last-100/Ritorno-in-solferino.xml" ),
70
+ Sitemap ("https://www.corriere.it/dynamic-sitemap/sitemap-last-100/Oriente-occidente.xml" ),
71
+ Sitemap ("https://www.corriere.it/dynamic-sitemap/sitemap-last-100/Sette.xml" ),
72
+ Sitemap ("https://www.corriere.it/dynamic-sitemap/sitemap-last-100/Moda.xml" ),
73
+ Sitemap ("https://www.corriere.it/dynamic-sitemap/sitemap-last-100/BuoneNotizie.xml" ),
74
+ Sitemap ("https://www.corriere.it/dynamic-sitemap/sitemap-last-100/lettere__al__direttore.xml" ),
75
+ Sitemap ("https://www.corriere.it/dynamic-sitemap/sitemap-last-100/lo__dico__al__corriere.xml" ),
76
+ Sitemap ("https://www.corriere.it/dynamic-sitemap/sitemap-last-100/Frammenti-ferruccio-de-bortoli.xml" ),
77
+ # Section sitemaps
78
+ Sitemap ("https://www.corriere.it/rss/sitemap/Motori.xml" ),
79
+ Sitemap ("https://www.corriere.it/rss/sitemap/Cultura.xml" ),
80
+ Sitemap ("https://vivimilano.corriere.it/sitemap_index.xml" ),
81
+ Sitemap ("https://www.corriere.it/cook/sitemap-index.xml" ),
82
+ Sitemap ("https://www.corriere.it/oroscopo/sitemap.xml" ),
83
+ Sitemap ("https://www.corriere.it/elezioni/sitemap/sitemap.xml" ),
84
+ Sitemap ("https://www.corriere.it/sport/risultati-live/sitemap.xml" ),
85
+ Sitemap ("https://www.corriere.it/salute/il-medico-risponde/sitemap.xml" ),
86
+ Sitemap ("https://www.corriere.it/rss/sitemap/lettere-al-direttore.xml" ),
87
+ Sitemap ("https://www.corriere.it/rss/sitemap/lo-dico-al-corriere.xml" ),
88
+ Sitemap ("https://www.corriere.it/rss/sitemap/Cook-Last.xml" ),
89
+ Sitemap ("https://www.corriere.it/economia/chiedi-esperto/sitemap.xml" ),
90
+ Sitemap ("https://www.corriere.it/economia/chiedi-esperto/news/sitemap.xml" ),
91
+ Sitemap ("https://www.corriere.it/studio/sitemap-studio.xml" ),
92
+ ],
93
+ )
94
+
95
+ IlGiornale = Publisher (
96
+ name = "Il Giornale" ,
97
+ domain = "https://www.ilgiornale.it" ,
98
+ parser = IlGiornaleParser ,
99
+ request_header = {
100
+ "User-Agent" : "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/121.0.0.0 Safari/537.36" ,
101
+ "Accept" : "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8" ,
102
+ "Accept-Language" : "en-US,en;q=0.9" ,
103
+ "Accept-Encoding" : "gzip, deflate, br" ,
104
+ "Connection" : "keep-alive" ,
105
+ "Upgrade-Insecure-Requests" : "1" ,
106
+ },
107
+ sources = [
108
+ RSSFeed ("https://www.ilgiornale.it/feed.xml" ),
109
+ RSSFeed ("https://www.ilgiornale.it/feed/rss.xml" ),
110
+ Sitemap ("https://www.ilgiornale.it/sitemap/google-news.xml" ),
111
+ Sitemap ("https://www.ilgiornale.it/sitemap/indice.xml" ),
112
+ ],
113
+ )
0 commit comments