diff --git a/.blog.hu.txt b/.blog.hu.txt new file mode 100644 index 000000000..f47754bd5 --- /dev/null +++ b/.blog.hu.txt @@ -0,0 +1,17 @@ +# First catch with wallabag sometimes fails, +# just reload article within wallabag then + +http_header(user-agent): Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:132.0) Gecko/20100101 Firefox/132.0 + +body: //article[1] + +strip_id_or_class: blh_share_cont +strip_id_or_class: comments +strip_id_or_class: tags +strip_id_or_class: article-bottom + +prune: no +tidy: no + +test_url: https://passport.blog.hu/2020/03/13/a_cia_lopott_muholdja +test_url: https://mpt-it.blog.hu/2024/10/27/_itt_van_a_varos_vagyunk_lakoi diff --git a/.nasa.gov.txt b/.nasa.gov.txt new file mode 100644 index 000000000..1feb23247 --- /dev/null +++ b/.nasa.gov.txt @@ -0,0 +1,14 @@ +body: //article[1]/section[1]/div[contains(@class, 'grid-container')] +title: substring-before( //meta[@property='og:title']/@content , ' - NASA Science') +author: //div[contains(@class, 'SideboxAuthor')]//p[contains(@class, 'hds-meta-subheading')] +author: //div[contains(@class, 'SideboxAuthor')]//p[contains(@class, 'hds-meta-heading')] +date: //meta[@name='parsely-pub-date']/@content + +strip_id_or_class: article-header + +strip: //aside + +prune: no +tidy: no + +test_url: https://science.nasa.gov/missions/hubble/new-clues-to-trappist-1-planet-compositions-atmospheres diff --git a/antigone21.com.txt b/antigone21.com.txt new file mode 100644 index 000000000..e4979acc4 --- /dev/null +++ b/antigone21.com.txt @@ -0,0 +1,12 @@ +body: //div[contains(@class, 'post-entry')] +title: substring-before( //meta[@property='og:title']/@content , ' | Antigone XXI') + +strip: //span[text()='*'] +strip: //p[text()='*'] + +strip_id_or_class: post-tags + +prune: no +tidy: no + +test_url: https://antigone21.com/2014/04/03/falafels-hummus-maison-vegan-sans-gluten/ diff --git a/arstechnica.com.txt b/arstechnica.com.txt index e8be30c9d..844e505a2 100644 --- a/arstechnica.com.txt +++ b/arstechnica.com.txt 
@@ -1,41 +1,25 @@ -author: //p[@class='byline']/span[@class='author'] -author: //p[@class='byline']/a +body: //article[1] +author: //div[@class='author-mini-bio']//a +author: //a[contains(@href, '/author/')] -body: //div[contains(@class,'article-content')] -strip: //h2[@class='title'] -strip: //div[contains(@class, 'pullbox')] +strip: //header//h1 | //header//h1/following-sibling::div[contains(@class, 'md:')] | //header//h1/preceding-sibling::div +strip: //div[@class='author-mini-bio']/self::* | //div[@class='author-mini-bio']/following-sibling::* +strip: //div[contains(@class, 'story-tools')]/self::* | //div[contains(@class, 'story-tools')]/following-sibling::* -strip_id_or_class: byline -strip_id_or_class: story-sidebar -prune: no - -find_string: " data-src=" -replace_string: "> contains a condition redirect via +# to /oldie/ +# following line deactivates this for FTR and wallabagger browser extension +# but unfortunately NOT for wallabag UI + +replace_string(http-equiv="refresh"): foo="bar" + + +test_url: https://callistaenterprise.se/blogg/teknik/2016/05/27/building-a-microservice-with-golang/ diff --git a/canardpc.com.txt b/canardpc.com.txt index 2328c36fe..bae4c4286 100644 --- a/canardpc.com.txt +++ b/canardpc.com.txt @@ -1,11 +1,17 @@ title: //div[contains(@class, 'main-article-titre')] body: //div[contains(@class, 'post-content')] -author: //div[@class="post-informations"]//a[contains(@href, 'https://www.canardpc.com/auteur/')] +author: //div[contains(@class, "post-informations")]//a[contains(@href, 'https://www.canardpc.com/auteur/')] date: //meta[@property="article:published_time"]/@content strip: //div[contains(@class, 'slick-cloned')] strip_id_or_class: locomotive-navigation strip: //div[contains(@class, "encarts")]//div[contains(@class, "layout-framed")] +#strip: //div[contains(@class, "post-row desktop")] +strip: //div[contains(@class, "post-row mobile")] +strip_id_or_class: main-cartouche +strip: //div[contains(@class, 'post-informations')]/span + 
+prune: no # wallabag-specific login directives (not supported in FTR) requires_login: yes diff --git a/community.element14.com.txt b/community.element14.com.txt new file mode 100644 index 000000000..f35ce9520 --- /dev/null +++ b/community.element14.com.txt @@ -0,0 +1,18 @@ +http_header(user-agent): PHP/7.4 + +body: (//div[@class='content'])[1] +date: //time/@datetime +author: //span[@class='user-name']//a +# and for wallabag +author: //div[@class='avatar']/following-sibling::a + +strip_id_or_class: toc + +# remove large gaps between

+find_string: "> 

): + +prune: no +tidy: no + +test_url: https://www.lexpress.fr/idees-et-debats/abel-quentin-le-climat-est-une-question-de-vie-ou-de-mort-pas-les-toilettes-non-genrees-2ZQAOWQ46JEE5I654A5IXI43WQ/ +test_url: https://www.lexpress.fr/economie/high-tech/les-reverse-etl-la-solution-pour-activer-vos-donnees-clients-IL63BDTD3ZBVLIEDWVBDNM2FAM/ diff --git a/macg.co.txt b/macg.co.txt index 274797cc8..babd7421e 100644 --- a/macg.co.txt +++ b/macg.co.txt @@ -1,14 +1,5 @@ -date: //time[@itemprop="datePublished"]/@datetime - -strip: //div[contains(concat(' ',normalize-space(@class),' '),' plusLoin ')] -strip: //select +body: //div[contains(concat(' ',normalize-space(@class),' '),' print:shadow-none ')]//article +strip: //footer strip_id_or_class: comments -strip_id_or_class: smart-paging-pager -strip_id_or_class: megaUne -strip_id_or_class: pas -strip_id_or_class: field-name-field-pour-aller-plus-loin - -next_page_link: //div[contains(concat(' ',normalize-space(@class),' '),' smart-paging-pager ')]//a[@title='Aller à la page suivante'] -body: //section[contains(concat(' ',normalize-space(@class),' '),' corps ')] -test_url: http://www.macg.co/aapl/2015/09/tim-cook-rassure-sur-lavenir-du-mac-90957 +test_url: https://www.macg.co/ailleurs/2024/10/microsoft-donne-des-yeux-et-une-voix-lia-de-copilot-146279 diff --git a/mailchi.mp.txt b/mailchi.mp.txt index f9fda34a0..75cbe9e84 100644 --- a/mailchi.mp.txt +++ b/mailchi.mp.txt @@ -9,12 +9,18 @@ replace_string(): div> + +# remove blank in meta>og:image>content, which resulted in an empty preview image in wallabag +replace_string(.jpeg "): .jpeg" + +prune: no +tidy: no + +test_url: https://taz.de/Italienische-Comicneuheiten/!6041889/ test_url: https://www.taz.de/!5504959/ test_url: https://taz.de/!5708122 diff --git a/universe.shelfd.com.txt b/universe.shelfd.com.txt new file mode 100644 index 000000000..52e18583b --- /dev/null +++ b/universe.shelfd.com.txt @@ -0,0 +1,9 @@ +body: //div[@class='content-inner '] + +strip: 
//strong[contains(text(), 'Hol dir Vorfreude in deinen Kalender')]/parent::h2 | //strong[contains(text(), 'Hol dir Vorfreude in deinen Kalender')]/parent::h2/following-sibling::* + +prune: no +tidy: no + +test_url: https://universe.shelfd.com/mubi-neustarts-kalender/ +test_url: https://universe.shelfd.com/feed/