Skip to content

Commit

Permalink
Merge remote-tracking branch 'upstream/master'
Browse files Browse the repository at this point in the history
  • Loading branch information
GitHub Actions Bot committed Nov 1, 2024
2 parents 063c5f7 + bcd337b commit 253e273
Show file tree
Hide file tree
Showing 34 changed files with 431 additions and 60 deletions.
17 changes: 17 additions & 0 deletions .blog.hu.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
# First fetch with wallabag sometimes fails;
# just reload the article within wallabag then

http_header(user-agent): Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:132.0) Gecko/20100101 Firefox/132.0

body: //article[1]

strip_id_or_class: blh_share_cont
strip_id_or_class: comments
strip_id_or_class: tags
strip_id_or_class: article-bottom

prune: no
tidy: no

test_url: https://passport.blog.hu/2020/03/13/a_cia_lopott_muholdja
test_url: https://mpt-it.blog.hu/2024/10/27/_itt_van_a_varos_vagyunk_lakoi
14 changes: 14 additions & 0 deletions .nasa.gov.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
body: //article[1]/section[1]/div[contains(@class, 'grid-container')]
title: substring-before( //meta[@property='og:title']/@content , ' - NASA Science')
author: //div[contains(@class, 'SideboxAuthor')]//p[contains(@class, 'hds-meta-subheading')]
author: //div[contains(@class, 'SideboxAuthor')]//p[contains(@class, 'hds-meta-heading')]
date: //meta[@name='parsely-pub-date']/@content

strip_id_or_class: article-header

strip: //aside

prune: no
tidy: no

test_url: https://science.nasa.gov/missions/hubble/new-clues-to-trappist-1-planet-compositions-atmospheres
12 changes: 12 additions & 0 deletions antigone21.com.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
body: //div[contains(@class, 'post-entry')]
title: substring-before( //meta[@property='og:title']/@content , ' | Antigone XXI')

strip: //span[text()='*']
strip: //p[text()='*']

strip_id_or_class: post-tags

prune: no
tidy: no

test_url: https://antigone21.com/2014/04/03/falafels-hummus-maison-vegan-sans-gluten/
46 changes: 15 additions & 31 deletions arstechnica.com.txt
Original file line number Diff line number Diff line change
@@ -1,41 +1,25 @@
author: //p[@class='byline']/span[@class='author']
author: //p[@class='byline']/a
body: //article[1]
author: //div[@class='author-mini-bio']//a
author: //a[contains(@href, '/author/')]

body: //div[contains(@class,'article-content')]
strip: //h2[@class='title']
strip: //div[contains(@class, 'pullbox')]
strip: //header//h1 | //header//h1/following-sibling::div[contains(@class, 'md:')] | //header//h1/preceding-sibling::div
strip: //div[@class='author-mini-bio']/self::* | //div[@class='author-mini-bio']/following-sibling::*
strip: //div[contains(@class, 'story-tools')]/self::* | //div[contains(@class, 'story-tools')]/following-sibling::*

strip_id_or_class: byline
strip_id_or_class: story-sidebar
prune: no

find_string: " data-src="
replace_string: "><img src="

find_string: " data-responsive="
replace_string: " /><span "

find_string: <figure style="
replace_string: </span><figure style="
strip: //svg

date: //div[@class='byline']/span[@class='posted']//abbr/@original-title
date: //div[@class='byline']/span[@class='posted']//abbr
date: //*[@class='byline']//time[@class='date']
strip_id_or_class: view-comments
strip_id_or_class: pswp-caption-content
strip_id_or_class: related-stories
strip_id_or_class: text-settings-dropdown-story

title: //div[@id='story']//h2[@class='title']

strip: //div[@class='pager']
native_ad_clue: //meta[@property="og:url" and contains(@content, '/sponsored/')]

strip: //aside
next_page_link: //nav//a[contains(text(), 'Next')]/@href
next_page_link: //span[@class='numbers']//a/span[@class='next']/..
next_page_link: //nav//a/span[contains(text(), 'Next')]/../@href
prune: no
tidy: no

test_url: http://arstechnica.com/tech-policy/news/2012/02/gigabit-internet-for-80-the-unlikely-success-of-californias-sonicnet.ars
test_url: http://arstechnica.com/apple/2005/04/macosx-10-4/
test_url: https://arstechnica.com/features/2020/10/the-space-operating-systems-booting-up-where-no-one-has-gone-before
test_url: https://arstechnica.com/gaming/2021/02/coders-reverse-engineer-grand-theft-autos-raw-pc-source-code/

# test_url: http://arstechnica.co.uk/science/2016/06/what-is-open-access-free-sharing-of-all-human-knowledge/
# test_url: http://arstechnica.co.uk/information-technology/2016/05/eben-moglen-gpl-online-advertising-is-becoming-a-perfect-despotism/
test_url: http://arstechnica.co.uk/science/2016/06/what-is-open-access-free-sharing-of-all-human-knowledge/
test_url: http://arstechnica.co.uk/information-technology/2016/05/eben-moglen-gpl-online-advertising-is-becoming-a-perfect-despotism/
12 changes: 12 additions & 0 deletions blog.rchapman.org.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
body: //article[@class='post-container']
title: //h1[@class='post-title']
author: 'Ryan A. Chapman'
date: substring-after(//article[@class='post-container']/header/text() , 'Published')

strip: //header
strip: //footer/self::* | //footer/following-sibling::*

prune: no
tidy: no

test_url: https://blog.rchapman.org/posts/Linux_System_Call_Table_for_x86_64/
6 changes: 6 additions & 0 deletions buildvirtual.net.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
body: //article[1]/div[contains(@class, 'post-entry')]
title: substring-before(//meta[@property='og:title']/@content , ' - buildVirtual')

prune: no

test_url: https://buildvirtual.net/how-to-use-docker-stack-to-deploy-docker-containers/
9 changes: 9 additions & 0 deletions callistaenterprise.se.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
# <head> contains a conditional redirect via <meta http-equiv="refresh" ...>
# to /oldie/.
# The following line deactivates this for FTR and the wallabagger browser extension,
# but unfortunately NOT for the wallabag UI.

replace_string(http-equiv="refresh"): foo="bar"


test_url: https://callistaenterprise.se/blogg/teknik/2016/05/27/building-a-microservice-with-golang/
8 changes: 7 additions & 1 deletion canardpc.com.txt
Original file line number Diff line number Diff line change
@@ -1,11 +1,17 @@
title: //div[contains(@class, 'main-article-titre')]
body: //div[contains(@class, 'post-content')]
author: //div[@class="post-informations"]//a[contains(@href, 'https://www.canardpc.com/auteur/')]
author: //div[contains(@class, "post-informations")]//a[contains(@href, 'https://www.canardpc.com/auteur/')]
date: //meta[@property="article:published_time"]/@content

strip: //div[contains(@class, 'slick-cloned')]
strip_id_or_class: locomotive-navigation
strip: //div[contains(@class, "encarts")]//div[contains(@class, "layout-framed")]
#strip: //div[contains(@class, "post-row desktop")]
strip: //div[contains(@class, "post-row mobile")]
strip_id_or_class: main-cartouche
strip: //div[contains(@class, 'post-informations')]/span

prune: no

# wallabag-specific login directives (not supported in FTR)
requires_login: yes
Expand Down
18 changes: 18 additions & 0 deletions community.element14.com.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
http_header(user-agent): PHP/7.4

body: (//div[@class='content'])[1]
date: //time/@datetime
author: //span[@class='user-name']//a
# and for wallabag
author: //div[@class='avatar']/following-sibling::a

strip_id_or_class: toc

# remove large gaps between <p>
find_string: ">&nbsp;</p
replace_string: "></p

prune: no
tidy: no

test_url: https://community.element14.com/technologies/test-and-measurement/b/blog/posts/building-solderable-in-circuit-oscilloscope-probes
10 changes: 9 additions & 1 deletion community.openstreetmap.org.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,12 @@
# [Fulltext-RSS] needs this to catch page 2+
http_header(user-agent): curl/7.83.1

body: //div[contains(@id, 'post_')]

next_page_link: //link[@rel='next']/@href

prune: no
tidy: no

test_url: https://community.openstreetmap.org/t/lidar-mapping-of-roads/97100
test_contains: Roadroid had an app since 2011, so we are probably the longest lasting app for the issue

28 changes: 28 additions & 0 deletions countrylife.co.uk.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
http_header(user-agent): Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:133.0) Gecko/20100101 Firefox/133.0

body: //div[@class='articleBody'] | //div[contains(@class, 'featured-media')]
title: normalize-space(//h1[@class='title-primary'])

strip_id_or_class: collection
strip_id_or_class: post-tags
strip_id_or_class: keystone-voep-widget-3
strip_id_or_class: hidden-md
strip_id_or_class: hidden-lg


# rewrite image parameters to show images in wallabag and FTR

strip_attr: //img/@data-srcset

find_string: data-processed src="https://
replace_string: foo="" bar="https://

find_string: data-src="https://
replace_string: dummy="" src="https://


prune: no
tidy: no

test_url: https://www.countrylife.co.uk/food-drink/curious-questions-how-did-the-tea-bag-take-over-from-loose-leaf-tea-219407
test_url: https://www.countrylife.co.uk/architecture/ptolemy-dean-the-magic-that-happens-when-you-stop-to-draw-a-place-instead-of-just-taking-a-photograph-274700
6 changes: 6 additions & 0 deletions derekseaman.com.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
body: //div[@class='entry-content']

prune: no
tidy: no

test_url: https://www.derekseaman.com/2019/09/how-to-pi-hole-plus-dnscrypt-setup-on-raspberry-pi-4.html
9 changes: 5 additions & 4 deletions diepresse.com.txt
Original file line number Diff line number Diff line change
Expand Up @@ -4,25 +4,26 @@
# or add a category, e.g.:
# https://www.diepresse.com/rss/kultur/kunst


title: //article/h1[1]
title: //div[@class='article']/h1
date: substring-before(//p[@class='articletime'],'|')
author: //meta[@name='cXenseParse:author']/@content
author: //div[@class='author']/descendant::a
body: //article | //div[@id='articletext'] | //div[@id='article-body']

strip: //div[@class='meta__date']
strip: //div[@class='meta__authors']
strip: //h1[@class='article__title']
strip: //article/h1[1]
strip: //div[contains(@class, 'fm-relation')]
strip: //svg
strip: //div[@class='vued']
strip: //svg
strip: //button

strip_id_or_class: skyline
strip_id_or_class: article__actions
strip_id_or_class: article__byline
strip_id_or_class: article__breadcrumbs


# strip generic text images when there is no article photo/image

find_string: https://www.diepresse.com/assets_v2/images/
Expand Down
11 changes: 11 additions & 0 deletions discuss.logseq.com.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
# [Fulltext-RSS] needs this to catch page 2+
http_header(user-agent): curl/7.83.1

body: //div[contains(@id, 'post_')]

next_page_link: //link[@rel='next']/@href

prune: no
tidy: no

test_url: https://discuss.logseq.com/t/concerns-on-db-version-and-future-state-from-a-3-year-user/29225
28 changes: 28 additions & 0 deletions haaretz.com.txt
Original file line number Diff line number Diff line change
@@ -1,3 +1,30 @@
http_header(user-agent): Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:132.0) Gecko/20100101 Firefox/132.0

body: //article[1]/div[1]/header[1] | //article[1]/div[1]/div[1]/section[1] | //div[@data-test="articleBody"]

strip_id_or_class: trinity-iframe
strip_id_or_class: undefined

strip: //nav
strip: //button
strip: //svg

strip: //header//h1
strip: //div[@data-test='shareBar']
strip: //div[@data-display-dates='true']/parent::div
strip: //div[@data-test='tags']
strip: //a[contains(@href, 'mailto:')]

prune: no
tidy: no


strip_attr: //img[@height='9']/@width
strip_attr: //img[@height='9']/@height


### old version: 2020-2024

body: //div[@data-test="articleBody"]
title: //meta[@property="og:title"]/@content
date: //meta[@property="article:published"]/@content
Expand All @@ -11,4 +38,5 @@ strip: //ul[@data-test="relatedArticles"]
strip: //div[contains(@id, 'newsletter')]
strip: //button


test_url: https://www.haaretz.com/israel-news/.premium-with-israel-s-encouragement-nso-sold-spyware-to-uae-and-other-gulf-states-1.9093465
2 changes: 2 additions & 0 deletions habr.com.txt
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
body: //div[@id='post-content-body']
body: //div[contains(@class, 'post__body')]

strip: //section[@class='article__infopanel']
strip: //script

prune: no
tidy: no
Expand Down
13 changes: 13 additions & 0 deletions hpd.de.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
body: //article[1]/div/div[@class='content'] | //figure[contains(@class, 'file-image')]
date: //article[1]//time/@datetime
author: //p[@class='authors']/i

strip_id_or_class: element-invisible

strip: //a[contains(@href, 'https://steadyhq.com/')]/parent::*

prune: no
tidy: no

test_url: https://hpd.de/artikel/mythos-thule-22499
test_url: https://hpd.de/artikel/schaffen-affengehirne-auch-links-17728
19 changes: 19 additions & 0 deletions kuemmerle.name.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
body: //article[contains(concat(' ',normalize-space(@class),' '),' post ')] | //div[contains(@class, 'page-banner-inner')]
title: //meta[@name='twitter:title']/@content
author: //div[contains(@class, 'entry-meta')]//a[contains(@href, '/author/')]
date: //time/@datetime

strip: //header
strip: //footer
strip: //svg

strip: //div[contains(@class, 'rmp-widgets-container')]/self::div | //div[contains(@class, 'rmp-widgets-container')]/preceding-sibling::hr[1] | //div[contains(@class, 'rmp-widgets-container')]/following-sibling::*

strip_id_or_class: rmp-results-widget
strip_id_or_class: rmp-rating-widget
strip_id_or_class: rmp-feedback-widget
strip_id_or_class: sydney-post-sharing

prune: no

test_url: https://kuemmerle.name/zeit-fuer-ein-gedicht-26
10 changes: 10 additions & 0 deletions lexpress.fr.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
body: //article[1] | //*[contains(@class, 'article__lead')] | //div[@class='article__image']

replace_string(<h2): <h4
replace_string(</h2>): </h4>

prune: no
tidy: no

test_url: https://www.lexpress.fr/idees-et-debats/abel-quentin-le-climat-est-une-question-de-vie-ou-de-mort-pas-les-toilettes-non-genrees-2ZQAOWQ46JEE5I654A5IXI43WQ/
test_url: https://www.lexpress.fr/economie/high-tech/les-reverse-etl-la-solution-pour-activer-vos-donnees-clients-IL63BDTD3ZBVLIEDWVBDNM2FAM/
15 changes: 3 additions & 12 deletions macg.co.txt
Original file line number Diff line number Diff line change
@@ -1,14 +1,5 @@
date: //time[@itemprop="datePublished"]/@datetime

strip: //div[contains(concat(' ',normalize-space(@class),' '),' plusLoin ')]
strip: //select
body: //div[contains(concat(' ',normalize-space(@class),' '),' print:shadow-none ')]//article
strip: //footer
strip_id_or_class: comments
strip_id_or_class: smart-paging-pager
strip_id_or_class: megaUne
strip_id_or_class: pas
strip_id_or_class: field-name-field-pour-aller-plus-loin

next_page_link: //div[contains(concat(' ',normalize-space(@class),' '),' smart-paging-pager ')]//a[@title='Aller à la page suivante']

body: //section[contains(concat(' ',normalize-space(@class),' '),' corps ')]
test_url: http://www.macg.co/aapl/2015/09/tim-cook-rassure-sur-lavenir-du-mac-90957
test_url: https://www.macg.co/ailleurs/2024/10/microsoft-donne-des-yeux-et-une-voix-lia-de-copilot-146279
6 changes: 6 additions & 0 deletions mailchi.mp.txt
Original file line number Diff line number Diff line change
Expand Up @@ -9,12 +9,18 @@ replace_string(<td): <div
replace_string(</td): </div

body: //div[@id="bodyTable"]
body: //body[1]

strip_id_or_class: awesomewrap
strip_id_or_class: templatePreheader
strip_id_or_class: mcnShareBlock
strip_id_or_class: templateFooter
strip_id_or_class: mcnFollowBlock

# strip hint for browser edition
strip: (//div[contains(@class, 'mj-outlook-group-fix')])[1]

prune: no


# Test URL needs to be something that doesn't reveal a subscriber's email address
Loading

0 comments on commit 253e273

Please sign in to comment.