Skip to content

Commit 3a8d33e

Browse files
committed
web feed polling: start using ETag/If-None-Match and Last-Modified/If-Modified-Since
for #791
1 parent 3e49faf commit 3a8d33e

File tree

2 files changed

+72
-20
lines changed

2 files changed

+72
-20
lines changed

tests/test_web.py

+40-2
Original file line numberDiff line numberDiff line change
@@ -1783,15 +1783,20 @@ def test_poll_feed_atom(self, mock_create_task, mock_get, _):
17831783
<content>I hereby ☕ post</content>
17841784
</entry>
17851785
"""
1786-
mock_get.return_value = requests_response(
1787-
feed, headers={'Content-Type': atom.CONTENT_TYPE})
1786+
mock_get.return_value = requests_response(feed, headers={
1787+
'Content-Type': atom.CONTENT_TYPE,
1788+
'Last-Modified': 'Sat, 01 Jan 2024 01:02:03 GMT',
1789+
'ETag': '"abc123"',
1790+
})
17881791

17891792
got = self.post('/queue/poll-feed', data={'domain': 'user.com'})
17901793
self.assertEqual(200, got.status_code)
17911794

17921795
user = self.user.key.get()
17931796
self.assertEqual(NOW, user.last_polled_feed)
17941797
self.assertEqual('https://user.com/post', user.feed_last_item)
1798+
self.assertEqual('"abc123"', user.feed_etag)
1799+
self.assertEqual('Sat, 01 Jan 2024 01:02:03 GMT', user.feed_last_modified)
17951800

17961801
mock_get.assert_has_calls((
17971802
self.req('https://foo/feed'),
@@ -2074,6 +2079,39 @@ def test_poll_feed_last_webmention_in_noop(self, mock_create_task, mock_get, _):
20742079
mock_create_task.assert_not_called()
20752080
mock_get.assert_not_called()
20762081

2082+
2083+
@patch('oauth_dropins.webutil.appengine_config.tasks_client.create_task')
def test_poll_feed_etag_last_modified(self, mock_create_task, mock_get, _):
    """Poll a feed for a user that already has stored HTTP cache validators.

    The user starts with ``feed_etag`` and ``feed_last_modified`` set. The
    mocked feed fetch answers ``304`` with different ``ETag`` and
    ``Last-Modified`` header values. The test then checks that:

    * the poll handler responds 200 and ``last_polled_feed`` is ``NOW``,
    * the header values from the 304 response are stored on the user,
    * the feed was requested with ``If-None-Match`` / ``If-Modified-Since``
      carrying the previously stored values,
    * a follow-up ``poll-feed`` task is scheduled
      ``web.MIN_FEED_POLL_PERIOD`` after ``NOW_SECONDS``.
    """
    # Validators the user has stored before this poll.
    stored_etag = '"abc123"'
    stored_last_modified = 'Sat, 01 Jan 2024 01:02:03 GMT'
    # Validators returned on the 304. NOTE(review): day "99" is not a real
    # calendar date; presumably it is just a distinct marker value, since the
    # assertions below only compare the string verbatim.
    fresh_etag = '"def789"'
    fresh_last_modified = 'Sat, 99 Jan 2024 01:02:03 GMT'

    common.RUN_TASKS_INLINE = False

    self.user.obj.mf2 = ACTOR_MF2_REL_FEED_URL
    self.user.obj.put()

    self.user.feed_etag = stored_etag
    self.user.feed_last_modified = stored_last_modified
    self.user.put()

    mock_get.return_value = requests_response(
        '',
        status=304,
        headers={
            'Last-Modified': fresh_last_modified,
            'ETag': fresh_etag,
        },
    )

    resp = self.post('/queue/poll-feed', data={'domain': 'user.com'})
    self.assertEqual(200, resp.status_code)

    reloaded = self.user.key.get()
    self.assertEqual(NOW, reloaded.last_polled_feed)
    self.assertEqual(fresh_etag, reloaded.feed_etag)
    self.assertEqual(fresh_last_modified, reloaded.feed_last_modified)

    # The fetch must have been conditional on the previously stored values.
    conditional_headers = {
        'If-None-Match': stored_etag,
        'If-Modified-Since': stored_last_modified,
    }
    mock_get.assert_has_calls([
        self.req('https://foo/feed', headers=conditional_headers),
    ])

    next_poll_eta = NOW_SECONDS + web.MIN_FEED_POLL_PERIOD.total_seconds()
    self.assert_task(
        mock_create_task,
        'poll-feed',
        '/queue/poll-feed',
        domain='user.com',
        eta_seconds=next_poll_eta,
    )
2114+
20772115
def _test_verify(self, redirects, hcard, actor, redirects_error=None):
20782116
self.user.has_redirects = False
20792117
self.user.put()

web.py

+32-18
Original file line numberDiff line numberDiff line change
@@ -102,6 +102,8 @@ class Web(User, Protocol):
102102
last_webmention_in = ndb.DateTimeProperty(tzinfo=timezone.utc)
103103
last_polled_feed = ndb.DateTimeProperty(tzinfo=timezone.utc)
104104
feed_last_item = ndb.StringProperty() # id (URL)
105+
feed_etag = ndb.StringProperty()
106+
feed_last_modified = ndb.StringProperty()
105107

106108
# Originally, BF served Web users' AP actor ids on fed.brid.gy, eg
107109
# https://fed.brid.gy/snarfed.org . When we started adding new protocols, we
@@ -642,25 +644,35 @@ def poll_feed_task():
642644
return msg
643645

644646
# fetch feed
645-
resp = util.requests_get(url)
646-
content_type = resp.headers.get('Content-Type') or ''
647-
type = FEED_TYPES.get(content_type.split(';')[0])
648-
if type == 'atom' or (type == 'xml' and rel_type == 'atom'):
649-
try:
650-
activities = atom.atom_to_activities(resp.text)
651-
except ValueError as e:
652-
error(f"Couldn't parse feed as Atom: {e}", status=502)
653-
obj_feed_prop = {'atom': resp.text}
654-
elif type == 'rss' or (type == 'xml' and rel_type == 'rss'):
655-
try:
656-
activities = rss.to_activities(resp.text)
657-
except ValueError as e:
658-
error(f"Couldn't parse feed as RSS: {e}", status=502)
659-
obj_feed_prop = {'rss': resp.text}
647+
headers = {}
648+
if user.feed_etag:
649+
headers['If-None-Match'] = user.feed_etag
650+
if user.feed_last_modified:
651+
headers['If-Modified-Since'] = user.feed_last_modified
652+
resp = util.requests_get(url, headers=headers)
653+
654+
if resp.status_code == 304:
655+
logger.info('Feed is unchanged since last poll')
656+
activities = []
660657
else:
661-
msg = f'Unknown feed type {content_type}'
662-
logger.info(msg)
663-
return msg
658+
content_type = resp.headers.get('Content-Type') or ''
659+
type = FEED_TYPES.get(content_type.split(';')[0])
660+
if type == 'atom' or (type == 'xml' and rel_type == 'atom'):
661+
try:
662+
activities = atom.atom_to_activities(resp.text)
663+
except ValueError as e:
664+
error(f"Couldn't parse feed as Atom: {e}", status=502)
665+
obj_feed_prop = {'atom': resp.text}
666+
elif type == 'rss' or (type == 'xml' and rel_type == 'rss'):
667+
try:
668+
activities = rss.to_activities(resp.text)
669+
except ValueError as e:
670+
error(f"Couldn't parse feed as RSS: {e}", status=502)
671+
obj_feed_prop = {'rss': resp.text}
672+
else:
673+
msg = f'Unknown feed type {content_type}'
674+
logger.info(msg)
675+
return msg
664676

665677
# create Objects and receive tasks
666678
for i, activity in enumerate(activities):
@@ -720,6 +732,8 @@ def clamp(delay):
720732

721733
# update user
722734
user.last_polled_feed = util.now()
735+
user.feed_etag = resp.headers.get('ETag')
736+
user.feed_last_modified = resp.headers.get('Last-Modified')
723737
user.put()
724738

725739
return 'OK'

0 commit comments

Comments
 (0)