From 97914b61460c6e5ea7e7f2725e02a1a029232a22 Mon Sep 17 00:00:00 2001
From: pjsier
Date: Fri, 28 Feb 2020 08:55:09 -0600
Subject: [PATCH] feat: create latest per-agency with combinefeeds

---
 city_scrapers_core/commands/combinefeeds.py | 17 ++++++++++++++++-
 1 file changed, 16 insertions(+), 1 deletion(-)

diff --git a/city_scrapers_core/commands/combinefeeds.py b/city_scrapers_core/commands/combinefeeds.py
index 35b3d81..b0db18b 100644
--- a/city_scrapers_core/commands/combinefeeds.py
+++ b/city_scrapers_core/commands/combinefeeds.py
@@ -1,7 +1,7 @@
 import json
 from datetime import datetime, timedelta
 from operator import itemgetter
-from urllib.parse import urlparse
+from urllib.parse import quote, urlparse
 
 from scrapy.commands import ScrapyCommand
 from scrapy.exceptions import UsageError
@@ -65,6 +65,13 @@ def combine_s3(self):
             meetings.extend(
                 [json.loads(line) for line in feed_text.split("\n") if line.strip()]
             )
+            # Copy latest results for each spider
+            spider_key = key.split("/")[-1]
+            client.copy_object(
+                Bucket=bucket,
+                Key=spider_key,
+                CopySource={"Bucket": bucket, "Key": key},
+            )
         meetings = sorted(meetings, key=itemgetter(self.start_key))
         yesterday_iso = (datetime.now() - timedelta(days=1)).isoformat()[:19]
         upcoming = [
@@ -123,6 +130,14 @@ def combine_azure(self):
             meetings.extend(
                 [json.loads(line) for line in feed_text.split("\n") if line]
             )
+            # Copy latest results for each spider
+            spider_blob_name = blob_name.split("/")[-1]
+            spider_blob = container_client.get_blob_client(spider_blob_name)
+            spider_blob.start_copy_from_url(
+                "https://{}.blob.core.windows.net/{}/{}".format(
+                    account_name, quote(container), blob_name
+                )
+            )
         meetings = sorted(meetings, key=itemgetter(self.start_key))
         yesterday_iso = (datetime.now() - timedelta(days=1)).isoformat()[:19]
         upcoming = [
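
Note for reviewers: both hunks do the same thing for their respective backend.
After reading each spider's newest dated feed, they copy that object to the
storage root under just the file name, so every agency gets a stable "latest"
URL that each run overwrites in place. A minimal standalone sketch of the S3
half, assuming boto3 credentials are configured in the environment; the bucket
name and key layout below are hypothetical, for illustration only:

    import boto3

    def copy_latest_per_spider(bucket, spider_keys):
        """Copy each spider's newest dated feed to a stable key at the
        bucket root, mirroring the copy_object call added in the patch."""
        client = boto3.client("s3")
        for key in spider_keys:
            # e.g. "2020/02/28/chi_library.json" -> "chi_library.json"
            spider_key = key.split("/")[-1]
            client.copy_object(
                Bucket=bucket,
                Key=spider_key,
                CopySource={"Bucket": bucket, "Key": key},
            )

    if __name__ == "__main__":
        # Hypothetical bucket and keys for illustration only
        copy_latest_per_spider(
            "example-city-scrapers-feeds",
            ["2020/02/28/chi_library.json", "2020/02/28/chi_parks.json"],
        )

The Azure hunk is the same idea expressed through azure-storage-blob: since
blob storage exposes no server-side rename, get_blob_client(spider_blob_name)
points at the destination blob and start_copy_from_url() pulls from the dated
blob's full URL.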