From 8e1f7392efd5e4dd7956c6614673e72e231d61b9 Mon Sep 17 00:00:00 2001
From: pjsier
Date: Fri, 10 Jul 2020 07:10:04 -0500
Subject: [PATCH] refactor: drop py 3.5 support, f strings, logs

Drops Python 3.5 support, replaces several calls to .format() with f
string literals, replaces print() calls with logs
---
 city_scrapers_core/commands/combinefeeds.py | 11 +++----
 city_scrapers_core/commands/genspider.py    | 33 +++++++++++----------
 city_scrapers_core/commands/validate.py     |  8 +++--
 city_scrapers_core/pipelines/diff.py        |  8 ++---
 city_scrapers_core/pipelines/validation.py  | 17 ++++++-----
 city_scrapers_core/spiders/legistar.py      |  8 ++---
 setup.py                                    |  2 +-
 7 files changed, 44 insertions(+), 43 deletions(-)

diff --git a/city_scrapers_core/commands/combinefeeds.py b/city_scrapers_core/commands/combinefeeds.py
index b0db18b..17e7c96 100644
--- a/city_scrapers_core/commands/combinefeeds.py
+++ b/city_scrapers_core/commands/combinefeeds.py
@@ -101,9 +101,7 @@ def combine_azure(self):
         account_name, account_key = feed_uri[8::].split("@")[0].split(":")
         container = feed_uri.split("@")[1].split("/")[0]
         container_client = ContainerClient(
-            "{}.blob.core.windows.net".format(account_name),
-            container,
-            credential=account_key,
+            f"{account_name}.blob.core.windows.net", container, credential=account_key,
         )
 
         max_days_previous = 3
@@ -134,9 +132,8 @@ def combine_azure(self):
             spider_blob_name = blob_name.split("/")[-1]
             spider_blob = container_client.get_blob_client(spider_blob_name)
             spider_blob.start_copy_from_url(
-                "https://{}.blob.core.windows.net/{}/{}".format(
-                    account_name, quote(container), blob_name
-                )
+                f"https://{account_name}.blob.core.windows.net"
+                f"/{quote(container)}/{blob_name}"
             )
         meetings = sorted(meetings, key=itemgetter(self.start_key))
         yesterday_iso = (datetime.now() - timedelta(days=1)).isoformat()[:19]
@@ -164,7 +161,7 @@ def get_spider_paths(self, path_list):
         """Get a list of the most recent scraper results for each spider"""
         spider_paths = []
         for spider in self.crawler_process.spider_loader.list():
-            all_spider_paths = [p for p in path_list if "{}.".format(spider) in p]
+            all_spider_paths = [p for p in path_list if f"{spider}." in p]
             if len(all_spider_paths) > 0:
                 spider_paths.append(sorted(all_spider_paths)[-1])
         return spider_paths

diff --git a/city_scrapers_core/commands/genspider.py b/city_scrapers_core/commands/genspider.py
index 2fbf794..d0348ad 100644
--- a/city_scrapers_core/commands/genspider.py
+++ b/city_scrapers_core/commands/genspider.py
@@ -1,3 +1,4 @@
+import logging
 import json
 import shutil
 import string
@@ -14,6 +15,8 @@
 
 USER_AGENT = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/60.0.3112.101 Safari/537.36"  # noqa
 
+logger = logging.getLogger(__name__)
+
 
 class Command(ScrapyCommand):
     requires_project = False
@@ -34,13 +37,13 @@ def run(self, args, opts):
         test_template = "test.tmpl"
         if "legistar.com" in domain:
             proto = "https" if start_url.startswith("https") else "http"
-            start_url = "{}://{}".format(proto, domain)
+            start_url = f"{proto}://{domain}"
             spider_template = "spider_legistar.tmpl"
             test_template = "test_legistar.tmpl"
             fixture_file = self._gen_legistar_fixtures(name, start_url)
         else:
             fixture_file = self._gen_fixtures(name, start_url)
-        classname = "{}Spider".format(string.capwords(name, sep="_").replace("_", ""))
+        classname = f"{string.capwords(name, sep='_').replace('_', '')}Spider"
         self._genspider(name, agency, classname, domain, start_url, spider_template)
         self._gen_tests(name, classname, start_url, fixture_file, test_template)
@@ -51,14 +54,12 @@ def _genspider(self, name, agency, classname, domain, start_url, template_file):
             "agency": agency,
             "domain": domain,
             "start_url": start_url,
-            "classname": "{}Spider".format(
-                string.capwords(name, sep="_").replace("_", "")
-            ),
+            "classname": f"{string.capwords(name, sep='_').replace('_', '')}Spider",
         }
-        spider_file = "{}.py".format(join(self.spiders_dir, name))
+        spider_file = f"{join(self.spiders_dir, name)}.py"
         shutil.copyfile(join(self.templates_dir, template_file), spider_file)
         render_templatefile(spider_file, **template_dict)
-        print("Created file: {}".format(spider_file))
+        logger.info(f"Created file: {spider_file}")
 
     def _gen_tests(self, name, classname, start_url, fixture_file, template_file):
         """Creates tests from test template file"""
@@ -70,34 +71,34 @@ def _gen_tests(self, name, classname, start_url, fixture_file, template_file):
         }
         if "legistar" not in name:
             template_dict["start_url"] = start_url
-        test_file = join(self.tests_dir, "test_{}.py".format(name))
+        test_file = join(self.tests_dir, f"test_{name}.py")
         shutil.copyfile(join(self.templates_dir, template_file), test_file)
         render_templatefile(test_file, **template_dict)
-        print("Created file: {}".format(test_file))
+        logger.info(f"Created file: {test_file}")
 
     def _gen_fixtures(self, name, start_url):
         """Creates fixures from HTML response at the start URL"""
         res = requests.get(start_url, headers={"user-agent": USER_AGENT})
         content = res.text.strip()
-        fixture_file = join(self.fixtures_dir, "{}.html".format(name))
+        fixture_file = join(self.fixtures_dir, f"{name}.html")
         with open(fixture_file, "w", encoding="utf-8") as f:
             f.write(content)
-        print("Created file: {}".format(fixture_file))
-        return "{}.html".format(name)
+        logger.info(f"Created file: {fixture_file}")
+        return f"{name}.html"
 
     def _gen_legistar_fixtures(self, name, start_url):
         """Creates fixtures from a Legistar response"""
         events = []
         les = LegistarEventsScraper()
         les.BASE_URL = start_url
-        les.EVENTSPAGE = "{}/Calendar.aspx".format(start_url)
+        les.EVENTSPAGE = f"{start_url}/Calendar.aspx"
         for event, _ in les.events(since=datetime.today().year):
             events.append((dict(event), None))
-        fixture_file = join(self.fixtures_dir, "{}.json".format(name))
+        fixture_file = join(self.fixtures_dir, f"{name}.json")
         with open(fixture_file, "w", encoding="utf-8") as f:
             json.dump(events, f)
-        print("Created file: {}".format(fixture_file))
-        return "{}.json".format(name)
+        logger.info(f"Created file: {fixture_file}")
+        return f"{name}.json"
 
     @property
     def spiders_dir(self):

diff --git a/city_scrapers_core/commands/validate.py b/city_scrapers_core/commands/validate.py
index 8c70233..877c200 100644
--- a/city_scrapers_core/commands/validate.py
+++ b/city_scrapers_core/commands/validate.py
@@ -1,3 +1,4 @@
+import logging
 import os
 from importlib import import_module
 
@@ -6,6 +7,9 @@
 
 from ..pipelines import ValidationPipeline
 
+logger = logging.getLogger(__name__)
+
+
 class Command(ScrapyCommand):
     requires_project = True
 
@@ -29,7 +33,7 @@ def run(self, args, opts):
         spider_list = self.crawler_process.spider_loader.list()
         spiders = [spider for spider in args if spider in spider_list]
         if len(spiders) == 0 and not opts.all:
-            print("No spiders provided, exiting...")
+            logger.info("No spiders provided, exiting...")
             return
         elif opts.all:
             spiders = spider_list
@@ -44,7 +48,7 @@ def _add_validation_pipeline(self):
         # Exit if pipeline already included
         if any(pipeline_name in pipeline for pipeline in pipelines.keys()):
             return
-        fullname = "{}.{}".format(ValidationPipeline.__module__, pipeline_name)
+        fullname = f"{ValidationPipeline.__module__}.{pipeline_name}"
         priority = 1
         if len(pipelines.keys()) > 0:
             priority = max(pipelines.values()) + 1

diff --git a/city_scrapers_core/pipelines/diff.py b/city_scrapers_core/pipelines/diff.py
index cd95568..fca9b9e 100644
--- a/city_scrapers_core/pipelines/diff.py
+++ b/city_scrapers_core/pipelines/diff.py
@@ -99,7 +99,7 @@ def __init__(self, crawler, output_format):
         self.spider = crawler.spider
         self.container = feed_uri.split("@")[1].split("/")[0]
         self.container_client = ContainerClient(
-            "{}.blob.core.windows.net".format(account_name),
+            f"{account_name}.blob.core.windows.net",
             self.container,
             credential=account_key,
         )
@@ -119,9 +119,7 @@ def load_previous_results(self):
                 ).strftime(self.feed_prefix)
             )
             spider_blobs = [
-                blob
-                for blob in matching_blobs
-                if "{}.".format(self.spider.name) in blob.name
+                blob for blob in matching_blobs if f"{self.spider.name}." in blob.name
             ]
             if len(spider_blobs) > 0:
                 break
@@ -170,7 +168,7 @@ def load_previous_results(self):
             spider_objects = [
                 obj
                 for obj in match_objects.get("Contents", [])
-                if "{}.".format(self.spider.name) in obj["Key"]
+                if f"{self.spider.name}." in obj["Key"]
             ]
             if len(spider_objects) > 0:
                 break

diff --git a/city_scrapers_core/pipelines/validation.py b/city_scrapers_core/pipelines/validation.py
index dc69f28..476e701 100644
--- a/city_scrapers_core/pipelines/validation.py
+++ b/city_scrapers_core/pipelines/validation.py
@@ -1,8 +1,12 @@
+import logging
 from collections import defaultdict
 
 from jsonschema.validators import Draft7Validator
 
+logger = logging.getLogger(__name__)
+
+
 class ValidationPipeline:
     """
     Check against schema if present, prints % valid for each property.
@@ -43,17 +47,14 @@ def process_item(self, item, spider):
     def validation_report(self, spider):
         """Prints a validation report to stdout and raise an error if fails"""
         props = list(self.error_count.keys())
-        print(
-            "\n{line}Validation summary for: {spider}{line}".format(
-                line="-" * 12, spider=spider.name
-            )
-        )
-        print("Validating {} items\n".format(self.item_count))
+        line_str = "-" * 12
+        logger.info(f"\n{line_str}\nValidation summary for: {spider.name}\n{line_str}")
+        logger.info(f"Validating {self.item_count} items\n")
         valid_list = []
         for prop in props:
             valid = (self.item_count - self.error_count[prop]) / self.item_count
             valid_list.append(valid)
-            print("{}: {:.0%}".format(prop, valid))
+            logger.info("{}: {:.0%}".format(prop, valid))
         try:
             assert all([val >= 0.9 for val in valid_list])
         except AssertionError:
@@ -66,7 +67,7 @@ def validation_report(self, spider):
         if self.enforce_validation:
             raise ValueError(message)
         else:
-            print(message)
+            logger.info(message)
 
     def _get_props_from_errors(self, errors):
         error_props = []

diff --git a/city_scrapers_core/spiders/legistar.py b/city_scrapers_core/spiders/legistar.py
index 27bc0a6..aa7ed46 100644
--- a/city_scrapers_core/spiders/legistar.py
+++ b/city_scrapers_core/spiders/legistar.py
@@ -18,7 +18,7 @@ def parse(self, response):
     def _call_legistar(self, since=None):
         les = LegistarEventsScraper()
         les.BASE_URL = self.base_url
-        les.EVENTSPAGE = "{}/Calendar.aspx".format(self.base_url)
+        les.EVENTSPAGE = f"{self.base_url}/Calendar.aspx"
         if not since:
             since = datetime.today().year
         return les.events(since=since)
@@ -29,7 +29,7 @@ def legistar_start(self, item):
         if start_date and start_time:
             try:
                 return datetime.strptime(
-                    "{} {}".format(start_date, start_time), "%m/%d/%Y %I:%M %p"
+                    f"{start_date} {start_time}", "%m/%d/%Y %I:%M %p"
                 )
             except ValueError:
                 return datetime.strptime(start_date, "%m/%d/%Y")
@@ -42,7 +42,7 @@ def legistar_links(self, item):
         return links
 
     def legistar_source(self, item):
-        default_url = "{}/Calendar.aspx".format(self.base_url)
+        default_url = f"{self.base_url}/Calendar.aspx"
         if isinstance(item.get("Name"), dict):
             return item["Name"].get("url", default_url)
         if isinstance(item.get("Meeting Details"), dict):
@@ -52,4 +52,4 @@
     @property
     def base_url(self):
         parsed_url = urlparse(self.start_urls[0])
-        return "{}://{}".format(parsed_url.scheme, parsed_url.netloc)
+        return f"{parsed_url.scheme}://{parsed_url.netloc}"

diff --git a/setup.py b/setup.py
index de0ce89..0761c8a 100644
--- a/setup.py
+++ b/setup.py
@@ -21,7 +21,7 @@
     install_requires=["jsonschema>=3.0.0a5", "pytz", "requests", "scrapy"],
     tests_requires=["flake8", "pytest", "isort"],
     extras_require={"aws": ["boto3"], "azure": ["azure-storage-blob>=12"]},
-    python_requires=">=3.5,<4.0",
+    python_requires=">=3.6,<4.0",
     classifiers=[
         "Programming Language :: Python :: 3",
         "License :: OSI Approved :: MIT License",
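
For reference, the pattern these hunks converge on is a module-level logger plus f-string interpolation in place of print() and str.format(). A minimal standalone sketch of that pattern (the module and its create_file helper below are illustrative only, not files in this repository):

import logging

# Module-level logger, mirroring the loggers added to genspider.py, validate.py,
# and validation.py; output goes through whatever logging configuration the
# hosting process (e.g. Scrapy) has set up instead of straight to stdout.
logger = logging.getLogger(__name__)


def create_file(path):
    # f-string interpolation replaces "Created file: {}".format(path),
    # and logger.info() replaces print().
    logger.info(f"Created file: {path}")


if __name__ == "__main__":
    # A standalone script needs a handler; basicConfig() is enough for a demo.
    logging.basicConfig(level=logging.INFO)
    create_file("example.html")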