From d3afe5b247f28074cab3277d6dfa3e3ef6e92ec7 Mon Sep 17 00:00:00 2001
From: Lane Smith <lane.smith@breakthroughenergy.org>
Date: Tue, 16 Feb 2021 16:17:17 -0800
Subject: [PATCH] test: make NREL EFS download tests faster

---
 .../nrel_efs/tests/test_get_efs_data.py       | 217 +++++++++++-------
 1 file changed, 134 insertions(+), 83 deletions(-)

diff --git a/prereise/gather/demanddata/nrel_efs/tests/test_get_efs_data.py b/prereise/gather/demanddata/nrel_efs/tests/test_get_efs_data.py
index ae277236d..36540064b 100644
--- a/prereise/gather/demanddata/nrel_efs/tests/test_get_efs_data.py
+++ b/prereise/gather/demanddata/nrel_efs/tests/test_get_efs_data.py
@@ -1,67 +1,113 @@
 import os
+import zipfile
 
 import pandas as pd
 from pandas.testing import assert_frame_equal
 from powersimdata.network.usa_tamu.constants.zones import abv2state
 
 from prereise.gather.demanddata.nrel_efs.get_efs_data import (
+    _check_electrification_scenarios_for_download,
+    _check_path,
+    _check_technology_advancements_for_download,
+    _download_data,
+    _extract_data,
     account_for_leap_year,
-    download_demand_data,
-    download_flexibility_data,
     partition_demand_by_sector,
     partition_flexibility_by_sector,
 )
 
 
-def test_download_demand_data():
-    # Download one of the EFS demand data sets
-    download_demand_data(es={"Reference"}, ta={"Slow"})
+def test_check_electrification_scenarios_for_download():
+    # Run the check
+    test_es = _check_electrification_scenarios_for_download(es={"All"})
 
-    try:
-        # Load the downloaded EFS demand data set
-        df = pd.read_csv("EFSLoadProfile_Reference_Slow.csv")
+    # Specify the expected values
+    exp_es = {"Reference", "Medium", "High"}
 
-        # Access the columns
-        test_cols = list(df.columns)
-        exp_cols = [
-            "Electrification",
-            "TechnologyAdvancement",
-            "Year",
-            "LocalHourID",
-            "State",
-            "Sector",
-            "Subsector",
-            "LoadMW",
-        ]
+    # Compare the two sets
+    assert test_es == exp_es
 
-        # Compare the two values
-        assert len(test_cols) == len(exp_cols)
 
-        # Remove the downloaded EFS data set
-        os.remove("EFSLoadProfile_Reference_Slow.csv")
+def test_check_technology_advancements_for_download():
+    # Run the check
+    test_ta = _check_technology_advancements_for_download(ta={"All"})
+
+    # Specify the expected values
+    exp_ta = {"Slow", "Moderate", "Rapid"}
+
+    # Compare the two sets
+    assert test_ta == exp_ta
+
+
+def test_check_path():
+    # Run the check
+    test_fpath = _check_path(fpath="")
+
+    # Specify the expected file path
+    exp_fpath = os.getcwd()
+
+    # Compare the two file paths
+    assert test_fpath == exp_fpath
 
-    except FileNotFoundError:
-        # If the automated extraction did not work, check that the .zip file was created
-        assert os.path.isfile("EFSLoadProfile_Reference_Slow.zip")
 
+def test_download_data():
+    try:
+        # Download a file using _download_data
+        _download_data(
+            zip_name="project_resstock_efs_2013.zip",
+            url="https://data.nrel.gov/system/files/128/project_resstock_efs_2013.zip",
+            fpath="",
+        )
+
+        # Check that the expected .zip file was downloaded
+        assert os.path.isfile("project_resstock_efs_2013.zip")
+
+    finally:
         # Remove the downloaded .zip file
-        os.remove("EFSLoadProfile_Reference_Slow.zip")
+        os.remove("project_resstock_efs_2013.zip")
 
 
-def test_download_flexibility_data():
-    # Download one of the EFS flexibility data sets
-    download_flexibility_data(es={"Reference"})
+def test_extract_data():
+    # Create a dummy demand data set
+    cont_states = sorted(set(abv2state) - {"AK", "HI"})
+    dummy_demand_data = {
+        "Electrification": ["High"] * 4 * 48 * 8760,
+        "TechnologyAdvancement": ["Rapid"] * 4 * 48 * 8760,
+        "Year": [2030] * 4 * 48 * 8760,
+        "LocalHourID": sorted(list(range(1, 8761)) * 4 * 48),
+        "State": sorted(list(cont_states) * 4) * 8760,
+        "Sector": ["Commercial", "Industrial", "Residential", "Transportation"]
+        * 48
+        * 8760,
+        "LoadMW": [1, 2, 3, 4] * 48 * 8760,
+    }
+    dummy_demand_df = pd.DataFrame(data=dummy_demand_data)
+    dummy_demand_df.to_csv("test_demand.csv", index=False)
+
+    # Create a .zip file of the dummy demand data set
+    with zipfile.ZipFile("test_demand.zip", "w") as z:
+        z.write("test_demand.csv")
+    os.remove("test_demand.csv")
 
     try:
-        # Load the downloaded EFS flexibility data set
-        df = pd.read_csv("EFSFlexLoadProfiles_Reference.csv")
+        # Try extracting the dummy .csv file from the dummy .zip file
+        _extract_data(
+            z=None,
+            zf_works=False,
+            zip_name="test_demand.zip",
+            csv_name="test_demand.csv",
+            fpath=os.getcwd(),
+            sz_path="C:/Program Files/7-Zip/7z.exe",
+        )
+
+        # Load the extracted dummy demand data set
+        df = pd.read_csv("test_demand.csv")
 
         # Access the columns
         test_cols = list(df.columns)
         exp_cols = [
             "Electrification",
             "TechnologyAdvancement",
-            "Flexibility",
             "Year",
             "LocalHourID",
             "State",
@@ -72,92 +118,97 @@ def test_download_flexibility_data():
         # Compare the two values
         assert len(test_cols) == len(exp_cols)
 
-        # Remove the downloaded EFS data set
-        os.remove("EFSFlexLoadProfiles_Reference.csv")
-
     except FileNotFoundError:
         # If the automated extraction did not work, check that the .zip file was created
-        assert os.path.isfile(
-            "EFS Flexible Load Profiles - Reference Electrification.zip"
-        )
+        assert os.path.isfile("test_demand.zip")
 
         # Remove the downloaded .zip file
-        os.remove("EFS Flexible Load Profiles - Reference Electrification.zip")
+        os.remove("test_demand.zip")
+
+    finally:
+        # Remove the extracted dummy .csv file
+        os.remove("test_demand.csv")
 
 
 def test_partition_demand_by_sector():
-    # Create a dummy data set
+    # Create a dummy demand data set
     cont_states = sorted(set(abv2state) - {"AK", "HI"})
-    dummy_data = {
+    dummy_demand_data = {
         "Electrification": ["High"] * 4 * 48 * 8760,
         "TechnologyAdvancement": ["Rapid"] * 4 * 48 * 8760,
         "Year": [2030] * 4 * 48 * 8760,
-        "LocalHourID": sorted([i for i in range(1, 8761)] * 4 * 48),
-        "State": sorted([i for i in cont_states] * 4) * 8760,
+        "LocalHourID": sorted(list(range(1, 8761)) * 4 * 48),
+        "State": sorted(list(cont_states) * 4) * 8760,
         "Sector": ["Commercial", "Industrial", "Residential", "Transportation"]
         * 48
         * 8760,
         "LoadMW": [1, 2, 3, 4] * 48 * 8760,
     }
-    dummy_df = pd.DataFrame(data=dummy_data)
-    dummy_df.to_csv("EFSLoadProfile_High_Rapid.csv", index=False)
+    dummy_demand_df = pd.DataFrame(data=dummy_demand_data)
+    dummy_demand_df.to_csv("EFSLoadProfile_High_Rapid.csv", index=False)
 
-    # Generate the test results
-    test_sect_dem = partition_demand_by_sector(
-        es="High", ta="Rapid", year=2030, save=False
-    )
+    try:
+        # Generate the test results
+        test_sect_dem = partition_demand_by_sector(
+            es="High", ta="Rapid", year=2030, save=False
+        )
 
-    # Create the expected results
-    exp_res_dem = pd.DataFrame(
-        3,
-        index=pd.date_range("2016-01-01", "2017-01-01", freq="H", closed="left"),
-        columns=cont_states,
-    )
-    exp_res_dem.index.name = "Local Time"
+        # Create the expected results
+        exp_res_dem = pd.DataFrame(
+            3,
+            index=pd.date_range("2016-01-01", "2017-01-01", freq="H", closed="left"),
+            columns=cont_states,
+        )
+        exp_res_dem.index.name = "Local Time"
 
-    # Compare the two DataFrames
-    assert_frame_equal(exp_res_dem, test_sect_dem["Residential"], check_names=False)
+        # Compare the two DataFrames
+        assert_frame_equal(exp_res_dem, test_sect_dem["Residential"], check_names=False)
 
-    # Delete the test .csv file
-    os.remove("EFSLoadProfile_High_Rapid.csv")
+    finally:
+        # Delete the test .csv file
+        os.remove("EFSLoadProfile_High_Rapid.csv")
 
 
 def test_partition_flexibility_by_sector():
-    # Create a dummy data set
+    # Create a dummy flexibility data set
     cont_states = sorted(set(abv2state) - {"AK", "HI"})
-    dummy_data = {
+    dummy_flex_data = {
         "Electrification": ["High"] * 4 * 48 * 8760,
         "TechnologyAdvancement": ["Rapid"] * 4 * 48 * 8760,
         "Flexibility": ["Base"] * 4 * 48 * 8760,
         "Year": [2030] * 4 * 48 * 8760,
-        "LocalHourID": sorted([i for i in range(1, 8761)] * 4 * 48),
-        "State": sorted([i for i in cont_states] * 4) * 8760,
+        "LocalHourID": sorted(list(range(1, 8761)) * 4 * 48),
+        "State": sorted(list(cont_states) * 4) * 8760,
         "Sector": ["Commercial", "Industrial", "Residential", "Transportation"]
         * 48
         * 8760,
         "LoadMW": [1, 2, 3, 4] * 48 * 8760,
     }
-    dummy_df = pd.DataFrame(data=dummy_data)
-    dummy_df.to_csv("EFSFlexLoadProfiles_High.csv", index=False)
+    dummy_flex_df = pd.DataFrame(data=dummy_flex_data)
+    dummy_flex_df.to_csv("EFSFlexLoadProfiles_High.csv", index=False)
 
-    # Generate the test results
-    test_sect_flex = partition_flexibility_by_sector(
-        es="High", ta="Rapid", flex="Base", year=2030, save=False
-    )
+    try:
+        # Generate the test results
+        test_sect_flex = partition_flexibility_by_sector(
+            es="High", ta="Rapid", flex="Base", year=2030, save=False
+        )
 
-    # Create the expected results
-    exp_res_flex = pd.DataFrame(
-        3,
-        index=pd.date_range("2016-01-01", "2017-01-01", freq="H", closed="left"),
-        columns=cont_states,
-    )
-    exp_res_flex.index.name = "Local Time"
+        # Create the expected results
+        exp_res_flex = pd.DataFrame(
+            3,
+            index=pd.date_range("2016-01-01", "2017-01-01", freq="H", closed="left"),
+            columns=cont_states,
+        )
+        exp_res_flex.index.name = "Local Time"
 
-    # Compare the two DataFrames
-    assert_frame_equal(exp_res_flex, test_sect_flex["Residential"], check_names=False)
+        # Compare the two DataFrames
+        assert_frame_equal(
+            exp_res_flex, test_sect_flex["Residential"], check_names=False
+        )
 
-    # Delete the test .csv file
-    os.remove("EFSFlexLoadProfiles_High.csv")
+    finally:
+        # Delete the test .csv file
+        os.remove("EFSFlexLoadProfiles_High.csv")
 
 
 def test_account_for_leap_year():