From d3afe5b247f28074cab3277d6dfa3e3ef6e92ec7 Mon Sep 17 00:00:00 2001 From: Lane Smith Date: Tue, 16 Feb 2021 16:17:17 -0800 Subject: [PATCH] test: make NREL EFS download tests faster --- .../nrel_efs/tests/test_get_efs_data.py | 217 +++++++++++------- 1 file changed, 134 insertions(+), 83 deletions(-) diff --git a/prereise/gather/demanddata/nrel_efs/tests/test_get_efs_data.py b/prereise/gather/demanddata/nrel_efs/tests/test_get_efs_data.py index ae277236d..36540064b 100644 --- a/prereise/gather/demanddata/nrel_efs/tests/test_get_efs_data.py +++ b/prereise/gather/demanddata/nrel_efs/tests/test_get_efs_data.py @@ -1,67 +1,113 @@ import os +import zipfile import pandas as pd from pandas.testing import assert_frame_equal from powersimdata.network.usa_tamu.constants.zones import abv2state from prereise.gather.demanddata.nrel_efs.get_efs_data import ( + _check_electrification_scenarios_for_download, + _check_path, + _check_technology_advancements_for_download, + _download_data, + _extract_data, account_for_leap_year, - download_demand_data, - download_flexibility_data, partition_demand_by_sector, partition_flexibility_by_sector, ) -def test_download_demand_data(): - # Download one of the EFS demand data sets - download_demand_data(es={"Reference"}, ta={"Slow"}) +def test_check_electrification_scenarios_for_download(): + # Run the check + test_es = _check_electrification_scenarios_for_download(es={"All"}) - try: - # Load the downloaded EFS demand data set - df = pd.read_csv("EFSLoadProfile_Reference_Slow.csv") + # Specify the expected values + exp_es = {"Reference", "Medium", "High"} - # Access the columns - test_cols = list(df.columns) - exp_cols = [ - "Electrification", - "TechnologyAdvancement", - "Year", - "LocalHourID", - "State", - "Sector", - "Subsector", - "LoadMW", - ] + # Compare the two sets + assert test_es == exp_es - # Compare the two values - assert len(test_cols) == len(exp_cols) - # Remove the downloaded EFS data set - os.remove("EFSLoadProfile_Reference_Slow.csv") +def test_check_technology_advancements_for_download(): + # Run the check + test_ta = _check_technology_advancements_for_download(ta={"All"}) + + # Specify the expected values + exp_ta = {"Slow", "Moderate", "Rapid"} + + # Compare the two sets + assert test_ta == exp_ta + + +def test_check_path(): + # Run check + test_fpath = _check_path(fpath="") + + # Specify the expected file path + exp_fpath = os.getcwd() + + # Compare the two file paths + assert test_fpath == exp_fpath - except FileNotFoundError: - # If the automated extraction did not work, check that the .zip file was created - assert os.path.isfile("EFSLoadProfile_Reference_Slow.zip") +def test_download_data(): + try: + # Download a file using _download_data + _download_data( + zip_name="project_resstock_efs_2013.zip", + url="https://data.nrel.gov/system/files/128/project_resstock_efs_2013.zip", + fpath="", + ) + + # Check that the expected .zip file was downloaded + assert os.path.isfile("project_resstock_efs_2013.zip") + + finally: # Remove the downloaded .zip file - os.remove("EFSLoadProfile_Reference_Slow.zip") + os.remove("project_resstock_efs_2013.zip") -def test_download_flexibility_data(): - # Download one of the EFS flexibility data sets - download_flexibility_data(es={"Reference"}) +def test_extract_data(): + # Create a dummy demand data set + cont_states = sorted(set(abv2state) - {"AK", "HI"}) + dummy_demand_data = { + "Electrification": ["High"] * 4 * 48 * 8760, + "TechnologyAdvancement": ["Rapid"] * 4 * 48 * 8760, + "Year": [2030] * 4 * 48 * 8760, + "LocalHourID": sorted(list(range(1, 8761)) * 4 * 48), + "State": sorted(list(cont_states) * 4) * 8760, + "Sector": ["Commercial", "Industrial", "Residential", "Transportation"] + * 48 + * 8760, + "LoadMW": [1, 2, 3, 4] * 48 * 8760, + } + dummy_demand_df = pd.DataFrame(data=dummy_demand_data) + dummy_demand_df.to_csv("test_demand.csv", index=False) + + # Create a .zip file of the dummy demand data set + with zipfile.ZipFile("test_demand.zip", "w") as z: + z.write("test_demand.csv") + os.remove("test_demand.csv") try: - # Load the downloaded EFS flexibility data set - df = pd.read_csv("EFSFlexLoadProfiles_Reference.csv") + # Try extracting the dummy .csv file from the dummy .zip file + _extract_data( + z=None, + zf_works=False, + zip_name="test_demand.zip", + csv_name="test_demand.csv", + fpath=os.getcwd(), + sz_path="C:/Program Files/7-Zip/7z.exe", + ) + + # Load the downloaded EFS demand data set + df = pd.read_csv("test_demand.csv") # Access the columns test_cols = list(df.columns) exp_cols = [ "Electrification", "TechnologyAdvancement", - "Flexibility", "Year", "LocalHourID", "State", @@ -72,92 +118,97 @@ def test_download_flexibility_data(): # Compare the two values assert len(test_cols) == len(exp_cols) - # Remove the downloaded EFS data set - os.remove("EFSFlexLoadProfiles_Reference.csv") - except FileNotFoundError: # If the automated extraction did not work, check that the .zip file was created - assert os.path.isfile( - "EFS Flexible Load Profiles - Reference Electrification.zip" - ) + assert os.path.isfile("test_demand.zip") # Remove the downloaded .zip file - os.remove("EFS Flexible Load Profiles - Reference Electrification.zip") + os.remove("test_demand.zip") + + finally: + # Remove the downloaded EFS data set + os.remove("test_demand.csv") def test_partition_demand_by_sector(): - # Create a dummy data set + # Create a dummy demand data set cont_states = sorted(set(abv2state) - {"AK", "HI"}) - dummy_data = { + dummy_demand_data = { "Electrification": ["High"] * 4 * 48 * 8760, "TechnologyAdvancement": ["Rapid"] * 4 * 48 * 8760, "Year": [2030] * 4 * 48 * 8760, - "LocalHourID": sorted([i for i in range(1, 8761)] * 4 * 48), - "State": sorted([i for i in cont_states] * 4) * 8760, + "LocalHourID": sorted(list(range(1, 8761)) * 4 * 48), + "State": sorted(list(cont_states) * 4) * 8760, "Sector": ["Commercial", "Industrial", "Residential", "Transportation"] * 48 * 8760, "LoadMW": [1, 2, 3, 4] * 48 * 8760, } - dummy_df = pd.DataFrame(data=dummy_data) - dummy_df.to_csv("EFSLoadProfile_High_Rapid.csv", index=False) + dummy_demand_df = pd.DataFrame(data=dummy_demand_data) + dummy_demand_df.to_csv("EFSLoadProfile_High_Rapid.csv", index=False) - # Generate the test results - test_sect_dem = partition_demand_by_sector( - es="High", ta="Rapid", year=2030, save=False - ) + try: + # Generate the test results + test_sect_dem = partition_demand_by_sector( + es="High", ta="Rapid", year=2030, save=False + ) - # Create the expected results - exp_res_dem = pd.DataFrame( - 3, - index=pd.date_range("2016-01-01", "2017-01-01", freq="H", closed="left"), - columns=cont_states, - ) - exp_res_dem.index.name = "Local Time" + # Create the expected results + exp_res_dem = pd.DataFrame( + 3, + index=pd.date_range("2016-01-01", "2017-01-01", freq="H", closed="left"), + columns=cont_states, + ) + exp_res_dem.index.name = "Local Time" - # Compare the two DataFrames - assert_frame_equal(exp_res_dem, test_sect_dem["Residential"], check_names=False) + # Compare the two DataFrames + assert_frame_equal(exp_res_dem, test_sect_dem["Residential"], check_names=False) - # Delete the test .csv file - os.remove("EFSLoadProfile_High_Rapid.csv") + finally: + # Delete the test .csv file + os.remove("EFSLoadProfile_High_Rapid.csv") def test_partition_flexibility_by_sector(): - # Create a dummy data set + # Create a dummy flexibility data set cont_states = sorted(set(abv2state) - {"AK", "HI"}) - dummy_data = { + dummy_flex_data = { "Electrification": ["High"] * 4 * 48 * 8760, "TechnologyAdvancement": ["Rapid"] * 4 * 48 * 8760, "Flexibility": ["Base"] * 4 * 48 * 8760, "Year": [2030] * 4 * 48 * 8760, - "LocalHourID": sorted([i for i in range(1, 8761)] * 4 * 48), - "State": sorted([i for i in cont_states] * 4) * 8760, + "LocalHourID": sorted(list(range(1, 8761)) * 4 * 48), + "State": sorted(list(cont_states) * 4) * 8760, "Sector": ["Commercial", "Industrial", "Residential", "Transportation"] * 48 * 8760, "LoadMW": [1, 2, 3, 4] * 48 * 8760, } - dummy_df = pd.DataFrame(data=dummy_data) - dummy_df.to_csv("EFSFlexLoadProfiles_High.csv", index=False) + dummy_flex_df = pd.DataFrame(data=dummy_flex_data) + dummy_flex_df.to_csv("EFSFlexLoadProfiles_High.csv", index=False) - # Generate the test results - test_sect_flex = partition_flexibility_by_sector( - es="High", ta="Rapid", flex="Base", year=2030, save=False - ) + try: + # Generate the test results + test_sect_flex = partition_flexibility_by_sector( + es="High", ta="Rapid", flex="Base", year=2030, save=False + ) - # Create the expected results - exp_res_flex = pd.DataFrame( - 3, - index=pd.date_range("2016-01-01", "2017-01-01", freq="H", closed="left"), - columns=cont_states, - ) - exp_res_flex.index.name = "Local Time" + # Create the expected results + exp_res_flex = pd.DataFrame( + 3, + index=pd.date_range("2016-01-01", "2017-01-01", freq="H", closed="left"), + columns=cont_states, + ) + exp_res_flex.index.name = "Local Time" - # Compare the two DataFrames - assert_frame_equal(exp_res_flex, test_sect_flex["Residential"], check_names=False) + # Compare the two DataFrames + assert_frame_equal( + exp_res_flex, test_sect_flex["Residential"], check_names=False + ) - # Delete the test .csv file - os.remove("EFSFlexLoadProfiles_High.csv") + finally: + # Delete the test .csv file + os.remove("EFSFlexLoadProfiles_High.csv") def test_account_for_leap_year():