test: make NREL EFS download tests faster #148

Merged (1 commit, Feb 17, 2021)
Changes from all commits
217 changes: 134 additions & 83 deletions in prereise/gather/demanddata/nrel_efs/tests/test_get_efs_data.py
@@ -1,67 +1,113 @@
import os
import zipfile

import pandas as pd
from pandas.testing import assert_frame_equal
from powersimdata.network.usa_tamu.constants.zones import abv2state

from prereise.gather.demanddata.nrel_efs.get_efs_data import (
_check_electrification_scenarios_for_download,
_check_path,
_check_technology_advancements_for_download,
_download_data,
_extract_data,
account_for_leap_year,
download_demand_data,
download_flexibility_data,
partition_demand_by_sector,
partition_flexibility_by_sector,
)


def test_download_demand_data():
# Download one of the EFS demand data sets
download_demand_data(es={"Reference"}, ta={"Slow"})
def test_check_electrification_scenarios_for_download():
# Run the check
test_es = _check_electrification_scenarios_for_download(es={"All"})

try:
# Load the downloaded EFS demand data set
df = pd.read_csv("EFSLoadProfile_Reference_Slow.csv")
# Specify the expected values
exp_es = {"Reference", "Medium", "High"}

# Access the columns
test_cols = list(df.columns)
exp_cols = [
"Electrification",
"TechnologyAdvancement",
"Year",
"LocalHourID",
"State",
"Sector",
"Subsector",
"LoadMW",
]
# Compare the two sets
assert test_es == exp_es

# Compare the two values
assert len(test_cols) == len(exp_cols)

# Remove the downloaded EFS data set
os.remove("EFSLoadProfile_Reference_Slow.csv")
def test_check_technology_advancements_for_download():
# Run the check
test_ta = _check_technology_advancements_for_download(ta={"All"})

# Specify the expected values
exp_ta = {"Slow", "Moderate", "Rapid"}

# Compare the two sets
assert test_ta == exp_ta


def test_check_path():
# Run check
test_fpath = _check_path(fpath="")

# Specify the expected file path
exp_fpath = os.getcwd()

# Compare the two file paths
assert test_fpath == exp_fpath

except FileNotFoundError:
# If the automated extraction did not work, check that the .zip file was created
assert os.path.isfile("EFSLoadProfile_Reference_Slow.zip")

def test_download_data():
try:
# Download a file using _download_data
_download_data(
zip_name="project_resstock_efs_2013.zip",
url="https://data.nrel.gov/system/files/128/project_resstock_efs_2013.zip",
fpath="",
)

# Check that the expected .zip file was downloaded
assert os.path.isfile("project_resstock_efs_2013.zip")

finally:
# Remove the downloaded .zip file
os.remove("EFSLoadProfile_Reference_Slow.zip")
os.remove("project_resstock_efs_2013.zip")


def test_download_flexibility_data():
# Download one of the EFS flexibility data sets
download_flexibility_data(es={"Reference"})
def test_extract_data():
# Create a dummy demand data set
cont_states = sorted(set(abv2state) - {"AK", "HI"})
dummy_demand_data = {
"Electrification": ["High"] * 4 * 48 * 8760,
"TechnologyAdvancement": ["Rapid"] * 4 * 48 * 8760,
"Year": [2030] * 4 * 48 * 8760,
"LocalHourID": sorted(list(range(1, 8761)) * 4 * 48),
"State": sorted(list(cont_states) * 4) * 8760,
"Sector": ["Commercial", "Industrial", "Residential", "Transportation"]
* 48
* 8760,
"LoadMW": [1, 2, 3, 4] * 48 * 8760,
}
dummy_demand_df = pd.DataFrame(data=dummy_demand_data)
dummy_demand_df.to_csv("test_demand.csv", index=False)

# Create a .zip file of the dummy demand data set
with zipfile.ZipFile("test_demand.zip", "w") as z:
z.write("test_demand.csv")
os.remove("test_demand.csv")

try:
# Load the downloaded EFS flexibility data set
df = pd.read_csv("EFSFlexLoadProfiles_Reference.csv")
# Try extracting the dummy .csv file from the dummy .zip file
_extract_data(
z=None,
zf_works=False,
zip_name="test_demand.zip",
csv_name="test_demand.csv",
fpath=os.getcwd(),
sz_path="C:/Program Files/7-Zip/7z.exe",
)

# Load the downloaded EFS demand data set
df = pd.read_csv("test_demand.csv")

# Access the columns
test_cols = list(df.columns)
exp_cols = [
"Electrification",
"TechnologyAdvancement",
"Flexibility",
"Year",
"LocalHourID",
"State",
@@ -72,92 +118,97 @@ def test_download_flexibility_data():
# Compare the two values
assert len(test_cols) == len(exp_cols)

# Remove the downloaded EFS data set
os.remove("EFSFlexLoadProfiles_Reference.csv")

except FileNotFoundError:
# If the automated extraction did not work, check that the .zip file was created
assert os.path.isfile(
"EFS Flexible Load Profiles - Reference Electrification.zip"
)
assert os.path.isfile("test_demand.zip")

# Remove the downloaded .zip file
os.remove("EFS Flexible Load Profiles - Reference Electrification.zip")
os.remove("test_demand.zip")

finally:
# Remove the downloaded EFS data set
os.remove("test_demand.csv")


def test_partition_demand_by_sector():
# Create a dummy data set
# Create a dummy demand data set
cont_states = sorted(set(abv2state) - {"AK", "HI"})
dummy_data = {
dummy_demand_data = {
"Electrification": ["High"] * 4 * 48 * 8760,
"TechnologyAdvancement": ["Rapid"] * 4 * 48 * 8760,
"Year": [2030] * 4 * 48 * 8760,
"LocalHourID": sorted([i for i in range(1, 8761)] * 4 * 48),
"State": sorted([i for i in cont_states] * 4) * 8760,
"LocalHourID": sorted(list(range(1, 8761)) * 4 * 48),
"State": sorted(list(cont_states) * 4) * 8760,
"Sector": ["Commercial", "Industrial", "Residential", "Transportation"]
* 48
* 8760,
"LoadMW": [1, 2, 3, 4] * 48 * 8760,
}
dummy_df = pd.DataFrame(data=dummy_data)
dummy_df.to_csv("EFSLoadProfile_High_Rapid.csv", index=False)
dummy_demand_df = pd.DataFrame(data=dummy_demand_data)
dummy_demand_df.to_csv("EFSLoadProfile_High_Rapid.csv", index=False)

# Generate the test results
test_sect_dem = partition_demand_by_sector(
es="High", ta="Rapid", year=2030, save=False
)
try:
# Generate the test results
test_sect_dem = partition_demand_by_sector(
es="High", ta="Rapid", year=2030, save=False
)

# Create the expected results
exp_res_dem = pd.DataFrame(
3,
index=pd.date_range("2016-01-01", "2017-01-01", freq="H", closed="left"),
columns=cont_states,
)
exp_res_dem.index.name = "Local Time"
# Create the expected results
exp_res_dem = pd.DataFrame(
3,
index=pd.date_range("2016-01-01", "2017-01-01", freq="H", closed="left"),
columns=cont_states,
)
exp_res_dem.index.name = "Local Time"

# Compare the two DataFrames
assert_frame_equal(exp_res_dem, test_sect_dem["Residential"], check_names=False)
# Compare the two DataFrames
assert_frame_equal(exp_res_dem, test_sect_dem["Residential"], check_names=False)

# Delete the test .csv file
os.remove("EFSLoadProfile_High_Rapid.csv")
finally:
# Delete the test .csv file
os.remove("EFSLoadProfile_High_Rapid.csv")


def test_partition_flexibility_by_sector():
# Create a dummy data set
# Create a dummy flexibility data set
cont_states = sorted(set(abv2state) - {"AK", "HI"})
dummy_data = {
dummy_flex_data = {
"Electrification": ["High"] * 4 * 48 * 8760,
"TechnologyAdvancement": ["Rapid"] * 4 * 48 * 8760,
"Flexibility": ["Base"] * 4 * 48 * 8760,
"Year": [2030] * 4 * 48 * 8760,
"LocalHourID": sorted([i for i in range(1, 8761)] * 4 * 48),
"State": sorted([i for i in cont_states] * 4) * 8760,
"LocalHourID": sorted(list(range(1, 8761)) * 4 * 48),
"State": sorted(list(cont_states) * 4) * 8760,
"Sector": ["Commercial", "Industrial", "Residential", "Transportation"]
* 48
* 8760,
"LoadMW": [1, 2, 3, 4] * 48 * 8760,
}
dummy_df = pd.DataFrame(data=dummy_data)
dummy_df.to_csv("EFSFlexLoadProfiles_High.csv", index=False)
dummy_flex_df = pd.DataFrame(data=dummy_flex_data)
dummy_flex_df.to_csv("EFSFlexLoadProfiles_High.csv", index=False)

# Generate the test results
test_sect_flex = partition_flexibility_by_sector(
es="High", ta="Rapid", flex="Base", year=2030, save=False
)
try:
# Generate the test results
test_sect_flex = partition_flexibility_by_sector(
es="High", ta="Rapid", flex="Base", year=2030, save=False
)

# Create the expected results
exp_res_flex = pd.DataFrame(
3,
index=pd.date_range("2016-01-01", "2017-01-01", freq="H", closed="left"),
columns=cont_states,
)
exp_res_flex.index.name = "Local Time"
# Create the expected results
exp_res_flex = pd.DataFrame(
3,
index=pd.date_range("2016-01-01", "2017-01-01", freq="H", closed="left"),
columns=cont_states,
)
exp_res_flex.index.name = "Local Time"

# Compare the two DataFrames
assert_frame_equal(exp_res_flex, test_sect_flex["Residential"], check_names=False)
# Compare the two DataFrames
assert_frame_equal(
exp_res_flex, test_sect_flex["Residential"], check_names=False
)

# Delete the test .csv file
os.remove("EFSFlexLoadProfiles_High.csv")
finally:
# Delete the test .csv file
os.remove("EFSFlexLoadProfiles_High.csv")


def test_account_for_leap_year():
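For context on why the suite gets faster: the rewritten tests above build small dummy EFS-style CSV fixtures locally instead of downloading the full NREL load-profile files, and only test_download_data still fetches a file from data.nrel.gov. Below is a minimal sketch of the fixture arithmetic and one way to run just this module locally; the pytest invocation is an assumption based on the file path above, not part of the diff.

import pytest

# The dummy EFS-style frames built in these tests hold one row per
# (sector, state, hour): 4 sectors x 48 contiguous states x 8760 hours.
n_rows = 4 * 48 * 8760
print(f"dummy fixture rows: {n_rows}")  # 1681920

# Run only this test module (assumes pytest, pandas, and powersimdata are installed).
if __name__ == "__main__":
    raise SystemExit(
        pytest.main(
            ["prereise/gather/demanddata/nrel_efs/tests/test_get_efs_data.py", "-v"]
        )
    )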