Skip to content

Commit

Permalink
feat: add wetbulb temp and darkness fraction functions to era5_data_a… (
Browse files Browse the repository at this point in the history
Breakthrough-Energy#29)

* feat: add wetbulb temp and darkness fraction functions to era5_data_agg.py, now renamed weather_data_agg.py

* chore: code style cleanup

* chore: import sorting

* chore: linting fixes

Co-authored-by: Daniel Olsen <[email protected]>
  • Loading branch information
evanpatz and danielolsen authored Jan 19, 2022
1 parent 3bf669a commit 552dadc
Show file tree
Hide file tree
Showing 3 changed files with 215 additions and 15 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,8 @@
import os

import geopandas as gpd
import pandas as pd
import numpy as np
import pandas as pd

from prereise.gather.demanddata.bldg_electrification import const

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,11 +9,16 @@
import cdsapi
import numpy as np
import pandas as pd
import psychrolib
import xarray as xr
from dateutil import tz
from scipy.spatial import cKDTree
from suntime import Sun

from prereise.gather.demanddata.bldg_electrification import const

psychrolib.SetUnitSystem(psychrolib.SI)

variable_names = {
"temp": {"era5": "2m_temperature", "nc": "t2m"},
"dewpt": {"era5": "2m_dewpoint_temperature", "nc": "d2m"},
Expand Down Expand Up @@ -46,7 +51,7 @@ def era5_download(years, directory, variable="temp"):
if not hasattr(years, "__iter__"):
years = [years]

# Error if 'years' includes any years prior to 1950
# Error if "years" includes any years prior to 1950
if min(years) < 1950:
raise ValueError("Input years must be 1950 or later")

Expand All @@ -63,7 +68,7 @@ def era5_download(years, directory, variable="temp"):
"cdsapi is not configured properly, see https://cds.climate.copernicus.eu/api-how-to"
)

# Create folder to store data for given variable if it doesn't yet exist
# Create folder to store data for given variable if it doesn't yet exist
os.makedirs(os.path.join(directory, variable), exist_ok=True)

for year in years:
Expand Down Expand Up @@ -102,7 +107,7 @@ def create_era5_pumas(
:param pandas.Series tract_puma_mapping: tract to puma mapping.
:param pandas.Series tract_pop: population, indexed by tract.
:param pandas.DataFrame tract_lat_lon: data frame, indexed by tract, with columns
'state', 'lat', and 'lon'.
"state", "lat", and "lon".
:param str directory: path to root directory for ERA5 downloads (not including variable name)
:param str: variable to produce
temp {Default} -- dry bulb temperature, corresponds to ERA5 variable "2m_temperature"
Expand Down Expand Up @@ -151,7 +156,7 @@ def lon_lat_to_cartesian(lon, lat):
else:
print("Confirmed: All required ERA5 input files present")

# Create folder to store data for given variable if it doesn't yet exist
# Create folder to store data for given variable if it doesn't yet exist
os.makedirs(os.path.join(directory, "pumas", f"{variable}s"), exist_ok=True)

# Combine tract-level data into single data frame
Expand Down Expand Up @@ -214,3 +219,196 @@ def lon_lat_to_cartesian(lon, lat):
),
index=False,
)


def dark_fractions(puma, puma_data, year):
    """Compute annual time series of fraction of each hour that is dark for a given puma
    :param str puma: puma name
    :param pandas.DataFrame puma_data: puma data for lat, long, and timezone
    :param int year: year of desired dark fractions
    :return: (*pandas.Series*) -- hourly dark fractions for the year
    """

    lat = puma_data.loc[puma, "latitude"]
    lon = puma_data.loc[puma, "longitude"]
    timezone_name = puma_data.loc[puma, "timezone"]
    # gettz is cached, so resolving the zone once is equivalent to resolving
    # it per-row as before.
    local_zone = tz.gettz(timezone_name)

    # Every hour of the year, generated in UTC then converted to local time.
    hours_local = pd.date_range(
        start=f"{year}-01-01", end=f"{year+1}-01-01", freq="H", tz="UTC"
    )[:-1].tz_convert(timezone_name)

    sun = Sun(lat, lon)
    hour_series = pd.Series(hours_local)

    # Per-hour local sunrise/sunset timestamps for the hour's calendar day.
    sunrise = hour_series.apply(
        lambda t: sun.get_local_sunrise_time(t, local_time_zone=local_zone)
    )
    sunset = hour_series.apply(
        lambda t: sun.get_local_sunset_time(t, local_time_zone=local_zone)
    )

    sun_df = pd.DataFrame(
        {"sunrise": sunrise, "sunset": sunset, "hour_local": hours_local.hour}
    )

    # Integer hour in which the sun rises/sets, and the dark fraction of
    # each of those boundary hours (minutes before sunrise / after sunset).
    sun_df["sunrise_hour"] = sun_df["sunrise"].apply(lambda t: t.hour)
    sun_df["sunset_hour"] = sun_df["sunset"].apply(lambda t: t.hour)
    sun_df["sunrise_dark_frac"] = sun_df["sunrise"].apply(lambda t: t.minute / 60)
    sun_df["sunset_dark_frac"] = sun_df["sunset"].apply(lambda t: 1 - t.minute / 60)

    def _hour_darkness(row):
        """Fraction of this hour that is dark: 0 fully light, 1 fully dark."""
        hour = row["hour_local"]
        if hour == row["sunrise_hour"]:
            return row["sunrise_dark_frac"]
        if hour == row["sunset_hour"]:
            return row["sunset_dark_frac"]
        # Strictly between sunrise and sunset -> daylight; otherwise night.
        return 0 if row["sunrise_hour"] < hour < row["sunset_hour"] else 1

    sun_df["dark_hour_frac"] = sun_df.apply(_hour_darkness, axis=1)

    return sun_df["dark_hour_frac"]


def generate_dark_fracs(year, state_list):
    """Generate puma level hourly time series of darkness fractions for all pumas within a state
    :param int year: year of desired dark fractions
    :param list state_list: states for csv's to be generated
    :export: (*csv*) -- statewide hourly dark fractions for every puma
    """

    # Puma metadata (latitude, longitude, timezone, state), indexed by puma id;
    # read from a path relative to the working directory.
    puma_data = pd.read_csv("data/puma_data.csv", index_col="puma")

    for state in state_list:
        # Restrict to this state's pumas; their ids become the output columns.
        puma_data_it = puma_data[puma_data["state"] == state]
        puma_dark_frac = pd.DataFrame(columns=list(puma_data_it.index))

        # One column of hourly dark fractions per puma. The full puma_data
        # frame is passed; dark_fractions selects the row for puma `i`.
        for i in list(puma_dark_frac.columns):
            puma_dark_frac[i] = dark_fractions(i, puma_data, year)

        # NOTE(review): pandas.DataFrame.to_csv cannot write to an https://
        # URL, so this call will fail at runtime -- presumably this should be
        # a local output path (or a separate blob-storage upload step).
        # TODO confirm the intended output destination.
        puma_dark_frac.to_csv(
            f"https://besciences.blob.core.windows.net/datasets/bldg_el/pumas/dark_frac/dark_frac_pumas_{state}_{year}.csv",
            index=False,
        )


def t_to_twb(temp_values, dwpt_values, press_values):
    """Compute wetbulb temperature from drybulb, dewpoint, and pressure
    :param list temp_values: drybulb temperatures, C
    :param list dwpt_values: dewpoint temperatures, C
    :param list press_values: pressures, Pa
    :return: (*list*) -- wetbulb temperatures
    """
    # Dewpoint is clamped to the drybulb value, since psychrolib rejects
    # dewpoints above the drybulb temperature.
    return [
        psychrolib.GetTWetBulbFromTDewPoint(drybulb, min(drybulb, dewpoint), pressure)
        for drybulb, dewpoint, pressure in zip(temp_values, dwpt_values, press_values)
    ]


def generate_wetbulb_temps(year, state_list):
    """Generate puma level hourly time series of wetbulb temperatures for all pumas within a state
    :param int year: year of desired wetbulb temperatures
    :param list state_list: states for csv's to be generated
    :export: (*csv*) -- statewide hourly wetbulb temperatures for every puma
    """

    for state in state_list:
        # Hourly drybulb (C), dewpoint (C), and pressure (Pa) series, one
        # column per puma, downloaded from blob storage (pandas can *read*
        # from https URLs).
        temps = pd.read_csv(
            f"https://besciences.blob.core.windows.net/datasets/bldg_el/pumas/temps/temps_pumas_{state}_{year}.csv"
        )
        dwpts = pd.read_csv(
            f"https://besciences.blob.core.windows.net/datasets/bldg_el/pumas/dewpoints/dewpts_pumas_{state}_{year}.csv"
        )
        press = pd.read_csv(
            f"https://besciences.blob.core.windows.net/datasets/bldg_el/pumas/press/press_pumas_{state}_{year}.csv"
        )

        # Column-wise apply: x is one puma's temperature column; x.name picks
        # the matching dewpoint and pressure columns.
        temps_wetbulb = temps.apply(lambda x: t_to_twb(x, dwpts[x.name], press[x.name]))

        # NOTE(review): to_csv cannot write to an https:// URL -- this call
        # will fail at runtime; presumably a local path (or an explicit
        # upload step) was intended. TODO confirm output destination.
        temps_wetbulb.to_csv(
            f"https://besciences.blob.core.windows.net/datasets/bldg_el/pumas/temps_wetbulb/temps_wetbulb_pumas_{state}_{year}.csv",
            index=False,
        )


# States covered by the aggregation: the 48 contiguous states plus DC
# (AK and HI do not appear in the list).
state_list = (
    "AL AZ AR CA CO CT DE DC FL GA ID IL IN IA KS KY LA ME MD MA MI MN MS MO "
    "MT NE NV NH NJ NM NY NC ND OH OK OR PA RI SC SD TN TX UT VT VA WA WV WI WY"
).split()
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
import geopandas as gpd
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import geopandas as gpd
from pandas.tseries.holiday import USFederalHolidayCalendar as calendar
from pandas.tseries.holiday import USFederalHolidayCalendar as calendar # noqa: N813
from scipy.stats import linregress
from sklearn.linear_model import LinearRegression
import matplotlib.pyplot as plt


def bkpt_scale(df, num_points, bkpt, heat_cool):
Expand Down Expand Up @@ -235,15 +235,15 @@ def make_hourly_series(load_temp_df, i):
load_temp_hr = load_temp_df[
(load_temp_df["hour_local"] == i)
& (load_temp_df["weekday"] < 5)
& (load_temp_df["holiday"] == False)
& ~load_temp_df["holiday"] # boolean column
].reset_index()
numpoints = daily_points * 5
elif wk_wknd == "wknd":
load_temp_hr = load_temp_df[
(load_temp_df["hour_local"] == i)
& (
(load_temp_df["weekday"] >= 5)
| (load_temp_df["holiday"] == True)
| load_temp_df["holiday"] # boolean column
)
].reset_index()
numpoints = daily_points * 2
Expand Down Expand Up @@ -328,14 +328,14 @@ def make_hourly_series(load_temp_df, i):
wk_graph = load_temp_df[
(load_temp_df["hour_local"] == i)
& (load_temp_df["weekday"] < 5)
& (load_temp_df["holiday"] == False)
& ~load_temp_df["holiday"] # boolean column
]
else:
wk_graph = load_temp_df[
(load_temp_df["hour_local"] == i)
& (
(load_temp_df["weekday"] >= 5)
| (load_temp_df["holiday"] == True)
| load_temp_df["holiday"] # boolean column
)
]

Expand Down Expand Up @@ -482,7 +482,7 @@ def temp_to_energy(temp_series, hourly_fits_df, db_wb_fit):

wk_wknd = (
"wk"
if temp_series["weekday"] < 5 and temp_series["holiday"] == False
if temp_series["weekday"] < 5 and ~temp_series["holiday"] # boolean value
else "wknd"
)

Expand Down Expand Up @@ -600,7 +600,9 @@ def main(zone_name, zone_name_shp, load_year, year):
hourly_fits_df, db_wb_fit = hourly_load_fit(temp_df_load_year)
hourly_fits_df.to_csv(f"dayhour_fits/{zone_name}_dayhour_fits_{load_year}.csv")

zone_profile_load_MWh = pd.DataFrame({"hour_utc": list(range(len(hours_utc)))})
zone_profile_load_MWh = pd.DataFrame( # noqa: N806
{"hour_utc": list(range(len(hours_utc)))}
)

temp_df, stats = zonal_data(puma_data_zone, hours_utc)

Expand All @@ -618,7 +620,7 @@ def main(zone_name, zone_name_shp, load_year, year):
energy_list.apply(lambda x: x[2]),
energy_list.apply(lambda x: sum(x)),
)
zone_profile_load_MWh = zone_profile_load_MWh.set_index("hour_utc")
zone_profile_load_MWh.set_index("hour_utc", inplace=True)
zone_profile_load_MWh.to_csv(f"Profiles/{zone_name}_profile_load_mw_{year}.csv")

(
Expand Down

0 comments on commit 552dadc

Please sign in to comment.