diff --git a/prereise/gather/demanddata/eia/clean_data.py b/prereise/gather/demanddata/eia/clean_data.py index c72dc4207..866ab1141 100644 --- a/prereise/gather/demanddata/eia/clean_data.py +++ b/prereise/gather/demanddata/eia/clean_data.py @@ -4,7 +4,7 @@ def fix_dataframe_outliers(demand): """Make a dataframe of demand with outliers replaced with values - interpolated from the non-outlier edge points using slope_interpolate + interpolated from the non-outlier edge points using slope_interpolate :param pandas.Dataframe demand: demand data frame with UTC time index and BA name as column name @@ -21,10 +21,10 @@ def fix_dataframe_outliers(demand): def slope_interpolate(ba_df): """Look for demand outliers by applying a z-score threshold to the demand - slope. Loop through all the outliers detected, determine the non-outlier - edge points and then interpolate a line joining these 2 edge points. The - line value at the timestamp of the the outlier event is used to replace - the anomalous value. + slope. Loop through all the outliers detected, determine the non-outlier + edge points and then interpolate a line joining these 2 edge points. The + line value at the timestamp of the outlier event is used to replace + the anomalous value. 
:param pandas.DataFrame ba_df: demand data frame with UTC time as index and BA name as column name @@ -111,7 +111,7 @@ def slope_interpolate(ba_df): def replace_with_shifted_demand(demand, start, end): """Replaces missing data within overall demand dataframe with averages - of nearby shifted demand + of nearby shifted demand :param pandas.DataFrame demand: Dataframe with hourly demand where the columns are BA regions diff --git a/prereise/gather/demanddata/eia/get_eia_data.py b/prereise/gather/demanddata/eia/get_eia_data.py index 095132194..485bd7bba 100644 --- a/prereise/gather/demanddata/eia/get_eia_data.py +++ b/prereise/gather/demanddata/eia/get_eia_data.py @@ -8,8 +8,8 @@ def from_download(tok, start_date, end_date, offset_days, series_list): - """Downloads and assemble dataset of demand data per balancing authority \ - for desired date range. + """Downloads and assembles dataset of demand data per balancing authority + for desired date range. :param str tok: token obtained by registering with EIA. :param datetime.datetime start_date: start date. @@ -39,7 +39,7 @@ def from_download(tok, start_date, end_date, offset_days, series_list): def from_excel(directory, series_list, start_date, end_date): """Assembles EIA balancing authority (BA) data from pre-downloaded Excel - spreadsheets. The spreadsheets contain data from July 2015 to present. + spreadsheets. The spreadsheets contain data from July 2015 to present. :param str directory: location of Excel files. :param list series_list: list of BA initials, e.g., ['PSE',BPAT','CISO']. 
@@ -69,7 +69,7 @@ def from_excel(directory, series_list, start_date, end_date): def get_ba_demand(ba_code_list, start_date, end_date, api_key): """Downloads the demand between the start and end dates for a list of - balancing authorities + balancing authorities :param pandas.DataFrame ba_code_list: List of BAs to download from eia :param datetime.datetime start_date: beginning bound for the demand df @@ -87,7 +87,7 @@ def get_ba_demand(ba_code_list, start_date, end_date, api_key): class EIAgov(object): """Copied from `this link `_. + 2014/11/18/downloading-eias-data-with-python/>`_. :param str token: EIA token. :param list series: id code(s) of the series to be downloaded. diff --git a/prereise/gather/demanddata/eia/map_ba.py b/prereise/gather/demanddata/eia/map_ba.py index df152e5a5..003a37f54 100644 --- a/prereise/gather/demanddata/eia/map_ba.py +++ b/prereise/gather/demanddata/eia/map_ba.py @@ -4,7 +4,7 @@ def transform_ba_to_region(demand, mapping): """Transforms column of demand dataframe to regions defined by - dictionary mapping + dictionary mapping :param demand: dataframe for the demand :type demand: pandas.DataFrame @@ -35,7 +35,7 @@ def transform_ba_to_region(demand, mapping): def map_to_loadzone(agg_demand, bus_map): """Transforms columns of demand dataframe from BA regions to load zones - according to bus_map + according to bus_map :param agg_demand: dataframe for the aggregated region demand :type agg_demand: pandas.DataFrame @@ -71,7 +71,7 @@ def map_to_loadzone(agg_demand, bus_map): def map_grid_buses_to_county(grid): """Find the county in the U.S. 
territory that each load bus - in the query grid belongs to + in the query grid belongs to :param grid: the name of the query grid :type grid: Grid diff --git a/prereise/gather/hydrodata/eia/get_profile_by_us_monthly_factor.py b/prereise/gather/hydrodata/eia/get_profile_by_us_monthly_factor.py index c39a10488..a1f59d643 100644 --- a/prereise/gather/hydrodata/eia/get_profile_by_us_monthly_factor.py +++ b/prereise/gather/hydrodata/eia/get_profile_by_us_monthly_factor.py @@ -5,7 +5,7 @@ def get_profile(hydro_plant, start="2016-01-01-00", end="2016-12-31-23"): """Creates hydro profile from monthly capacity factors reported by EIA - `here `_. + `here `_. :param pandas.DataFrame hydro_plant: data frame with *'Pmax'* as column and *'plant_id'* as indices. diff --git a/prereise/gather/solardata/ga_wind/ga_wind.py b/prereise/gather/solardata/ga_wind/ga_wind.py index 50180fe3b..4462d7235 100644 --- a/prereise/gather/solardata/ga_wind/ga_wind.py +++ b/prereise/gather/solardata/ga_wind/ga_wind.py @@ -14,7 +14,7 @@ def retrieve_data( solar_plant, hs_api_key, start_date="2007-01-01", end_date="2014-01-01" ): """Retrieves irradiance data from Gridded Atmospheric Wind Integration - National dataset. + National dataset. :param pandas.DataFrame solar_plant: data frame with *'lat'*, *'lon'* and *'Pmax'* as columns and *'plant_id'* as indices. diff --git a/prereise/gather/solardata/helpers.py b/prereise/gather/solardata/helpers.py index 867f02619..3cff57ae9 100644 --- a/prereise/gather/solardata/helpers.py +++ b/prereise/gather/solardata/helpers.py @@ -34,7 +34,7 @@ def to_reise(data): def get_plant_info_unique_location(plant): """Identify unique location and return relevant information of plants at - location. + location. :param pandas.DataFrame plant: plant data frame. :return: (*dict*) -- keys are coordinates of location. 
Values is a list of diff --git a/prereise/gather/solardata/nsrdb/naive.py b/prereise/gather/solardata/nsrdb/naive.py index e7a1356fc..5607619b1 100644 --- a/prereise/gather/solardata/nsrdb/naive.py +++ b/prereise/gather/solardata/nsrdb/naive.py @@ -7,7 +7,7 @@ def retrieve_data(solar_plant, email, api_key, year="2016"): """Retrieve irradiance data from NSRDB and calculate the power output - using a simple normalization. + using a simple normalization. :param pandas.DataFrame solar_plant: data frame with *'lat'*, *'lon'* and *'Pmax' as columns and *'plant_id'* as index. @@ -22,35 +22,24 @@ def retrieve_data(solar_plant, email, api_key, year="2016"): # Identify unique location coord = get_plant_info_unique_location(solar_plant) - # Build query - attributes = "ghi" - leap_day = "true" - interval = "60" - utc = "true" - - # URL - url = "http://developer.nrel.gov/api/solar/nsrdb_psm3_download.csv?" - url = url + "api_key={key}".format(key=api_key) - - payload = ( - "names={year}".format(year=year) - + "&" - + "leap_day={leap}".format(leap=leap_day) - + "&" - + "interval={interval}".format(interval=interval) - + "&" - + "utc={utc}".format(utc=utc) - + "&" - + "email={email}".format(email=email) - + "&" - + "attributes={attr}".format(attr=attributes) - ) + base_url = "https://developer.nrel.gov/api/solar/nsrdb_psm3_download.csv" + payload = { + "api_key": api_key, + "names": year, + "leap_day": "true", + "interval": "60", + "utc": "true", + "email": email, + "attributes": "ghi", + } + qs = "&".join([f"{key}={value}" for key, value in payload.items()]) + url = f"{base_url}?{qs}" data = pd.DataFrame({"Pout": [], "plant_id": [], "ts": [], "ts_id": []}) for key in tqdm(coord.keys(), total=len(coord)): query = "wkt=POINT({lon}%20{lat})".format(lon=key[0], lat=key[1]) - data_loc = pd.read_csv(url + "&" + payload + "&" + query, skiprows=2) + data_loc = pd.read_csv(f"{url}&{query}", skiprows=2) ghi = data_loc.GHI.values data_loc = pd.DataFrame({"Pout": ghi}) data_loc["Pout"] /= 
max(ghi) diff --git a/prereise/gather/solardata/nsrdb/sam.py b/prereise/gather/solardata/nsrdb/sam.py index be0a01b70..936cb6534 100644 --- a/prereise/gather/solardata/nsrdb/sam.py +++ b/prereise/gather/solardata/nsrdb/sam.py @@ -3,7 +3,7 @@ import numpy as np import pandas as pd from powersimdata.network.usa_tamu.constants.zones import ( - id2state, + id2abv, interconnect2state, state2interconnect, ) @@ -19,7 +19,7 @@ def retrieve_data(solar_plant, email, api_key, ssc_lib, year="2016"): """Retrieves irradiance data from NSRDB and calculate the power output using - the System Adviser Model (SAM). + the System Adviser Model (SAM). :param pandas.DataFrame solar_plant: data frame with *'lat'*, *'lon'* and *'Pmax' as columns and *'plant_id'* as index. @@ -47,28 +47,18 @@ def retrieve_data(solar_plant, email, api_key, ssc_lib, year="2016"): # Identify unique location coord = get_plant_info_unique_location(solar_plant) - # Build query - attributes = "dhi,dni,wind_speed,air_temperature" - interval = "60" - utc = "true" - - # URL - url = "http://developer.nrel.gov/api/solar/nsrdb_psm3_download.csv?" 
- url = url + "api_key={key}".format(key=api_key) - - payload = ( - "names={year}".format(year=year) - + "&" - + "leap_day={leap}".format(leap="false") - + "&" - + "interval={interval}".format(interval=interval) - + "&" - + "utc={utc}".format(utc=utc) - + "&" - + "email={email}".format(email=email) - + "&" - + "attributes={attr}".format(attr=attributes) - ) + base_url = "https://developer.nrel.gov/api/solar/nsrdb_psm3_download.csv" + payload = { + "api_key": api_key, + "names": year, + "leap_day": "false", + "interval": "60", + "utc": "true", + "email": email, + "attributes": "dhi,dni,wind_speed,air_temperature", + } + qs = "&".join([f"{key}={value}" for key, value in payload.items()]) + url = f"{base_url}?{qs}" data = pd.DataFrame({"Pout": [], "plant_id": [], "ts": [], "ts_id": []}) @@ -79,7 +69,7 @@ def retrieve_data(solar_plant, email, api_key, ssc_lib, year="2016"): zone_id = solar_plant.zone_id.unique() frac = {} for i in zone_id: - state = id2state[i] + state = id2abv[i] frac[i] = get_pv_tracking_ratio_state(pv_info, [state]) if frac[i] is None: frac[i] = get_pv_tracking_ratio_state( @@ -91,11 +81,12 @@ def retrieve_data(solar_plant, email, api_key, ssc_lib, year="2016"): for key in tqdm(coord.keys(), total=len(coord)): query = "wkt=POINT({lon}%20{lat})".format(lon=key[0], lat=key[1]) + current_url = f"{url}&{query}" - info = pd.read_csv(url + "&" + payload + "&" + query, nrows=1) + info = pd.read_csv(current_url, nrows=1) tz, elevation = info["Local Time Zone"], info["Elevation"] - data_resource = pd.read_csv(url + "&" + payload + "&" + query, skiprows=2) + data_resource = pd.read_csv(current_url, skiprows=2) data_resource.set_index( dates + timedelta(hours=int(tz.values[0])), inplace=True ) diff --git a/prereise/gather/solardata/pv_tracking.py b/prereise/gather/solardata/pv_tracking.py index bcdc927c3..f25254ea2 100644 --- a/prereise/gather/solardata/pv_tracking.py +++ b/prereise/gather/solardata/pv_tracking.py @@ -1,7 +1,7 @@ import os import pandas as pd 
-from powersimdata.network.usa_tamu.constants.zones import id2state +from powersimdata.network.usa_tamu.constants.zones import abv def get_pv_tracking_data(): @@ -32,8 +32,7 @@ def get_pv_tracking_data(): def get_pv_tracking_ratio_state(pv_info, state): - """Get solar PV tracking technology ratios for the query state in 2016 from - EIA860 + """Get solar PV tracking technology ratios for the query state in 2016 from EIA860 :param pandas.DataFrame pv_info: solar pv plant information as found in form EIA860 as returned by :func:`get_pv_tracking_data`. @@ -47,7 +46,7 @@ def get_pv_tracking_ratio_state(pv_info, state): raise TypeError("state must be a list") for s in state: - if s not in set(id2state.values()): + if s not in abv: raise ValueError("Invalid State: %s" % s) pv_info_state = pv_info[pv_info["State"].isin(state)].copy() diff --git a/setup.py b/setup.py index 05a9f60ae..5b9b7dea3 100644 --- a/setup.py +++ b/setup.py @@ -4,9 +4,9 @@ name="prereise", version="0.3", description="Create and run an energy scenario", - url="https://github.com/intvenlab/PreREISE", + url="https://github.com/Breakthrough-Energy/PreREISE", author="Kaspar Mueller", - author_email="kmueller@intven.com", + author_email="kaspar.mueller@breakthroughenergy.org", packages=find_packages(), package_data={ "prereise": [ @@ -15,6 +15,7 @@ "gather/data/EIA923_Schedules_2_3_4_5_M_12_2016_Final_Revision.xlsx", "gather/hydrodata/data/hydro_gmt.csv", "gather/hydrodata/data/texas_hydro_gmt.csv", + "gather/solardata/data/3_3_Solar_Y2016.csv", ] }, zip_safe=False,