# EIA Form 860 "Technology" values that denote storage devices; generators with
# these technologies are filtered out when building the plant table.
eia_storage_gen_types = {
    "Batteries",
    "Flywheels",
}

# NERC region code -> interconnection name.
nercregion2interconnect = {
    "ASCC": "Alaska",  # Not currently used
    "HICC": "Hawaii",  # Not currently used
    "MRO": "Eastern",
    "NPCC": "Eastern",
    "RFC": "Eastern",
    "SERC": "Eastern",
    "TRE": "ERCOT",
    "WECC": "Western",
}

# Assignment of each state to an interconnection. States under "Eastern" or
# "Western" belong entirely to that interconnection. States under "split" are
# resolved county by county via ``state_county_splits`` and therefore MUST have an
# entry there (and vice versa). States under "ignore" are not mapped at all.
interconnect2state = {
    "Eastern": {
        "AL",
        "AR",
        "CT",
        "DE",
        "FL",
        "GA",
        "IA",
        "IL",
        "IN",
        "KS",
        "KY",
        "LA",
        "MA",
        "MD",
        "ME",
        "MI",
        "MN",
        "MO",
        "MS",
        "NC",
        "ND",
        "NE",
        "NH",
        "NJ",
        "NY",
        "OH",
        "OK",
        "PA",
        "RI",
        "SC",
        "TN",
        "VA",
        "VT",
        "WI",
        "WV",
    },
    "ignore": {"AK", "HI"},
    # NM is listed here (not under "Western") because it has county-level
    # overrides below; listing it under "Western" would make them unreachable.
    "split": {"MT", "NM", "SD", "TX"},
    "Western": {"AZ", "CA", "CO", "ID", "NV", "OR", "UT", "WA", "WY"},
}

# County-level interconnection overrides for the "split" states. For each state,
# "default" names the interconnection used for any county that is not listed
# explicitly; every other key is an interconnection name mapped to the set of
# (upper-case) county names assigned to it.
state_county_splits = {
    "MT": {
        "default": "Western",
        "Eastern": {
            "CARTER",
            "CUSTER",
            "DANIELS",
            "DAWSON",
            "FALLON",
            "GARFIELD",
            "MCCONE",
            "PHILLIPS",
            "POWDER RIVER",
            "PRAIRIE",
            "RICHLAND",
            "ROOSEVELT",
            "ROSEBUD",
            "SHERIDAN",
            "VALLEY",
            "WIBAUX",
        },
    },
    "NM": {
        "default": "Western",
        "Eastern": {"CURRY", "LEA", "QUAY", "ROOSEVELT", "UNION"},
    },
    "SD": {"default": "Eastern", "Western": {"BUTTE", "FALL RIVER", "LAWRENCE"}},
    "TX": {
        "default": "ERCOT",
        "Eastern": {
            "BAILEY",
            "BOWIE",
            "CAMP",
            "CASS",
            "COCHRAN",
            "DALLAM",
            "DONLEY",
            "GAINES",
            "GREGG",
            "HALE",
            "HANSFORD",
            "HARDIN",
            "HARRISON",
            "HARTLEY",
            "HEMPHILL",
            "HOCKLEY",
            "HUTCHINSON",
            "JASPER",
            "JEFFERSON",
            "LAMB",
            "LIBERTY",
            "LIPSCOMB",
            "LUBBOCK",
            "LYNN",
            "MARION",
            "MOORE",
            "MORRIS",
            "NEWTON",
            "OCHLTREE",
            "ORANGE",
            "PANOLA",
            "PARMER",
            "POLK",
            "RANDALL",
            "SABINE",
            "SAN AUGUSTINE",
            "SAN JACINTO",
            "SHELBY",
            "SHERMAN",
            "TERRY",
            "TRINITY",
            "TYLER",
            "UPSHUR",
            "WALKER",
            "YOAKUM",
        },
        "Western": {"EL PASO", "HUDSPETH"},
    },
}
from prereise.gather.griddata.hifld import const


def map_state_and_county_to_interconnect(state_abv, county):
    """Look up the interconnection assumed for a given state and county.

    States listed under ``"Eastern"`` or ``"Western"`` in
    ``const.interconnect2state`` are resolved from the state alone. States listed
    under ``"split"`` are resolved via ``const.state_county_splits``: the county is
    matched (case-insensitively) against each interconnection's county set, and the
    state's ``"default"`` interconnection is used when no set contains it.

    :param str state_abv: two-letter state abbreviation (any case).
    :param str county: county name (any case); only inspected for split states.
    :raises ValueError: if the state is neither wholly assigned to an
        interconnection nor listed as a split state.
    :return: (*str*) -- interconnection name.
    """
    state = state_abv.upper()

    # Whole-state assignments take precedence over county-level rules.
    if state in const.interconnect2state["Eastern"]:
        return "Eastern"
    if state in const.interconnect2state["Western"]:
        return "Western"
    if state not in const.interconnect2state["split"]:
        raise ValueError(f"Got an unexpected state: {state_abv}")

    county_rules = const.state_county_splits[state]
    for interconnect, counties in county_rules.items():
        # "default" holds a fallback name, not a set of counties, so skip it here.
        if interconnect != "default" and county.upper() in counties:
            return interconnect
    return county_rules["default"]
import pandas as pd
from powersimdata.utility.distance import haversine

from prereise.gather.griddata.hifld import const
from prereise.gather.griddata.hifld.data_access import load


def floatify(value, default=float("nan")):
    """Return a float if possible, otherwise return a default value.

    Strings have thousands separators (``","``) removed before conversion.

    :param object value: value to be coerced to float, if possible.
    :param object default: value to be returned if float conversion isn't possible.
    :return: (*float/object*) -- float or default value as applicable.
    """
    if isinstance(value, str):
        value = value.replace(",", "")
    try:
        return float(value)
    except (TypeError, ValueError, OverflowError):
        # These are the failures float() signals for unsupported types, unparseable
        # strings, and integers too large to represent.
        return default


def _get_substations_in_nearby_zips(generator, substation_groupby):
    """Collect substations whose ZIP code is numerically close to the generator's.

    ZIP codes within +/- 100 of the generator's ZIP code are considered; lookup keys
    are built as 5-character, zero-padded strings.

    :param pandas.Series generator: one generating unit from data frame.
    :param pandas.GroupBy substation_groupby: data frame of substations, grouped by
        (interconnect, ZIP).
    :return: (*pandas.DataFrame/None*) -- matching substations, or None if the
        generator's ZIP code can't be interpreted as an integer or if no substations
        are found in the generator's interconnect within the ZIP code range.
    """
    try:
        center_zip = int(generator.loc["ZIP"])
    except (TypeError, ValueError):
        # Missing (NaN) or non-numeric ZIP code: there is no range to search.
        return None
    interconnect = generator.loc["interconnect"]
    groups = substation_groupby.groups
    candidate_keys = (
        (interconnect, str(center_zip + offset).rjust(5, "0"))
        for offset in range(-100, 101)
    )
    nearby = [
        substation_groupby.get_group(key) for key in candidate_keys if key in groups
    ]
    if not nearby:
        return None
    return pd.concat(nearby)


def map_generator_to_sub_by_location(generator, substation_groupby):
    """Determine a likely substation for a generator to be connected to. Priority order
    of mapping is: 1) if location is available and one or more substations exist in that
    ZIP code, map by location to closest substation within that ZIP code, 2) if location
    is available but no substations exist in that ZIP code, map to the closest
    substation within neighboring ZIP codes, 3) if only ZIP code is available
    (no location), and one or more substations exist, map to an arbitrarily chosen
    substation within that ZIP code, 4) if only ZIP code is available (no location)
    but no substations exist in that ZIP code, return NA.

    :param pandas.Series generator: one generating unit from data frame.
    :param pandas.GroupBy substation_groupby: data frame of substations, grouped by
        (interconnect, ZIP).
    :return: (*int/pd.NA*) -- substation ID if the generator can be mapped successfully
        to a substation, else pd.NA.
    """
    lookup_params = tuple(generator.loc[["interconnect", "ZIP"]])
    try:
        matching_subs = substation_groupby.get_group(lookup_params)
    except KeyError:
        # No substations in this (interconnect, ZIP) combination
        matching_subs = None

    if pd.isna(generator["lat"]) or pd.isna(generator["lon"]):
        # No location available: can only map by ZIP code, arbitrarily
        return pd.NA if matching_subs is None else matching_subs.index[0]

    if matching_subs is None:
        # Fall back to a set of 'nearby' substations
        matching_subs = _get_substations_in_nearby_zips(generator, substation_groupby)
        if matching_subs is None:
            # Nothing within the given interconnection and ZIPs, give up
            return pd.NA

    generator_location = (generator.lat, generator.lon)
    distance_to_subs = matching_subs.apply(
        lambda x: haversine((x.LATITUDE, x.LONGITUDE), generator_location),
        axis=1,
    )
    return distance_to_subs.idxmin()


def map_generator_to_bus_by_sub(generator, bus_groupby):
    """Determine a likely bus for a generator to be connected to, based on the bus with
    the lowest voltage within the generator's specified substation.

    :param pandas.Series generator: one generating unit from data frame.
    :param pandas.GroupBy bus_groupby: data frame of buses, grouped by substation ID.
    :return: (*int/pd.NA*) -- bus ID if the generator has a substation ID, else pd.NA.
    """
    if pd.isna(generator.sub_id):
        return pd.NA
    return bus_groupby.get_group(generator.sub_id)["baseKV"].idxmin()


def build_plant(bus, substations):
    """Use source data on generating units from EIA/EPA, along with transmission network
    data, to produce a plant data frame.

    :param pandas.DataFrame bus: data frame of buses, to be used within
        :func:`map_generator_to_bus_by_sub`.
    :param pandas.DataFrame substations: data frame of substations.
    :return: (*pandas.DataFrame*) -- data frame of generator data.
    """
    # Initial loading
    generators = load.get_eia_form_860(const.blob_paths["eia_form860_2019_generator"])
    plants = load.get_eia_form_860(const.blob_paths["eia_form860_2019_plant"])

    # Data interpretation
    plants = plants.set_index("Plant Code")
    plants["Latitude"] = plants["Latitude"].map(floatify)
    plants["Longitude"] = plants["Longitude"].map(floatify)
    for col in ["Summer Capacity (MW)", "Winter Capacity (MW)", "Minimum Load (MW)"]:
        generators[col] = generators[col].map(floatify)

    # Filtering / Grouping
    generators = generators.query(
        "Technology not in @const.eia_storage_gen_types"
    ).copy()
    bus_groupby = bus.groupby(bus["sub_id"].astype(int))
    # Filter substations with no buses. The labels are passed as a list because
    # pandas does not accept a set as a .loc indexer (TypeError from pandas 2.0).
    substations = substations.loc[list(bus_groupby.groups)]
    substation_groupby = substations.groupby(["interconnect", "ZIP"])

    # Add information
    plant_code = generators["Plant Code"]
    generators["interconnect"] = plant_code.map(plants["NERC Region"]).map(
        const.nercregion2interconnect
    )
    generators["lat"] = plant_code.map(plants["Latitude"])
    generators["lon"] = plant_code.map(plants["Longitude"])
    generators["ZIP"] = plant_code.map(plants["Zip"])
    generators["sub_id"] = generators.apply(
        lambda x: map_generator_to_sub_by_location(x, substation_groupby), axis=1
    )
    generators["bus_id"] = generators.apply(
        lambda x: map_generator_to_bus_by_sub(x, bus_groupby), axis=1
    )
    # Pmax is the larger of the two seasonal capacities
    generators["Pmax"] = generators[
        ["Summer Capacity (MW)", "Winter Capacity (MW)"]
    ].max(axis=1)
    generators.rename({"Minimum Load (MW)": "Pmin"}, inplace=True, axis=1)

    return generators