From 186adbda7d86f1c8ef477c311e736102b10f4ffc Mon Sep 17 00:00:00 2001 From: Daniel Olsen Date: Thu, 28 Oct 2021 16:47:45 -0700 Subject: [PATCH 1/3] chore: add demand-related constants and population data paths to const.py --- ATTRIBUTION.md | 32 +++++++++++++++++++++++++ prereise/gather/griddata/hifld/const.py | 6 +++++ 2 files changed, 38 insertions(+) diff --git a/ATTRIBUTION.md b/ATTRIBUTION.md index ecadf61af..557cc6b20 100644 --- a/ATTRIBUTION.md +++ b/ATTRIBUTION.md @@ -447,4 +447,36 @@ The data are used to populate the power generation profile of U.S. transmission ##### Note Public use (see https://hifld-geoplatform.opendata.arcgis.com/datasets/geoplatform::power-plants/about). Users are advised to read the data set's metadata thoroughly to understand appropriate use and data limitations. + +##### Source +* Name: U.S. Zips +* Author: Pareto Software, LLC +* Description: Data on United States ZIP codes +* Source: https://simplemaps.com +* Exact source location: https://simplemaps.com/data/us-zips +* Note: version 1.78, accessed 2021-10-27. + +##### Destination +* Modifications to source file(s): None +* Location: https://besciences.blob.core.windows.net/datasets/geo_data/uszips.csv + +##### General Purpose +The data are used to estimate population served by each substation, to distribute demand. + + +##### Source +* Name: U.S. Counties +* Author: Pareto Software, LLC +* Description: Data on United States counties +* Source: https://simplemaps.com +* Exact source location: https://simplemaps.com/data/us-counties +* Note: version 1.71, accessed 2021-10-27. + +##### Destination +* Modifications to source file(s): None +* Location: https://besciences.blob.core.windows.net/datasets/geo_data/uscounties.csv + +##### General Purpose +The data are used to estimate population served by each substation, to distribute demand. 
+ --- diff --git a/prereise/gather/griddata/hifld/const.py b/prereise/gather/griddata/hifld/const.py index f2012551f..b5a89ba38 100644 --- a/prereise/gather/griddata/hifld/const.py +++ b/prereise/gather/griddata/hifld/const.py @@ -111,6 +111,8 @@ "epa_needs": "https://besciences.blob.core.windows.net/datasets/EPA_NEEDS/needs-v620_06-30-21-2_active.csv", "substations": "https://besciences.blob.core.windows.net/datasets/hifld/Electric_Substations_Jul2020.csv", "transmission_lines": "https://besciences.blob.core.windows.net/datasets/hifld/Electric_Power_Transmission_Lines_Jul2020.geojson.zip", + "us_counties": "https://besciences.blob.core.windows.net/datasets/geo_data/uscounties.csv", + "us_zips": "https://besciences.blob.core.windows.net/datasets/geo_data/uszips.csv", } eia_epa_crosswalk_path = "https://raw.githubusercontent.com/Breakthrough-Energy/camd-eia-crosswalk/master/epa_eia_crosswalk.csv" @@ -386,6 +388,7 @@ "Solar Thermal without Energy Storage": 0, } + # These lines were manually identified based on a combination of: their 'TYPE' # classification, their substation names, and their geographical paths. The capacities # for each line were compiled from a variety of public sources. 
# Python added by the last two commits of this patch series, reassembled from the
# collapsed diff text:
#   * PATCH 2/3 -> prereise/gather/griddata/hifld/data_access/load.py
#     (get_us_counties, get_us_zips)
#   * PATCH 3/3 -> prereise/gather/griddata/hifld/data_process/demand.py
#     (assign_demand_to_buses)
# PATCH 1/3 additionally appends ``substation_load_share = 0.5`` and
# ``demand_per_person = 2.01e-3`` to prereise/gather/griddata/hifld/const.py; these
# are the defaults that ``assign_demand_to_buses`` falls back to.
import pandas as pd


def get_us_counties(path):
    """Read the file containing county data.

    :param str path: path to file. Either local or URL.
    :return: (*pandas.DataFrame*) -- information related to counties, indexed by
        the 'county_fips' column.
    """
    # NOTE(review): 'county_fips' is parsed with the default dtype, unlike ZIP codes
    # which are forced to strings -- confirm this matches the 'COUNTYFIPS' column of
    # the substation table it is later matched against.
    return pd.read_csv(path).set_index("county_fips")


def get_us_zips(path):
    """Read the file containing ZIP code data.

    :param str path: path to file. Either local or URL.
    :return: (*pandas.DataFrame*) -- information related to ZIP codes, indexed by
        the 'zip' column.
    """
    # ZIP codes are read as strings rather than numbers (presumably so that leading
    # zeros survive); the index is set afterwards so the dtype is applied first.
    return pd.read_csv(path, dtype={"zip": "string"}).set_index("zip")


def assign_demand_to_buses(
    substations,
    branch,
    plant,
    bus,
    *,
    zip_data=None,
    county_data=None,
    substation_load_share=None,
    demand_per_person=None,
):
    """Using data on population by county and ZIP code, assign demand to substations,
    then to the lowest-voltage bus within each substation.

    Both ``bus`` and ``substations`` are modified inplace: a 'Pd' column is added to
    ``bus``, and the intermediate 'pop_ZIP' and 'pop_county' columns (population
    assigned from ZIP-level and county-level data) are added to ``substations``.

    :param pandas.DataFrame substations: table of substation data, indexed by
        substation ID, with 'ZIP' and 'COUNTYFIPS' columns.
    :param pandas.DataFrame branch: table of branch data, with 'SUB_1_ID',
        'SUB_2_ID' and 'rateA' columns.
    :param pandas.DataFrame plant: table of plant data, with a 'sub_id' column.
    :param pandas.DataFrame bus: table of bus data, with 'sub_id' and 'baseKV'
        columns.
    :param pandas.DataFrame zip_data: table with a 'population' column, indexed by
        ZIP code (values must be comparable to ``substations['ZIP']``). If None, it
        is loaded via :func:`get_us_zips` from ``const.blob_paths['us_zips']``.
    :param pandas.DataFrame county_data: table with a 'population' column, indexed
        by county FIPS (values must be comparable to ``substations['COUNTYFIPS']``).
        If None, it is loaded via :func:`get_us_counties` from
        ``const.blob_paths['us_counties']``.
    :param float substation_load_share: share of the non-generator substations in
        each ZIP code that receive load (at least one per ZIP code is always
        selected). If None, ``const.substation_load_share`` is used.
    :param float demand_per_person: factor converting population to demand. If
        None, ``const.demand_per_person`` is used.
    """
    overridable = (zip_data, county_data, substation_load_share, demand_per_person)
    if any(value is None for value in overridable):
        # Project defaults are only looked up when the caller did not supply a value,
        # so fully-specified calls need neither the constants module nor a download.
        from prereise.gather.griddata.hifld import const

        if zip_data is None:
            zip_data = get_us_zips(const.blob_paths["us_zips"])
        if county_data is None:
            county_data = get_us_counties(const.blob_paths["us_counties"])
        if substation_load_share is None:
            substation_load_share = const.substation_load_share
        if demand_per_person is None:
            demand_per_person = const.demand_per_person

    # Determine each substation's transmission capacity (branches that start and end
    # in the same substation are ignored), then sort for later ordered selection.
    inter_sub_branch = branch.loc[branch["SUB_1_ID"] != branch["SUB_2_ID"]]
    # Select 'rateA' before summing so that only that column is aggregated
    from_cap = inter_sub_branch.groupby("SUB_1_ID")["rateA"].sum()
    to_cap = inter_sub_branch.groupby("SUB_2_ID")["rateA"].sum()
    sub_cap = from_cap.add(to_cap, fill_value=0)
    # A stable sort keeps the ranking of equal-capacity substations deterministic
    ranked_ids = sub_cap.sort_values(ascending=False, kind="stable").index
    sorted_subs = substations.loc[ranked_ids]

    # Determine for each ZIP, how much demand to assign to each load substation
    # Assume here that generator substations don't have load attached to them
    candidates = sorted_subs.loc[~sorted_subs.index.isin(plant["sub_id"])]
    subs_per_zip = candidates["ZIP"].value_counts()
    zip_load_substations = (subs_per_zip * substation_load_share).round()
    zip_load_substations = zip_load_substations.clip(lower=1)
    zip_assigned_population = (zip_data["population"] / zip_load_substations).dropna()
    # Select the N substations per ZIP with greatest transmission capacity. Since
    # ``candidates`` is ordered by capacity, the position of a row within its ZIP
    # group is its capacity rank. Rows without a ZIP get no quota and are dropped.
    rank_in_zip = candidates.groupby("ZIP").cumcount()
    quota = candidates["ZIP"].map(zip_load_substations)
    load_substations = candidates.loc[rank_in_zip < quota]
    substations["pop_ZIP"] = (
        load_substations["ZIP"].map(zip_assigned_population).astype(float)
    )

    # Assign remaining county population to substations with load already,
    # plus the most connected substation in any county without a load substation.
    has_zip_load = substations["pop_ZIP"] > 0
    load_subs_per_county = substations.loc[has_zip_load, "COUNTYFIPS"].value_counts()
    county_pop = county_data["population"]

    # Select the one substation per missing county with greatest transmission
    # capacity. ``groupby().head(1)`` copes with there being no such county, where
    # concatenating per-group frames would raise on an empty sequence.
    in_county_data = sorted_subs["COUNTYFIPS"].isin(county_pop.index)
    already_covered = sorted_subs["COUNTYFIPS"].isin(load_subs_per_county.index)
    fallback_pool = sorted_subs.loc[in_county_data & ~already_covered]
    added_load_subs = fallback_pool.groupby("COUNTYFIPS").head(1)
    load_subs = substations.loc[
        has_zip_load | substations.index.isin(added_load_subs.index)
    ]
    # Counties without a ZIP-selected load substation have (at most) the single
    # substation added above, hence the fill value of 1.
    load_subs_per_county = load_subs_per_county.reindex(county_pop.index).fillna(1)

    # Distribute population remaining after ZIP distribution to identified load buses
    distributed_pop = load_subs.groupby("COUNTYFIPS")["pop_ZIP"].sum()
    remaining_pop = county_pop - distributed_pop.reindex(county_pop.index).fillna(0)
    remaining_pop_per_sub = remaining_pop.clip(lower=0) / load_subs_per_county
    # We may still miss some population, since there may be a county without any
    # substations, but we should cover the vast majority.
    substations["pop_county"] = (
        load_subs["COUNTYFIPS"].map(remaining_pop_per_sub).astype(float)
    )

    # Translate population to demand
    total_pop = substations["pop_ZIP"].fillna(0) + substations["pop_county"].fillna(0)
    sub_demand = total_pop * demand_per_person

    # The first bus of each substation after sorting by voltage is its
    # lowest-voltage bus; every other bus gets zero demand.
    load_buses = bus.sort_values("baseKV", kind="stable").groupby("sub_id").head(1)
    bus["Pd"] = load_buses["sub_id"].map(sub_demand).reindex(bus.index).fillna(0)