Skip to content

Commit

Permalink
Merge pull request #235 from Breakthrough-Energy/daniel/hifld_bus_demand
Browse files Browse the repository at this point in the history
feat: add function to assign demand to buses proportional to population
  • Loading branch information
danielolsen committed Jan 8, 2022
2 parents 2154424 + b7115ab commit a71599e
Show file tree
Hide file tree
Showing 4 changed files with 132 additions and 0 deletions.
32 changes: 32 additions & 0 deletions ATTRIBUTION.md
Original file line number Diff line number Diff line change
Expand Up @@ -447,4 +447,36 @@ The data are used to populate the power generation profile of U.S. transmission
##### Note
Public use (see https://hifld-geoplatform.opendata.arcgis.com/datasets/geoplatform::power-plants/about). Users are advised to read the data set's metadata thoroughly to understand appropriate use and data limitations.


##### Source
* Name: U.S. Zips
* Author: Pareto Software, LLC
* Description: Data on United States ZIP codes
* Source: https://simplemaps.com
* Exact source location: https://simplemaps.com/data/us-zips
* Note: version 1.78, accessed 2021-10-27.

##### Destination
* Modifications to source file(s): None
* Location: https://besciences.blob.core.windows.net/datasets/geo_data/uszips.csv

##### General Purpose
The data are used to estimate population served by each substation, to distribute demand.


##### Source
* Name: U.S. Counties
* Author: Pareto Software, LLC
* Description: Data on United States counties
* Source: https://simplemaps.com
* Exact source location: https://simplemaps.com/data/us-counties
* Note: version 1.71, accessed 2021-10-27.

##### Destination
* Modifications to source file(s): None
* Location: https://besciences.blob.core.windows.net/datasets/geo_data/uscounties.csv

##### General Purpose
The data are used to estimate population served by each substation, to distribute demand.

---
6 changes: 6 additions & 0 deletions prereise/gather/griddata/hifld/const.py
Original file line number Diff line number Diff line change
Expand Up @@ -111,6 +111,8 @@
"epa_needs": "https://besciences.blob.core.windows.net/datasets/EPA_NEEDS/needs-v620_06-30-21-2_active.csv",
"substations": "https://besciences.blob.core.windows.net/datasets/hifld/Electric_Substations_Jul2020.csv",
"transmission_lines": "https://besciences.blob.core.windows.net/datasets/hifld/Electric_Power_Transmission_Lines_Jul2020.geojson.zip",
"us_counties": "https://besciences.blob.core.windows.net/datasets/geo_data/uscounties.csv",
"us_zips": "https://besciences.blob.core.windows.net/datasets/geo_data/uszips.csv",
}
eia_epa_crosswalk_path = "https://raw.githubusercontent.com/Breakthrough-Energy/camd-eia-crosswalk/master/epa_eia_crosswalk.csv"

Expand Down Expand Up @@ -386,6 +388,7 @@
"Solar Thermal without Energy Storage": 0,
}


# These lines were manually identified based on a combination of: their 'TYPE'
# classification, their substation names, and their geographical paths. The capacities
# for each line were compiled from a variety of public sources.
Expand All @@ -402,3 +405,6 @@
310053: 400, # Trans-Bay Cable
311958: 5, # Alamogordo Solar Energy Center
}

# Multiplier applied to the number of candidate substations within each ZIP code to
# determine how many of them are assigned load (see ``assign_demand_to_buses``).
substation_load_share = 0.5
# Multiplier converting the population assigned to a substation into demand.
# NOTE(review): units are not stated here -- presumably MW per person; confirm.
demand_per_person = 2.01e-3
18 changes: 18 additions & 0 deletions prereise/gather/griddata/hifld/data_access/load.py
Original file line number Diff line number Diff line change
Expand Up @@ -231,3 +231,21 @@ def get_zone(path):
:return: (*pandas.DataFrame*) -- information related to load zone
"""
return pd.read_csv(path, index_col="zone_id")


def get_us_counties(path):
    """Load the table of county data, indexed by county FIPS code.

    :param str path: location of the CSV file. Either local or URL.
    :return: (*pandas.DataFrame*) -- county data, with the 'county_fips' column
        used as the index.
    """
    counties = pd.read_csv(path)
    counties = counties.set_index("county_fips")
    return counties


def get_us_zips(path):
    """Load the table of ZIP code data, indexed by ZIP code.

    :param str path: location of the CSV file. Either local or URL.
    :return: (*pandas.DataFrame*) -- ZIP code data, with the 'zip' column used as
        the index.
    """
    # ZIP codes are parsed as strings rather than numbers, so that codes with
    # leading zeros keep all of their digits.
    column_types = {"zip": "string"}
    zips = pd.read_csv(path, dtype=column_types)
    zips = zips.set_index("zip")
    return zips
76 changes: 76 additions & 0 deletions prereise/gather/griddata/hifld/data_process/demand.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
import pandas as pd

from prereise.gather.griddata.hifld import const
from prereise.gather.griddata.hifld.data_access.load import get_us_counties, get_us_zips


def assign_demand_to_buses(substations, branch, plant, bus):
    """Using data on population by county and ZIP code, assign demand to substations,
    then to the lowest-voltage bus within each substation.

    All results are added inplace: a 'Pd' column is added to the ``bus`` data frame,
    and the intermediate 'pop_ZIP' and 'pop_county' columns (population assigned to
    each substation at the ZIP and county levels) are added to the ``substations``
    data frame. Nothing is returned.

    :param pandas.DataFrame substations: table of substation data, indexed by
        substation ID. Columns 'ZIP' and 'COUNTYFIPS' are read.
    :param pandas.DataFrame branch: table of branch data. Columns 'SUB_1_ID',
        'SUB_2_ID' and 'rateA' are read.
    :param pandas.DataFrame plant: table of plant data. Column 'sub_id' is read.
    :param pandas.DataFrame bus: table of bus data. Columns 'sub_id' and 'baseKV'
        are read.
    """
    # Load data
    zip_data = get_us_zips(const.blob_paths["us_zips"])
    county_data = get_us_counties(const.blob_paths["us_counties"])

    # Determine each substation's transmission capacity, ignoring branches which
    # start and end within the same substation. Select 'rateA' before aggregating so
    # that unrelated (and possibly non-numeric) branch columns are never summed.
    filtered_branch = branch.query("SUB_1_ID != SUB_2_ID")
    from_cap = filtered_branch.groupby("SUB_1_ID")["rateA"].sum()
    to_cap = filtered_branch.groupby("SUB_2_ID")["rateA"].sum()
    sub_cap = from_cap.add(to_cap, fill_value=0)
    # Sort substations by their capacities for later ordered selection
    sorted_subs = substations.loc[sub_cap.sort_values(ascending=False).index].copy()

    # Determine for each ZIP, how much demand to assign to each load substation
    # Assume here that generator substations don't have load attached to them
    filtered_subs = sorted_subs.loc[~sorted_subs.index.isin(plant["sub_id"])]
    subs_per_zip = filtered_subs.value_counts("ZIP")
    zip_load_substations = subs_per_zip * const.substation_load_share
    # Every ZIP with at least one candidate substation gets at least one load substation
    zip_load_substations = zip_load_substations.round().clip(lower=1)
    # ZIPs which are absent from either table yield NaN and are dropped
    zip_assigned_population = (zip_data["population"] / zip_load_substations).dropna()
    # Select the N substations per ZIP with greatest transmission capacity
    load_substations = _first_rows_per_group(
        filtered_subs, "ZIP", zip_load_substations
    )
    # Assignment aligns on the substation index: unselected substations get NaN
    substations["pop_ZIP"] = load_substations["ZIP"].map(zip_assigned_population)

    # Assign remaining county population to substations with load already,
    # plus the most connected substation in any county without a load substation.
    load_subs_from_zips = substations.query("pop_ZIP > 0")
    load_subs_per_county = load_subs_from_zips.value_counts("COUNTYFIPS")
    county_pop = county_data["population"]

    # Select the one substation per missing county with greatest transmission capacity
    counties_without_load_subs = set(county_pop.index) - set(load_subs_per_county.index)
    subs_in_counties_without_load_subs = sorted_subs.loc[
        sorted_subs["COUNTYFIPS"].isin(counties_without_load_subs)
    ]
    added_load_subs = _first_rows_per_group(
        subs_in_counties_without_load_subs, "COUNTYFIPS"
    )
    # There is nothing to add when every county already has a load substation
    if added_load_subs.empty:
        load_subs = load_subs_from_zips
    else:
        load_subs = pd.concat([load_subs_from_zips, added_load_subs])
    # Counties without ZIP-derived load substations have exactly one added substation
    load_subs_per_county = load_subs_per_county.reindex(county_pop.index).fillna(1)

    # Distribute population remaining after ZIP distribution to identified load buses
    distributed_pop = load_subs.groupby("COUNTYFIPS")["pop_ZIP"].sum()
    remaining_pop = county_pop - distributed_pop.reindex(county_pop.index).fillna(0)
    # Clip so that a county whose ZIPs sum to more than its population adds nothing
    remaining_pop_per_sub = remaining_pop.clip(lower=0) / load_subs_per_county
    # We may still miss some population, since there may be a county without any
    # substations, but we should cover the vast majority.
    substations["pop_county"] = load_subs["COUNTYFIPS"].map(remaining_pop_per_sub)

    # Translate population to demand
    total_pop = substations["pop_ZIP"].fillna(0) + substations["pop_county"].fillna(0)
    sub_demand = total_pop * const.demand_per_person

    # The load bus of each substation is its lowest-voltage bus
    load_buses = _first_rows_per_group(bus.sort_values("baseKV"), "sub_id")
    bus["Pd"] = load_buses["sub_id"].map(sub_demand).reindex(bus.index).fillna(0)


def _first_rows_per_group(df, by, n=1):
    """Select the first rows within each group of a data frame, keeping the
    existing row order within each group.

    :param pandas.DataFrame df: data frame to select rows from.
    :param str by: name of the column to group by.
    :param int/pandas.Series n: number of rows to keep per group. Either a single
        value used for every group, or a series indexed by group name.
    :return: (*pandas.DataFrame*) -- the selected rows, ordered by group. If there
        are no groups, an empty data frame with the same columns as ``df``.
    """
    same_for_all = isinstance(n, int)
    selected = [
        group.head(n if same_for_all else int(n[name]))
        for name, group in df.groupby(by)
    ]
    # pd.concat raises a ValueError when it is given nothing to concatenate
    if not selected:
        return df.iloc[:0]
    return pd.concat(selected)

0 comments on commit a71599e

Please sign in to comment.