-
Notifications
You must be signed in to change notification settings - Fork 28
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #199 from Breakthrough-Energy/daniel/hifld_transmission
feat: add functions to perform transmission line filtering
- Loading branch information
Showing
3 changed files
with
178 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Empty file.
173 changes: 173 additions & 0 deletions
173
prereise/gather/griddata/hifld/data_process/transmission.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,173 @@ | ||
import os | ||
|
||
from powersimdata.utility.distance import haversine | ||
|
||
from prereise.gather.griddata.hifld import const | ||
from prereise.gather.griddata.hifld.data_access.load import ( | ||
get_hifld_electric_power_transmission_lines, | ||
get_hifld_electric_substations, | ||
get_zone, | ||
) | ||
|
||
|
||
def check_for_location_conflicts(substations):
    """Verify that no two substations share the same LATITUDE/LONGITUDE pair.

    :param pandas.DataFrame substations: data frame of substations, with 'LATITUDE'
        and 'LONGITUDE' columns.
    :raises ValueError: if there are fewer distinct lat/lon groups than substations.
    """
    by_location = substations.groupby(["LATITUDE", "LONGITUDE"])
    total = len(substations)
    distinct_locations = len(by_location)
    if distinct_locations == total:
        # Every substation has its own location: nothing to report.
        return
    # Number of rows in excess of one-per-location.
    num_collisions = total - distinct_locations
    raise ValueError(
        f"There are {num_collisions} substations with duplicate lat/lon values"
    )
|
||
|
||
def filter_substations_with_zero_lines(substations):
    """Drop every substation whose LINES value is zero, and print how many were
    removed.

    :param pandas.DataFrame substations: data frame of all substations, with a
        'LINES' column.
    :return: (*pandas.DataFrame*) -- copy of the substations with non-zero LINES.
    """
    line_counts = substations["LINES"]
    without_lines = substations.loc[line_counts == 0]
    with_lines = substations.loc[line_counts != 0]

    num_substations_without_lines = len(without_lines)
    num_substations = len(substations)
    print(
        f"dropping {num_substations_without_lines} substations "
        f"of {num_substations} total due to LINES parameter equal to 0"
    )
    return with_lines.copy()
|
||
|
||
def filter_lines_with_unavailable_substations(lines):
    """Drop every line whose SUB_1 or SUB_2 value is 'NOT AVAILABLE', and print how
    many were removed.

    :param pandas.DataFrame lines: data frame of all lines, with 'SUB_1' and 'SUB_2'
        columns.
    :return: (*pandas.DataFrame*) -- copy of the lines with both substations available.
    """
    placeholder = "NOT AVAILABLE"
    start_names = lines["SUB_1"]
    end_names = lines["SUB_2"]

    unavailable = lines.loc[(start_names == placeholder) | (end_names == placeholder)]
    available = lines.loc[(start_names != placeholder) & (end_names != placeholder)]

    num_filtered = len(unavailable)
    num_lines = len(lines)
    print(
        f"dropping {num_filtered} lines with one or more substations listed as "
        f"'NOT AVAILABLE' out of a starting total of {num_lines}"
    )
    return available.copy()
|
||
|
||
def filter_lines_with_no_matching_substations(lines, substations):
    """Drop every line for which SUB_1 or SUB_2 does not appear in the NAME column of
    the ``substations`` data frame, and print how many were removed.

    :param pandas.DataFrame lines: data frame of lines, with 'SUB_1' and 'SUB_2'
        columns.
    :param pandas.DataFrame substations: data frame of substations, with a 'NAME'
        column.
    :return: (*pandas.DataFrame*) -- copy of the lines with both substations found.
    """
    known_names = substations["NAME"]
    start_is_known = lines["SUB_1"].isin(known_names)
    end_is_known = lines["SUB_2"].isin(known_names)

    unmatched = lines.loc[~start_is_known | ~end_is_known]
    num_filtered = len(unmatched)
    num_lines = len(lines)
    print(
        f"dropping {num_filtered} lines with one or more substations not found in "
        f"substations table out of a starting total of {num_lines}"
    )
    return lines.loc[start_is_known & end_is_known].copy()
|
||
|
||
def filter_lines_with_nonmatching_substation_coords(lines, substations, threshold=100):
    """Filter lines for which either the starting or ending substation, by name, has
    coordinates judged as too far away (based on the ``threshold`` parameter) from the
    coordinates listed for the line, and report the number dropped.

    For each endpoint, the distance used is the smallest haversine distance between
    the line's endpoint and any substation sharing the endpoint's name. An endpoint
    whose name matches no substation, or for which no valid distance can be computed,
    is treated as infinitely far away, so the line is dropped and counted.

    :param pandas.DataFrame lines: data frame of lines, with 'COORDINATES', 'SUB_1'
        and 'SUB_2' columns. Each COORDINATES entry is a sequence of (lon, lat) points.
    :param pandas.DataFrame substations: data frame of substations, with 'NAME',
        'LATITUDE' and 'LONGITUDE' columns.
    :param int/float threshold: maximum mismatch distance (miles).
    :return: (*pandas.DataFrame*) -- lines with matching substations.
    """
    import math

    # Build the name -> [(lat, lon), ...] lookup once, instead of re-scanning the whole
    # substations table for every line endpoint.
    locations_by_name = {}
    for name, latitude, longitude in zip(
        substations["NAME"], substations["LATITUDE"], substations["LONGITUDE"]
    ):
        locations_by_name.setdefault(name, []).append((latitude, longitude))

    def find_distance_to_closest_substation(coordinates, name):
        distances = (
            haversine(coordinates, location)
            for location in locations_by_name.get(name, ())
        )
        # NaN distances are ignored (as a pandas ``min`` would); with no usable
        # candidate the endpoint is considered unmatched.
        return min((d for d in distances if not math.isnan(d)), default=math.inf)

    print("Evaluating endpoint location mismatches... (this may take several minutes)")
    keep = []
    for coordinates, start_name, end_name in zip(
        lines["COORDINATES"], lines["SUB_1"], lines["SUB_2"]
    ):
        # Coordinates are initially (lon, lat); we reverse to (lat, lon) for haversine
        start_mismatch = find_distance_to_closest_substation(
            coordinates[0][::-1], start_name
        )
        end_mismatch = find_distance_to_closest_substation(
            coordinates[-1][::-1], end_name
        )
        keep.append(bool(start_mismatch <= threshold and end_mismatch <= threshold))

    num_lines = len(lines)
    # Derive the dropped count from the same mask used for the result, so that the
    # report always agrees with what is returned.
    num_filtered = num_lines - sum(keep)
    print(
        f"dropping {num_filtered} lines with one or more substations with non-matching "
        f"coordinates out of a starting total of {num_lines}"
    )

    return lines.loc[keep].copy()
|
||
|
||
def filter_lines_with_identical_substation_names(lines):
    """Drop every line whose SUB_1 and SUB_2 values are equal to each other, and print
    how many were removed.

    :param pandas.DataFrame lines: data frame of lines, with 'SUB_1' and 'SUB_2'
        columns.
    :return: (*pandas.DataFrame*) -- copy of the lines with distinct substations.
    """
    start_names = lines["SUB_1"]
    end_names = lines["SUB_2"]

    self_connected = lines.loc[start_names == end_names]
    num_filtered = len(self_connected)
    num_lines = len(lines)
    print(
        f"dropping {num_filtered} lines with matching SUB_1 and SUB_2 out of a "
        f"starting total of {num_lines}"
    )
    return lines.loc[start_names != end_names].copy()
|
||
|
||
def build_transmission():
    """Main user-facing entry point: load the HIFLD substation, transmission line and
    zone inputs, then apply the substation and line filters in sequence.

    :return: (*pandas.DataFrame*) -- transmission lines remaining after all filters.
    :raises ValueError: if multiple remaining substations have identical lat/lon.
    """
    # Load input data
    substations = get_hifld_electric_substations(const.blob_paths["substations"])
    lines = get_hifld_electric_power_transmission_lines(
        const.blob_paths["transmission_lines"]
    )
    module_dir = os.path.dirname(__file__)
    zone_path = os.path.join(os.path.join(module_dir, "..", "data"), "zone.csv")
    hifld_zones = get_zone(zone_path)  # noqa: F841

    # Filter substations
    substations = filter_substations_with_zero_lines(substations)
    check_for_location_conflicts(substations)

    # Filter lines, each step narrowing the result of the previous one
    lines = filter_lines_with_unavailable_substations(lines)
    lines = filter_lines_with_no_matching_substations(lines, substations)
    lines = filter_lines_with_nonmatching_substation_coords(lines, substations)
    return filter_lines_with_identical_substation_names(lines)