Skip to content

Commit

Permalink
refactor: use groupby to speed up distance calculations (#204)
Browse files (browse the repository at this point in the history)
  • Loading branch information
danielolsen committed Aug 26, 2021
1 parent 0991c05 commit 9ac4fb2
Showing 1 changed file with 5 additions and 4 deletions.
9 changes: 5 additions & 4 deletions prereise/gather/griddata/hifld/data_process/transmission.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -93,24 +93,25 @@ def filter_lines_with_nonmatching_substation_coords(lines, substations, threshol
     :return: (*pandas.DataFrame*) -- lines with matching substations.
     """

-    def find_closest_substation_and_distance(coordinates, name, substations):
-        matching_substations = substations.loc[substations.NAME == name]
+    def find_closest_substation_and_distance(coordinates, name, substations_groupby):
+        matching_substations = substations_groupby.get_group(name)
         distances = matching_substations.apply(
             lambda x: haversine(coordinates, (x.LATITUDE, x.LONGITUDE)), axis=1
         )
         return pd.Series([distances.idxmin(), distances.min()], index=["sub", "dist"])

     print("Evaluating endpoint location mismatches... (this may take several minutes)")
+    substations_groupby = substations.groupby("NAME")
     # Coordinates are initially (lon, lat); we reverse to (lat, lon) for haversine
     start_subs = lines.apply(
         lambda x: find_closest_substation_and_distance(
-            x.loc["COORDINATES"][0][::-1], x.loc["SUB_1"], substations
+            x.loc["COORDINATES"][0][::-1], x.loc["SUB_1"], substations_groupby
         ),
         axis=1,
     )
     end_subs = lines.apply(
         lambda x: find_closest_substation_and_distance(
-            x.loc["COORDINATES"][-1][::-1], x.loc["SUB_2"], substations
+            x.loc["COORDINATES"][-1][::-1], x.loc["SUB_2"], substations_groupby
         ),
         axis=1,
     )
Expand Down

0 comments on commit 9ac4fb2

Please sign in to comment.