From 889b1559d254828181a76bc8937582ded2eee70e Mon Sep 17 00:00:00 2001 From: Daniel Olsen Date: Tue, 14 Sep 2021 15:56:45 -0700 Subject: [PATCH 1/3] fix: add a step to detect and remove duplicated buses --- .../design/investment/create_mapping_files.py | 24 +++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/powersimdata/design/investment/create_mapping_files.py b/powersimdata/design/investment/create_mapping_files.py index 7335e0e87..5d1dc6fbd 100644 --- a/powersimdata/design/investment/create_mapping_files.py +++ b/powersimdata/design/investment/create_mapping_files.py @@ -4,6 +4,7 @@ from powersimdata.design.investment import const from powersimdata.input.grid import Grid +from powersimdata.utility.distance import haversine from powersimdata.utility.helpers import _check_import @@ -67,6 +68,29 @@ def _find_nearest(series, polygons, search_dist): points_in_regions = gpd.sjoin(left_df=left_df, right_df=right_df, op="intersects") points_in_regions["dist"] = 0 + # Since polygons may overlap, there can be duplicated buses that we want to filter + duplicated = points_in_regions.loc[points_in_regions.index.duplicated(keep=False)] + to_drop = set() + for bus in set(duplicated["bus_id"]): + entries = duplicated.query("bus_id == @bus") + coords = entries["geometry"].iloc[0].coords[0] # First duped entry, only point + regions = set(entries["name_abbr"]) # noqa: F841 + candidates = points_in_regions.query( + "index not in @duplicated.index and name_abbr in @regions" + ) + neighbor = candidates.apply( + lambda x: haversine((x.geometry.x, x.geometry.y), coords), axis=1 + ).idxmin() + closest_region = candidates.loc[neighbor, "name_abbr"] # noqa: F841 + # There may be more than two overlapping geometries, capture all but the closest + drop_regions = set(entries.query("name_abbr != @closest_region")["name_abbr"]) + # Since indices are duplicated, we need to drop via two-column tuples + to_drop |= {(bus, d) for d in drop_regions} + + points_in_regions = points_in_regions.loc[ + ~points_in_regions.set_index(["bus_id", "name_abbr"]).index.isin(to_drop) + ] + # Find closest Polygons, for points that don't fall within any missing_indices = set(left_df.index) - set(points_in_regions.index) points_not_in_regions = left_df.loc[missing_indices] From 1a627f9c954108cffa7d6a6a9235358206da2c09 Mon Sep 17 00:00:00 2001 From: Daniel Olsen Date: Tue, 14 Sep 2021 16:06:13 -0700 Subject: [PATCH 2/3] fix: change makedirs to create directories, not files --- powersimdata/design/investment/create_mapping_files.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/powersimdata/design/investment/create_mapping_files.py b/powersimdata/design/investment/create_mapping_files.py index 5d1dc6fbd..5a698a2bb 100644 --- a/powersimdata/design/investment/create_mapping_files.py +++ b/powersimdata/design/investment/create_mapping_files.py @@ -208,7 +208,7 @@ def write_bus_neem_map(base_grid): raise TypeError("base_grid must be a Grid instance") df_pts_bus = bus_to_neem_reg(base_grid.bus) df_pts_bus.sort_index(inplace=True) - os.makedirs(const.bus_neem_regions_path, exist_ok=True) + os.makedirs(os.path.dirname(const.bus_neem_regions_path), exist_ok=True) df_pts_bus.to_csv(const.bus_neem_regions_path) @@ -222,7 +222,7 @@ def write_bus_reeds_map(base_grid): raise TypeError("base_grid must be a Grid instance") df_pts_bus = bus_to_reeds_reg(base_grid.bus) df_pts_bus.sort_index(inplace=True) - os.makedirs(const.bus_reeds_regions_path, exist_ok=True) + os.makedirs(os.path.dirname(const.bus_reeds_regions_path), exist_ok=True) df_pts_bus.to_csv(const.bus_reeds_regions_path) From 1b585004cb34d6c3cb9e8070f9b2b3d2a8513558 Mon Sep 17 00:00:00 2001 From: Daniel Olsen Date: Tue, 14 Sep 2021 16:06:39 -0700 Subject: [PATCH 3/3] chore: update data to remove duplicated bus-to-NEEM mapping --- powersimdata/design/investment/data/buses_NEEMregion.csv | 9 --------- 1 file changed, 9 deletions(-) diff --git a/powersimdata/design/investment/data/buses_NEEMregion.csv b/powersimdata/design/investment/data/buses_NEEMregion.csv index d174ba8ab..52b8a95ff 100644 --- a/powersimdata/design/investment/data/buses_NEEMregion.csv +++ b/powersimdata/design/investment/data/buses_NEEMregion.csv @@ -12358,7 +12358,6 @@ bus_id,name_abbr,dist,lat,lon 12357,PJM E,0.0,40.0887,-75.6346 12358,PJM E,0.0,40.0887,-75.6346 12359,PJM ROM,0.0,39.9515,-75.826 -12359,PJM E,0.0,39.9515,-75.826 12360,PJM E,0.0,39.8624,-75.8187 12361,PJM E,0.0,39.9648,-75.7912 12362,PJM E,0.0,39.9648,-75.7912 @@ -12458,10 +12457,8 @@ bus_id,name_abbr,dist,lat,lon 12456,PJM E,0.0,39.8364,-75.8027 12457,PJM E,0.0,39.8364,-75.8027 12458,PJM E,0.0,39.8364,-75.8027 -12459,PJM E,0.0,39.9024,-75.8392 12459,PJM ROM,0.0,39.9024,-75.8392 12460,PJM ROM,0.0,39.9024,-75.8392 -12460,PJM E,0.0,39.9024,-75.8392 12461,PJM ROM,0.0,41.1524,-77.3123 12462,PJM ROM,0.0,41.1524,-77.3123 12463,PJM ROM,0.0,41.2429,-77.2389 @@ -36057,7 +36054,6 @@ bus_id,name_abbr,dist,lat,lon 36053,PJM ROR,0.0,39.3891,-83.3393 36054,PJM ROR,0.0,39.2689,-83.432 36055,NonRTO Midwest,0.0,39.6566,-83.5378 -36055,PJM ROR,0.0,39.6566,-83.5378 36056,PJM ROR,0.0,39.7052,-83.3107 36057,PJM ROR,0.0,39.6489,-83.3377 36058,NonRTO Midwest,0.003978078694527869,39.479,-83.5037 @@ -42496,7 +42492,6 @@ bus_id,name_abbr,dist,lat,lon 42491,MISO IN,0.0,38.0288,-87.5769 42492,MISO IN,0.0,38.0288,-87.5769 42493,MISO IN,0.0,38.0811,-87.553 -42494,NonRTO Midwest,0.0,37.9335,-87.5617 42494,MISO IN,0.0,37.9335,-87.5617 42495,MISO IN,0.0,38.0137,-87.5274 42496,NonRTO Midwest,0.0,37.9065,-87.5288 @@ -43089,7 +43084,6 @@ bus_id,name_abbr,dist,lat,lon 43083,PJM ROR,0.0,42.2138,-88.7904 43084,PJM ROR,0.0,42.3539,-88.6689 43085,MISO WUMS,0.0,42.4947,-88.6446 -43085,PJM ROR,0.0,42.4947,-88.6446 43086,PJM ROR,0.0,42.4093,-88.6218 43087,PJM ROR,0.0,42.4093,-88.6218 43088,PJM ROR,0.0,42.3461,-88.9741 @@ -47993,9 +47987,7 @@ bus_id,name_abbr,dist,lat,lon 47986,MISO WUMS,0.0,42.598,-89.1008 47987,MISO WUMS,0.0,42.598,-89.1008 47988,MISO WUMS,0.0,42.5008,-89.1299 -47988,PJM ROR,0.0,42.5008,-89.1299 47989,MISO WUMS,0.0,42.5008,-89.1299 -47989,PJM ROR,0.0,42.5008,-89.1299 47990,MISO WUMS,0.0,42.5598,-88.7798 47991,PJM ROR,0.0,42.4831,-88.8725 47992,PJM ROR,0.0,42.4831,-88.8725 @@ -48141,7 +48133,6 @@ bus_id,name_abbr,dist,lat,lon 48132,MISO WUMS,0.0,42.6161,-88.3851 48133,MISO WUMS,0.0,42.6161,-88.3851 48134,MISO WUMS,0.0,42.496,-88.5134 -48134,PJM ROR,0.0,42.496,-88.5134 48135,MISO W,0.0,45.9493,-91.938 48136,MISO WUMS,0.0,43.2861,-88.2452 48137,MISO WUMS,0.0,43.2861,-88.2452