Commit e83f257
chore(scripts): clean download loop and remove unused code

- Removed deprecated time calculations and unnecessary code not required for the API.

- Fixed a FutureWarning from OSMnx by updating `geometries_from_polygon` to `features_from_polygon`, in line with the OSMnx v2 API changes (a before/after sketch follows this message).
- Refactored the download loop for clarity and efficiency (a retry-loop sketch follows the prepare_networks.py diff below).

See the OSMnx v2 migration guide for details: gboeing/osmnx#1123
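
A minimal before/after sketch of that API change, assuming OSMnx >= 2.0; the polygon and tag dictionary below are illustrative placeholders, not the scripts' actual inputs:

```python
import osmnx as ox

# Illustrative inputs only -- the scripts build their polygons and tags
# from the project's own parameters.
polygon = ox.geocoder.geocode_to_gdf("Barcelona, Spain").geometry.iloc[0]
tags = {"amenity": "hospital"}

# OSMnx v1 call (emits a FutureWarning; removed in v2):
# gdf = ox.geometries.geometries_from_polygon(polygon, tags)

# OSMnx v2 replacement used by this commit:
gdf = ox.features.features_from_polygon(polygon, tags)
```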
Roger G. March committed Jan 28, 2025
1 parent d9832f5 commit e83f257
Showing 3 changed files with 37 additions and 236 deletions.
14 changes: 2 additions & 12 deletions backend/models/scripts/cluster_pois.py
@@ -176,11 +176,9 @@ def main(

for poi_type in pois.keys(): # Loop through each POI type in the category
poi_file = Path(PATH["data"]) / placeid / f"{placeid}_poi_{poi_type}.gpkg"
print(poi_file)
print(poi_file.exists())

if poi_file.exists():
geo = gpd.read_file(poi_file)
print(geo)
geo["poi_source"] = poi_type # Assign POI type (e.g., 'hospital')
geo["category"] = category_name # Assign category name (e.g., 'sanidad')
geo["weight"] = slider_weights.get(category_name, 1) # Assign slider weight
@@ -205,8 +203,6 @@ def main(
.reset_index(name="weighted_point_count")
)

#gdf_hex = gdp.read_file("/media/M2_disk/roger/tomtom/data/h_index_geometries/gdf_hex_intersected.geojson")

# Merge the weighted counts back with the original hexagons
gdf_hex = gdf_hex.merge(hexagon_counts, on="hex_id", how="left").fillna(0)

@@ -232,12 +228,6 @@ def main(
# Step 5: Create the final dataframe keeping centroids and exemplar labels
gdf_hex = gdf_hex[["geometry", "centroid", "weighted_point_count", "cluster", "is_exemplar"]]

# Step 6: Plot clusters and exemplars
ax = gdf_hex.plot(column="cluster", cmap="tab20", legend=True, alpha=0.5, figsize=(10, 6))
gdf_hex[gdf_hex["is_exemplar"] == 1].plot(ax=ax, color="red", markersize=50, label="Exemplars")
plt.legend()
plt.title("Affinity Propagation Clusters with Exemplars (Distance + Point Count)")
plt.show()

else:
print("No POI files found.")
@@ -246,7 +236,7 @@ def main(

# You may need to execute the first cell first
# Define the snapping threshold (in meters)
snapthreshold = 50 # Adjust this value as needed
snapthreshold = 100 # Adjust this value as needed

# Initialize a set to store snapped node IDs
nnids = set()
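
For context, the weighted-count aggregation that cluster_pois.py keeps boils down to this pattern (a minimal sketch; `pois_in_hex` and `weighted_hex_counts` are illustrative names, with the POI frame assumed to carry a `hex_id` from a prior spatial join and a per-category `weight`):

```python
import geopandas as gpd

def weighted_hex_counts(gdf_hex: gpd.GeoDataFrame, pois_in_hex) -> gpd.GeoDataFrame:
    """Sum POI weights per hexagon and merge the counts onto the hex grid."""
    hexagon_counts = (
        pois_in_hex.groupby("hex_id")["weight"]
        .sum()
        .reset_index(name="weighted_point_count")
    )
    # Hexagons with no POIs get weighted_point_count = 0 after the left merge.
    return gdf_hex.merge(hexagon_counts, on="hex_id", how="left").fillna(0)
```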
28 changes: 1 addition & 27 deletions backend/models/scripts/prepare_networks.py
@@ -32,19 +32,10 @@
from scripts.functions import fill_holes, extract_relevant_polygon, ox_to_csv, compress_file
from parameters.parameters import networktypes, osmnxparameters



def main(PATH, cities):

# Initialize a list to store timing information
timing_data = []

# File to store the timing information
timing_file = PATH["data"] / "01_download_times.csv"

for placeid, placeinfo in tqdm(cities.items(), desc="Cities"):
start_time_city = time.time() # Start timing for the city


if placeinfo["nominatimstring"] != '':
location = ox.geocoder.geocode_to_gdf(placeinfo["nominatimstring"])
location = fill_holes(extract_relevant_polygon(placeid, shapely.geometry.shape(location['geometry'][0])))
@@ -66,7 +57,6 @@ def main(PATH, cities):

Gs = {}
for parameterid, parameterinfo in tqdm(osmnxparameters.items(), desc="Networks", leave=False):
start_time_network = time.time() # Start timing for the network type

for i in range(0, 10): # retry loop
try:
@@ -90,10 +80,6 @@ def main(PATH, cities):
if parameterinfo['export']:
ox_to_csv(Gs[parameterid], PATH["data"] / placeid, placeid, parameterid)

end_time_network = time.time() # End timing for the network type
network_duration = end_time_network - start_time_network
timing_data.append([placeid, parameterid, network_duration]) # Record timing data

# Compose special cases
parameterid = 'biketrack'
Gs[parameterid] = nx.compose_all([
@@ -116,18 +102,6 @@ def main(PATH, cities):
for parameterid in networktypes[:-2]:
ox_to_csv(ox.simplify_graph(Gs[parameterid]), PATH["data"] / placeid, placeid, parameterid, "_simplified")

end_time_city = time.time() # End timing for the city
city_duration = end_time_city - start_time_city
timing_data.append([placeid, "total_city_time", city_duration]) # Record total time for the city

# Write timing data to CSV
with open(timing_file, mode='w', newline='') as file:
writer = csv.writer(file)
writer.writerow(["PlaceID", "NetworkType", "Time (seconds)"]) # CSV header
writer.writerows(timing_data)

print(f"Timing information saved to {timing_file}")

# Compress all data files
for folder, subfolders, files in os.walk(PATH["data"]):
for file in files:
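
The refactored download loop in this file keeps a bounded retry (the `# retry loop` above). A minimal sketch of that pattern, assuming the loop wraps `ox.graph_from_polygon`; the script's exact call and exception handling may differ:

```python
import osmnx as ox

def download_with_retries(polygon, osmnx_kwargs, max_attempts=10):
    """Attempt a network download up to max_attempts times before giving up."""
    for attempt in range(max_attempts):
        try:
            return ox.graph_from_polygon(polygon, **osmnx_kwargs)
        except Exception:
            # Transient Overpass/connection failures can succeed on retry;
            # re-raise once the attempt budget is exhausted.
            if attempt == max_attempts - 1:
                raise
```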
231 changes: 34 additions & 197 deletions backend/models/scripts/prepare_pois.py
@@ -29,7 +29,7 @@

# Local
from scripts.functions import fill_holes, extract_relevant_polygon, csv_to_ox, count_and_merge, rotate_grid, reverse_bearing
from parameters.parameters import gridl, bearingbins, poiparameters, osmnxparameters, snapthreshold
from parameters.parameters import poiparameters, snapthreshold



@@ -38,18 +38,10 @@ def main(PATH, cities):
G_caralls = {}
G_caralls_simplified = {}
locations = {}
parameterinfo = osmnxparameters['carall']


timing_data = []

timing_file = PATH["data"] / "02_download_times.csv"


# Iterate through cities to load polygons and graphs
for placeid, placeinfo in tqdm(cities.items(), desc="Cities"):
start_time_load = time.time()


print(f"{placeid}: Loading location polygon and carall graph")

# Check if 'nominatimstring' exists
@@ -73,197 +65,42 @@ def main(PATH, cities):
G_caralls_simplified[placeid] = csv_to_ox(PATH["data"] / placeid, placeid, 'carall_simplified')
G_caralls_simplified[placeid].graph["crs"] = 'epsg:4326'

end_time_load = time.time() # End timing for the network type
load_duration = end_time_load - start_time_load
timing_data.append([placeid, parameterinfo, load_duration]) # Record timing data


for placeid, placeinfo in tqdm(cities.items(), desc="Cities"):
start_time_download_POI = time.time() # Start timing for the whole city
print(f"{placeid}: Creating POIs")

# Retrieve location geometry and simplified carall graph
location = locations[placeid]
G_carall = G_caralls_simplified[placeid]

# Loop through POI parameters and process each tag
for poiid, poitag in poiparameters.items():
start_time_tags_POI = time.time() # Start timing for each POI tag
try:
# Extract relevant geometries (Points only) from the location polygon
gdf = ox.geometries.geometries_from_polygon(location, poitag)
gdf = gdf[gdf['geometry'].type == "Point"] # Only consider Points (exclude polygons)

# Snap points to the nearest nodes in the network
nnids = set()
for g in gdf['geometry']:
n = ox.distance.nearest_nodes(G_carall, g.x, g.y)
# Only snap if within the defined threshold
if n not in nnids and haversine((g.y, g.x), (G_carall.nodes[n]["y"], G_carall.nodes[n]["x"]), unit="m") <= snapthreshold:
nnids.add(n)

# Save nearest node ids for POIs in a CSV file
output_file_path = PATH["data"] / placeid / f"{placeid}_poi_{poiid}_nnidscarall.csv"
with output_file_path.open('w', newline='', encoding='utf-8') as f:
for item in nnids:
f.write(f"{item}\n")

# Convert the gdf to string type for writing, and save locally (if feasible)
gdf = gdf.apply(lambda c: c.astype(str) if c.name != 'geometry' else c, axis=0)
try:
gdf.to_file(PATH["data"] / placeid / f"{placeid}_poi_{poiid}.gpkg", driver='GPKG')
except:
print(f"Notice: Writing the gdf did not work for {placeid}")

if debug: gdf.plot(color='red')
location = locations[placeid]
G_carall = G_caralls[placeid]
# Loop through POI parameters and process each tag
for poiid, poitag in poiparameters.items():

try:
# Extract relevant geometries (Points only) from the location polygon
gdf = ox.features.features_from_polygon(location, poitag)
gdf = gdf[gdf['geometry'].type == "Point"] # Only consider Points (exclude polygons)

except Exception as e:
print(f"No stations in {placeinfo['name']}. No POIs created. Error: {e}")
# Snap points to the nearest nodes in the network
nnids = set()
for g in gdf['geometry']:
n = ox.distance.nearest_nodes(G_carall, g.x, g.y)
# Only snap if within the defined threshold
if n not in nnids and haversine((g.y, g.x), (G_carall.nodes[n]["y"], G_carall.nodes[n]["x"]), unit="m") <= snapthreshold:
nnids.add(n)

# End timing for POI processing
end_time_tags_POI = time.time()
tags_duration = end_time_tags_POI - start_time_tags_POI
timing_data.append([placeid, poiid, "POI Processing", tags_duration])

# End timing for the whole download and processing for this city
end_time_download_POI = time.time()
download_POI_duration = end_time_download_POI - start_time_download_POI
timing_data.append([placeid, None, "Download and Match", download_POI_duration]) # Record timing data




# Create a grid for each city
for placeid, placeinfo in tqdm(cities.items(), desc="Cities"):
start_time_grid_creation = time.time() # Start timing for grid creation
print(f"{placeid}: Creating grid")

location = locations[placeid]

# First step: Calculate the most common bearing to orient the grid
start_time_bearing_calc = time.time() # Start timing for bearing calculation
G = G_caralls[placeid]
bearings = {}

# Add edge bearings and calculate them for the network
Gu = ox.bearing.add_edge_bearings(ox.get_undirected(G))
city_bearings = []

# Weight bearings by edge lengths
for u, v, k, d in Gu.edges(keys=True, data=True):
city_bearings.extend([d['bearing']] * int(d['length']))
b = pd.Series(city_bearings)

# Combine bearings for both directions and calculate bins
bearings = pd.concat([b, b.map(reverse_bearing)]).reset_index(drop=True)
bins = np.arange(bearingbins + 1) * 360 / bearingbins
count = count_and_merge(bearingbins, bearings)

# Determine the principal bearing for grid orientation
principalbearing = bins[np.argmax(count)]
if debug:
print(f"Principal bearing: {principalbearing}")

end_time_bearing_calc = time.time() # End timing for bearing calculation
bearing_calc_duration = end_time_bearing_calc - start_time_bearing_calc
timing_data.append([placeid, "Bearing Calculation", bearing_calc_duration]) # Save timing data

# Second step: Generate the grid with the calculated orientation
start_time_grid_gen = time.time() # Start timing for grid generation
G = G_caralls_simplified[placeid]

# Calculate buffer for snapping POIs to nodes outside the grid
buf = max(((2 * snapthreshold) / 6378000) * (180 / math.pi),
((2 * snapthreshold) / 6378000) * (180 / math.pi) / math.cos(location.centroid.y * math.pi / 180))
cities[placeid]["bbox"] = location.buffer(buf).bounds

# Define projection for grid creation
p_ll = pyproj.Proj('+proj=longlat +datum=WGS84')
aeqd_proj = '+proj=aeqd +lat_0={lat} +lon_0={lon} +x_0=0 +y_0=0'
p_mt = pyproj.Proj(aeqd_proj.format(lat=location.centroid.y, lon=location.centroid.x))

# Enlarge grid boundaries to account for rotation
deltax = cities[placeid]["bbox"][2] - cities[placeid]["bbox"][0]
deltay = cities[placeid]["bbox"][3] - cities[placeid]["bbox"][1]
enlargefactor = 10
sw = shapely.geometry.Point(cities[placeid]["bbox"][:2])
ne = shapely.geometry.Point(cities[placeid]["bbox"][2:] + np.array([enlargefactor * deltax, enlargefactor * deltay]))

# Project the SW and NE points
transformed_sw = pyproj.transform(p_ll, p_mt, sw.x, sw.y)
transformed_ne = pyproj.transform(p_ll, p_mt, ne.x, ne.y)

# Define the principal bearing
principalbearing %= 90
if principalbearing > 45:
principalbearing -= 90

# Generate grid points
xcoords = np.arange(transformed_sw[0], transformed_ne[0], gridl)
ycoords = np.arange(transformed_sw[1], transformed_ne[1], gridl)

gridpoints = np.dstack(np.meshgrid(xcoords, ycoords)).reshape(-1, 2)
new_points = rotate_grid(gridpoints, origin=transformed_sw, degrees=principalbearing)

# Project back to lat-lon
fx, fy = pyproj.transform(p_mt, p_ll, new_points[:, 0], new_points[:, 1])
gridpoints = np.vstack([fx, fy]).T

# Adjust grid points based on rotation
if principalbearing >= 0:
gridpoints[:, 0] -= 0.4 * enlargefactor * deltax * np.sin(np.deg2rad(principalbearing))
else:
gridpoints[:, 0] += 0.4 * enlargefactor * deltax * np.sin(np.deg2rad(principalbearing))
gridpoints[:, 1] -= 0.4 * enlargefactor * deltay

# Filter grid points back to bounding box
mask = (gridpoints[:, 0] >= cities[placeid]["bbox"][0]) & (gridpoints[:, 0] <= cities[placeid]["bbox"][2]) & \
(gridpoints[:, 1] >= cities[placeid]["bbox"][1]) & (gridpoints[:, 1] <= cities[placeid]["bbox"][3])
gridpoints_cut = gridpoints[mask]

end_time_grid_gen = time.time() # End timing for grid generation
grid_gen_duration = end_time_grid_gen - start_time_grid_gen
timing_data.append([placeid, "Grid Generation", grid_gen_duration]) # Save timing data

# Optionally plot the grid points
if debug:
plt.figure(figsize=[12.8, 9.6])
plt.plot(gridpoints_cut[:, 0], gridpoints_cut[:, 1], ".", color="red")

# Start timing for snapping grid points
start_time_snapping = time.time()

# Snap grid points to the nearest nodes in the street network
nnids = set()
for g in gridpoints_cut:
n = ox.distance.nearest_nodes(G, g[0], g[1])
if n not in nnids and haversine((g[1], g[0]), (G.nodes[n]["y"], G.nodes[n]["x"]), unit="m") <= snapthreshold:
nnids.add(n)

# Save the snapped nodes to a CSV
with (PATH["data"] / placeid / f"{placeid}_poi_grid_nnidscarall.csv").open('w', newline='', encoding='utf-8') as f:
for item in nnids:
f.write(f"{item}\n")

# End timing for snapping grid points
end_time_snapping = time.time()
snapping_duration = end_time_snapping - start_time_snapping
timing_data.append([placeid, "Snapping Grid Points", snapping_duration]) # Save timing data
# Save nearest node ids for POIs in a CSV file
output_file_path = PATH["data"] / placeid / f"{placeid}_poi_{poiid}_nnidscarall.csv"
with output_file_path.open('w', newline='', encoding='utf-8') as f:
for item in nnids:
f.write(f"{item}\n")

# Convert the gdf to string type for writing, and save locally (if feasible)
gdf = gdf.apply(lambda c: c.astype(str) if c.name != 'geometry' else c, axis=0)
try:
gdf.to_file(PATH["data"] / placeid / f"{placeid}_poi_{poiid}.gpkg", driver='GPKG')
except:
print(f"Notice: Writing the gdf did not work for {placeid}")

if debug: gdf.plot(color='red')

# End timing for overall grid creation for the city
end_time_grid_creation = time.time()
grid_creation_duration = end_time_grid_creation - start_time_grid_creation
timing_data.append([placeid, "Total Grid Creation", grid_creation_duration])

# Write timing data to CSV
with open(timing_file, mode='w', newline='') as file:
writer = csv.writer(file)
writer.writerow(["PlaceID", "NetworkType","ProcessStep", "Time (seconds)"]) # CSV header
writer.writerows(timing_data)

print(f"Timing information saved to {timing_file}")

pass
except Exception as e:
print(f"No {poiid} in {placeinfo}. No POIs created. Error: {e}")

if __name__ == "__main__":
main()
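
Both scripts rely on the same nearest-node snapping step shown in the diff above. As a self-contained reference (the function name is illustrative; `G` is assumed to be an OSMnx graph in EPSG:4326, `points` an iterable of shapely Points in lon/lat, and the threshold in meters):

```python
import osmnx as ox
from haversine import haversine

def snap_points_to_nodes(G, points, snapthreshold=100):
    """Return ids of graph nodes within snapthreshold meters of any point."""
    nnids = set()
    for g in points:
        n = ox.distance.nearest_nodes(G, g.x, g.y)
        # Keep the node only if it lies within the great-circle threshold.
        if n not in nnids and haversine(
            (g.y, g.x), (G.nodes[n]["y"], G.nodes[n]["x"]), unit="m"
        ) <= snapthreshold:
            nnids.add(n)
    return nnids
```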
