Skip to content

Commit

Permalink
Merge pull request #80 from Urban-Analytics-Technology-Platform/79-ch…
Browse files Browse the repository at this point in the history
…ecking-and-validating-travel-distance-assumptions

Use estimated travel distances instead of Euclidean distance (#79)
  • Loading branch information
sgreenbury authored Jan 31, 2025
2 parents cdb1389 + 22c98de commit 8f87677
Show file tree
Hide file tree
Showing 12 changed files with 231 additions and 12 deletions.
13 changes: 13 additions & 0 deletions config/base.toml
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,19 @@ optional_columns = [
]
n_matches = 10 # What is the maximum number of NTS matches we want for each SPC household?

[feasible_assignment]
# `actual_distance = distance * (1 + ((detour_factor - 1) * np.exp(-decay_rate * distance)))`
#
# `detour factor` when converting Euclidean distance to actual travel distance
detour_factor = 1.56

# `decay rate` is the inverse of the distance (in units of the data, e.g. metres) at which the
# scaling from using the detour factor to Euclidean distance reduces by `exp(−1)`.
#
# 0.0001 is a good value for Leeds when units are metres, choice of decay_rate can be explored in an
# [interactive plot](https://www.wolframalpha.com/input?i=plot+exp%28-0.0001x%29+from+x%3D0+to+x%3D50000)
decay_rate = 0.0001

[work_assignment]
commute_level = "OA"
use_percentages = true # if true, optimization problem will try to minimize percentage difference at OD level (not absolute numbers). Recommended to set it to true
Expand Down
78 changes: 78 additions & 0 deletions config/base_all_msoa.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
[parameters]
seed = 0
# this is used to query poi data from osm and to load in SPC data
region = "leeds"
# how many people from the SPC do we want to run the model for? Comment out if you want to run the analysis on the entire SPC populaiton
number_of_households = 25000
# "OA21CD": OA level, "MSOA11CD": MSOA level
zone_id = "MSOA21CD"
# Only set to true if you have travel time matrix at the level specified in boundary_geography
travel_times = false
boundary_geography = "MSOA"
# NTS years to use
nts_years = [2019, 2021, 2022]
# NTS regions to use
nts_regions = [
'Yorkshire and the Humber',
'North West',
'North East',
'East Midlands',
'West Midlands',
'East of England',
'South East',
'South West']
# nts day of the week to use
# 1: Monday, 2: Tuesday, 3: Wednesday, 4: Thursday, 5: Friday, 6: Saturday, 7: Sunday
nts_day_of_week = 3
# what crs do we want the output to be in? (just add the number, e.g. 3857)
output_crs = 3857

[matching]
# for optional and required columns, see the [iterative_match_categorical](https://github.com/Urban-Analytics-Technology-Platform/acbm/blob/ca181c54d7484ebe44706ff4b43c26286b22aceb/src/acbm/matching.py#L110) function
# Do not add any column not listed below. You can only move a column from optional to require (or vise versa)
required_columns = [
"number_adults",
"number_children",
"num_pension_age",
]
optional_columns = [
"number_cars",
"rural_urban_2_categories",
"employment_status",
"tenure_status",
]
# What is the maximum number of NTS matches we want for each SPC household?
n_matches = 10

[feasible_assignment]
# detour factor when converting euclidian distance to actual travel distance
detour_factor = 1.56
# decay rate when converting euclidian to travel distance (0.0001 is a good value)
# actual_distance = distance * (1 + ((detour_factor - 1) * np.exp(-decay_rate * distance)))
decay_rate = 0.0001

[work_assignment]
commute_level = "MSOA"
# if true, optimization problem will try to minimize percentage difference at OD level (not absolute numbers). Recommended to set it to true
use_percentages = true
# weights to add for each objective in the optimization problem
weight_max_dev = 0.2
weight_total_dev = 0.8
# maximum number of feasible zones to include in the optimization problem (less zones makes problem smaller - so faster, but at the cost of a better solution)
max_zones = 10

[postprocessing]
pam_jitter = 30
pam_min_duration = 10
# for get_pt_subscription: everyone above this age has a subscription (pensioners get free travel)
# TODO: more sophisticated approach
pt_subscription_age = 66
# to define if a person is a student:
# eveyone below this age is a student
student_age_base = 16
# everyone below this age that has at least one "education" activity is a student
student_age_upper = 30
# eveyone who uses one of the modes below is classified as a passenger (isPassenger = True)
modes_passenger = ['car_passenger', 'taxi']
# yearly state pension: for getting hhlIncome of pensioners
state_pension = 11502
10 changes: 9 additions & 1 deletion scripts/3.1_assign_primary_feasible_zones.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,11 @@ def main(config_file):
logger.info("Creating estimated travel times matrix")
# Create a new travel time matrix based on distances between zones
travel_time_estimates = zones_to_time_matrix(
zones=boundaries, id_col=config.zone_id, time_units="m"
zones=boundaries,
id_col=config.zone_id,
time_units="m",
detour_factor=config.feasible_assignment.detour_factor,
decay_rate=config.feasible_assignment.decay_rate,
)
logger.info("Travel time estimates created")

Expand Down Expand Up @@ -203,6 +207,8 @@ def main(config_file):
time_tolerance=config.parameters.tolerance_edu
if config.parameters.tolerance_edu is not None
else 0.3,
detour_factor=config.feasible_assignment.detour_factor,
decay_rate=config.feasible_assignment.decay_rate,
)

logger.info("Saving feasible zones for education activities")
Expand Down Expand Up @@ -230,6 +236,8 @@ def main(config_file):
time_tolerance=config.parameters.tolerance_work
if config.parameters.tolerance_work is not None
else 0.3,
detour_factor=config.feasible_assignment.detour_factor,
decay_rate=config.feasible_assignment.decay_rate,
)

logger.info("Saving feasible zones for work activities")
Expand Down
12 changes: 9 additions & 3 deletions scripts/3.3_assign_facility_all.py
Original file line number Diff line number Diff line change
Expand Up @@ -307,7 +307,10 @@ def main(config_file):
x_col="TripDisIncSW",
y_col="length",
x_label="Reported Travel Distance (km)",
y_label="Actual Distance - Euclidian (km)",
y_label="Actual Distance - Estimated (km)",
detour_factor=config.feasible_assignment.detour_factor,
decay_rate=config.feasible_assignment.decay_rate,
x_axis_max=50,
crs=f"EPSG:{config.output_crs}",
title_prefix=f"Scatter plot of TripDisIncSW vs. Length for {activity_type}",
save_dir=config.output_path / "plots/assigning/",
Expand All @@ -330,8 +333,11 @@ def main(config_file):
activity_type_col="destination activity",
x_col="TripTotalTime",
y_col="length",
x_label="Reported Travel TIme (min)",
y_label="Actual Distance - Euclidian (km)",
x_label="Reported Travel Time (min)",
y_label="Actual Distance - Estimated (km)",
detour_factor=config.feasible_assignment.detour_factor,
decay_rate=config.feasible_assignment.decay_rate,
x_axis_max=180,
crs=f"EPSG:{config.output_crs}",
title_prefix="Scatter plot of TripTotalTime vs. Length",
save_dir=config.output_path / "plots/assigning/",
Expand Down
2 changes: 2 additions & 0 deletions scripts/4_validation.py
Original file line number Diff line number Diff line change
Expand Up @@ -217,6 +217,8 @@ def main(config_file):
start_wkt_col="start_location_geometry_wkt",
end_wkt_col="end_location_geometry_wkt",
crs_epsg=config.output_crs,
detour_factor=1.56,
decay_rate=0.0001,
)

# Plot: Aggregate
Expand Down
17 changes: 15 additions & 2 deletions src/acbm/assigning/feasible_zones_primary.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,8 @@ def get_possible_zones(
travel_times: Optional[pd.DataFrame] = None,
filter_by_activity: bool = False,
time_tolerance: float = 0.2,
detour_factor: float = 1.56,
decay_rate: float = 0.0001,
) -> dict:
"""
Get possible zones for all activity chains in the dataset. This function loops over the travel_times dataframe and filters by mode, time of day and weekday/weekend.
Expand Down Expand Up @@ -106,6 +108,13 @@ def get_possible_zones(
activity chain's travel time (which is stored in "TripTotalTime"). Allowable travel_times are those that fall in the range of:
travel_time_reported * (1 - time_tolerance) <= travel_time_reported <= travel_time_reported * (1 + time_tolerance)
Default = 0.2
detour_factor: float
The detour factor used to calculate travel distances between zones from euclidian distances.
Default = 1.56
decay_rate: float
The decay rate used to calculate travel times between zones from travel distances. Detours make up a smaller portion of
longer distance trips. Default = 0.0001
Returns
-------
Expand Down Expand Up @@ -137,7 +146,11 @@ def get_possible_zones(
if travel_times is None:
logger.info("Travel time matrix not provided: Creating travel times estimates")
travel_times = zones_to_time_matrix(
zones=boundaries, id_col=zone_id, time_units="m"
zones=boundaries,
id_col=zone_id,
time_units="m",
detour_factor=detour_factor,
decay_rate=decay_rate,
)

list_of_modes = activity_chains["mode"].unique()
Expand Down Expand Up @@ -224,7 +237,7 @@ def get_possible_zones(
travel_times_filtered_mode_time_day = travel_times_filtered_mode
# if mode = car_passenger, we compare to the travel time for car (we don't
# have travel times for car_passenger)
if mode == "car_passenger":
if mode in ("car_passenger", "taxi"):
activity_chains_filtered = activity_chains[
(activity_chains["mode"] == "car")
]
Expand Down
19 changes: 19 additions & 0 deletions src/acbm/assigning/plots.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,8 @@
from matplotlib import patches as mpatches
from shapely.geometry import LineString

from acbm.assigning.utils import _adjust_distance


def plot_workzone_assignment_line(
assignment_results: pd.DataFrame,
Expand Down Expand Up @@ -387,7 +389,10 @@ def plot_scatter_actual_reported(
title_prefix: str,
activity_type: str,
activity_type_col: str,
detour_factor: float,
decay_rate: float,
crs: str,
x_axis_max: float,
save_dir: str | Path | None = None,
display: bool = False,
y_scale: float = 1 / 1000,
Expand All @@ -405,7 +410,10 @@ def plot_scatter_actual_reported(
- title_prefix: Prefix for the plot titles.
- activity_type: Type of activity to plot.
- activity_type_col: Column name for the activity type.
- detour_factor: Detour factor for the distance adjustment. (see `_adjust_distance`)
- decay_rate: Decay rate for the distance adjustment.
- crs: Coordinate reference system (CRS) of the activities data.
- x_axis_max: Maximum value for the x-axis. (use to remove very high reported distances / times)
- save_dir: Directory to save the plots. If None, plots are not saved.
- display: Whether to display plots by calling `plt.show()`.
"""
Expand Down Expand Up @@ -434,6 +442,13 @@ def plot_scatter_actual_reported(
# calculate the length of the line_geometry in meters
activity_chains_plot["length"] = activity_chains_plot["line_geometry"].length

# Create a column for estimated distance
activity_chains_plot["length"] = activity_chains_plot["length"].apply(
lambda d: _adjust_distance(
d, detour_factor=detour_factor, decay_rate=decay_rate
)
)

# Calculate the number of rows and columns for the subplots. It is a function of the number of modes
nrows = math.ceil(len(activity_chains_plot["mode"].unique()) / 2)
ncols = 2
Expand All @@ -460,6 +475,10 @@ def plot_scatter_actual_reported(
p = np.poly1d(z)
ax.plot(subset_mode[x_col], p(subset_mode[x_col]), "r--")

# Set x-axis limit
if x_axis_max:
ax.set_xlim(0, x_axis_max)

ax.set_title(f"{title_prefix} for mode: {mode}")
ax.set_xlabel(x_label)
ax.set_ylabel(y_label)
Expand Down
39 changes: 38 additions & 1 deletion src/acbm/assigning/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -286,10 +286,39 @@ def _get_activities_per_zone_df(activities_per_zone: dict) -> pd.DataFrame:
return pd.concat(activities_per_zone.values())


def _adjust_distance(
distance: float,
detour_factor: float,
decay_rate: float,
) -> float:
"""
Adjusts euclidian distances by adding a detour factor. We use minkowski distance
and a decay rate, as longer detour makes up a smaller proportion of the total
distance as the distance increases.
Parameters
----------
distance : float
The original distance.
detour_factor : float
The detour factor to be applied.
decay_rate : float
The decay rate to be applied.
Returns
-------
float
The adjusted distance.
"""
return distance * (1 + ((detour_factor - 1) * np.exp(-decay_rate * distance)))


def zones_to_time_matrix(
zones: gpd.GeoDataFrame,
time_units: str,
id_col: Optional[str] = None,
detour_factor: float = 1.56,
decay_rate: float = 0.0001,
) -> pd.DataFrame:
"""
Calculates the distance matrix between the centroids of the given zones and returns it as a DataFrame. The matrix also adds
Expand All @@ -307,6 +336,8 @@ def zones_to_time_matrix(
The name of the column in the zones GeoDataFrame to use as the ID. If None, the index values are used. Default is None.
time_units: str, optional
The units to use for the travel time. Options are 's' for seconds and 'm' for minutes.
detour_factor: float, optional
The detour factor to apply to the distance. Default is 1.56.
Returns
-------
pd.DataFrame
Expand All @@ -323,6 +354,11 @@ def zones_to_time_matrix(
distances = distances.stack().reset_index()
distances.columns = [f"{id_col}_from", f"{id_col}_to", "distance"]

# adjust distance column by adding a detour factor
distances["distance"] = distances["distance"].apply(
lambda d: _adjust_distance(d, detour_factor, decay_rate)
)

# replace the index values with the id_col values if id_col is specified
if id_col is not None:
# create a mapping from index to id_col values
Expand Down Expand Up @@ -449,7 +485,8 @@ def intrazone_time(zones: gpd.GeoDataFrame, key_column: str) -> dict:
# Calculate the area of each zone
zones["area"] = zones["geometry"].area
# Calculate the average distance within each zone
zones["average_dist"] = np.sqrt(zones["area"]) / 2
# sqrt(area) / 2 would be radius
zones["average_dist"] = np.sqrt(zones["area"]) / 1.5

# Mode speeds in m/s
mode_speeds_mps = {
Expand Down
11 changes: 10 additions & 1 deletion src/acbm/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,12 @@ class MatchingParams(BaseModel):
chunk_size: int = 50_000


@dataclass(frozen=True)
class FeasibleAssignmentParams(BaseModel):
detour_factor: float
decay_rate: float


@dataclass(frozen=True)
class WorkAssignmentParams(BaseModel):
use_percentages: bool
Expand Down Expand Up @@ -84,10 +90,13 @@ def serialize_filepath(self, filepath: Path | str | None) -> str | None:

class Config(BaseModel):
parameters: Parameters = Field(description="Config: parameters.")
matching: MatchingParams = Field(description="Config: parameters for matching.")
feasible_assignment: FeasibleAssignmentParams = Field(
description="Config: parameters for assignment of feasible zones."
)
work_assignment: WorkAssignmentParams = Field(
description="Config: parameters for work assignment."
)
matching: MatchingParams = Field(description="Config: parameters for matching.")
postprocessing: Postprocessing = Field(
description="Config: parameters for postprocessing."
)
Expand Down
Loading

0 comments on commit 8f87677

Please sign in to comment.