Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Revise travel day #82

Closed
wants to merge 30 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
30 commits
Select commit Hold shift + click to select a range
84a9273
adjusted distances
Hussein-Mahfouz Dec 13, 2024
a588c67
update _adjust_distance
Hussein-Mahfouz Dec 13, 2024
9fa6b0f
undo update _adjust_distance
Hussein-Mahfouz Dec 13, 2024
40d2e2f
edit intrazone
Hussein-Mahfouz Dec 13, 2024
7411e46
Merge remote-tracking branch 'origin/79-checking-and-validating-trave…
sgreenbury Jan 7, 2025
3e521ea
Fix missing function args
sgreenbury Jan 7, 2025
5b6cfbb
config for reference
Hussein-Mahfouz Jan 10, 2025
489e085
Merge remote-tracking branch 'origin/79-checking-and-validating-trave…
sgreenbury Jan 14, 2025
0e06e29
Update config
sgreenbury Jan 14, 2025
9c0003a
Fix test
sgreenbury Jan 14, 2025
6164a28
Adjust matching to account for common travel day
sgreenbury Jan 7, 2025
0cea61f
Add 'DayID' to outputs
sgreenbury Jan 8, 2025
88f4911
Fix missing subset of nts_trips
sgreenbury Jan 8, 2025
e0fc2f9
Add interim output with chosen random day for households
sgreenbury Jan 8, 2025
ed45b92
Add todo
sgreenbury Jan 9, 2025
ce5a261
Handle missing data and revise test for common trav days
sgreenbury Jan 14, 2025
ced648d
Fix test
sgreenbury Jan 14, 2025
5f17c56
Add chosen travel day modelling using pwkstat
sgreenbury Jan 15, 2025
4b6db8c
Revise subsetting of households given trav day config
sgreenbury Jan 15, 2025
1f5e0b9
Fix test
sgreenbury Jan 15, 2025
4ce4b1a
Add logging for NTS filtering
sgreenbury Jan 15, 2025
46946d1
Fix merge columns
sgreenbury Jan 15, 2025
4301493
Add matching for remaining individuals
sgreenbury Jan 15, 2025
be2eb6b
Revise region variable to 'PSUStatsReg_B01ID'
sgreenbury Jan 15, 2025
748260a
Fix logging
sgreenbury Jan 15, 2025
5105dab
Remove obsolete comment
sgreenbury Jan 16, 2025
32dfe63
Uncomment code
sgreenbury Jan 16, 2025
6ebd5be
Limit the number of processes to proportion of cpu_count()
sgreenbury Jan 16, 2025
1dd281b
Only consider working not from home
sgreenbury Jan 20, 2025
a07c035
Add scaling
sgreenbury Jan 22, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 20 additions & 1 deletion config/base.toml
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ nts_regions = [
]
# nts day of the week to use
# 1: Monday, 2: Tuesday, 3: Wednesday, 4: Thursday, 5: Friday, 6: Saturday, 7: Sunday
nts_day_of_week = 3
nts_days_of_week = [3]
# what crs do we want the output to be in? (just add the number, e.g. 3857)
output_crs = 3857

Expand All @@ -37,6 +37,13 @@ optional_columns = [
]
n_matches = 10 # What is the maximum number of NTS matches we want for each SPC household?

[feasible_assignment]
# detour factor when converting Euclidean distance to actual travel distance
detour_factor = 1.56
# decay rate when converting Euclidean distance to travel distance (0.0001 is a good value)
# actual_distance = distance * (1 + ((detour_factor - 1) * np.exp(-decay_rate * distance)))
decay_rate = 0.0001

[work_assignment]
commute_level = "OA"
use_percentages = true # if true, optimization problem will try to minimize percentage difference at OD level (not absolute numbers). Recommended to set it to true
Expand All @@ -48,3 +55,15 @@ max_zones = 8 # maximum number of feasible zones to include in the opti
[postprocessing]
pam_jitter = 30
pam_min_duration = 10
# for get_pt_subscription: everyone above this age has a subscription (pensioners get free travel)
# TODO: more sophisticated approach
pt_subscription_age = 66
# to define if a person is a student:
# everyone below this age is a student
student_age_base = 16
# everyone below this age that has at least one "education" activity is a student
student_age_upper = 30
# everyone who uses one of the modes below is classified as a passenger (isPassenger = True)
modes_passenger = ['car_passenger', 'taxi']
# yearly state pension: for getting hhlIncome of pensioners
state_pension = 11502
78 changes: 78 additions & 0 deletions config/base_all_msoa.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
[parameters]
seed = 0
# this is used to query poi data from osm and to load in SPC data
region = "leeds"
# how many people from the SPC do we want to run the model for? Comment out if you want to run the analysis on the entire SPC population
number_of_households = 25000
# "OA21CD": OA level, "MSOA11CD": MSOA level
zone_id = "MSOA21CD"
# Only set to true if you have travel time matrix at the level specified in boundary_geography
travel_times = false
boundary_geography = "MSOA"
# NTS years to use
nts_years = [2019, 2021, 2022]
# NTS regions to use
nts_regions = [
'Yorkshire and the Humber',
'North West',
'North East',
'East Midlands',
'West Midlands',
'East of England',
'South East',
'South West']
# nts day of the week to use
# 1: Monday, 2: Tuesday, 3: Wednesday, 4: Thursday, 5: Friday, 6: Saturday, 7: Sunday
nts_day_of_week = 3
# what crs do we want the output to be in? (just add the number, e.g. 3857)
output_crs = 3857

[matching]
# for optional and required columns, see the [iterative_match_categorical](https://github.com/Urban-Analytics-Technology-Platform/acbm/blob/ca181c54d7484ebe44706ff4b43c26286b22aceb/src/acbm/matching.py#L110) function
# Do not add any column not listed below. You can only move a column from optional to required (or vice versa)
required_columns = [
"number_adults",
"number_children",
"num_pension_age",
]
optional_columns = [
"number_cars",
"rural_urban_2_categories",
"employment_status",
"tenure_status",
]
# What is the maximum number of NTS matches we want for each SPC household?
n_matches = 10

[feasible_assignment]
# detour factor when converting Euclidean distance to actual travel distance
detour_factor = 1.56
# decay rate when converting Euclidean distance to travel distance (0.0001 is a good value)
# actual_distance = distance * (1 + ((detour_factor - 1) * np.exp(-decay_rate * distance)))
decay_rate = 0.0001

[work_assignment]
commute_level = "MSOA"
# if true, optimization problem will try to minimize percentage difference at OD level (not absolute numbers). Recommended to set it to true
use_percentages = true
# weights to add for each objective in the optimization problem
weight_max_dev = 0.2
weight_total_dev = 0.8
# maximum number of feasible zones to include in the optimization problem (fewer zones make the problem smaller and therefore faster, but at the cost of solution quality)
max_zones = 10

[postprocessing]
pam_jitter = 30
pam_min_duration = 10
# for get_pt_subscription: everyone above this age has a subscription (pensioners get free travel)
# TODO: more sophisticated approach
pt_subscription_age = 66
# to define if a person is a student:
# everyone below this age is a student
student_age_base = 16
# everyone below this age that has at least one "education" activity is a student
student_age_upper = 30
# everyone who uses one of the modes below is classified as a passenger (isPassenger = True)
modes_passenger = ['car_passenger', 'taxi']
# yearly state pension: for getting hhlIncome of pensioners
state_pension = 11502
48 changes: 45 additions & 3 deletions scripts/2_match_households_and_individuals.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
from acbm.assigning.utils import cols_for_assignment_all
from acbm.cli import acbm_cli
from acbm.config import load_and_setup_config
from acbm.matching import MatcherExact, match_individuals
from acbm.matching import MatcherExact, match_individuals, match_remaining_individuals
from acbm.preprocessing import (
count_per_group,
nts_filter_by_region,
Expand All @@ -16,6 +16,10 @@
transform_by_group,
truncate_values,
)
from acbm.utils import (
households_with_common_travel_days,
households_with_travel_days_in_nts_weeks,
)


@acbm_cli
Expand Down Expand Up @@ -222,23 +226,48 @@ def get_interim_path(

logger.info("Filtering NTS data by specified year(s)")

logger.info(f"Total NTS households: {nts_households.shape[0]:,.0f}")
years = config.parameters.nts_years

nts_individuals = nts_filter_by_year(nts_individuals, psu, years)
nts_households = nts_filter_by_year(nts_households, psu, years)
nts_trips = nts_filter_by_year(nts_trips, psu, years)

logger.info(
f"Total NTS households (after year filtering): {nts_households.shape[0]:,.0f}"
)
# #### Filter by geography
#

regions = config.parameters.nts_regions

nts_individuals = nts_filter_by_region(nts_individuals, psu, regions)
nts_households = nts_filter_by_region(nts_households, psu, regions)
nts_trips = nts_filter_by_region(nts_trips, psu, regions)

# Create dictionaries of key value pairs
logger.info(
f"Total NTS households (after region filtering): {nts_households.shape[0]:,.0f}"
)

# Ensure that the households have at least one day in `nts_days_of_week` that
# all household members have trips for
if config.parameters.common_household_day:
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The new boolean parameter common_household_day determines whether all individuals of the household need to have a TravDay in common.

hids = households_with_common_travel_days(
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Gets the subset of households where all individuals have a common TravDay that is in the set of configured days (config.parameters.nts_days_of_week)

nts_trips, config.parameters.nts_days_of_week
)
else:
hids = households_with_travel_days_in_nts_weeks(
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Gets the subset of households where all individuals have any TravDay that is in the set of configured days (config.parameters.nts_days_of_week)

nts_trips, config.parameters.nts_days_of_week
)

# Subset individuals and households given filtering of trips
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Subset to the households subset above before matching to ensure matches have required TravDays

nts_trips = nts_trips[
nts_trips["HouseholdID"].isin(hids)
& nts_trips["TravDay"].isin(config.parameters.nts_days_of_week)
]
nts_individuals = nts_individuals[nts_individuals["HouseholdID"].isin(hids)]
nts_households = nts_households[nts_households["HouseholdID"].isin(hids)]

# Create dictionaries of key value pairs
"""
guide to the dictionaries:

Expand Down Expand Up @@ -924,6 +953,19 @@ def get_interim_path(
show_progress=True,
)

# match remaining individuals
remaining_ids = spc_edited.loc[
~spc_edited.index.isin(matches_ind.keys()), "id"
].to_list()
matches_remaining_ind = match_remaining_individuals(
df1=spc_edited,
df2=nts_individuals,
matching_columns=["age_group", "sex"],
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could update the matching_columns here to enable more precision when not using households: e.g. for employment status and urban rural classification.

remaining_ids=remaining_ids,
show_progress=True,
)
matches_ind.update(matches_remaining_ind)

Comment on lines +956 to +968
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Add matching for any remaining individuals that were part of unmatched households. It might be worth considering if this should be more configurable.

# save random sample
with open(
get_interim_path("matches_ind_level_categorical_random_sample.pkl"), "wb"
Expand Down
23 changes: 18 additions & 5 deletions scripts/3.1_assign_primary_feasible_zones.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
from acbm.assigning.utils import (
activity_chains_for_assignment,
get_activities_per_zone,
get_chosen_day,
intrazone_time,
replace_intrazonal_travel_time,
zones_to_time_matrix,
Expand All @@ -28,11 +29,15 @@ def main(config_file):
activity_chains = activity_chains_for_assignment(config)
logger.info("Activity chains loaded")

# Filter to a specific day of the week
logger.info("Filtering activity chains to a specific day of the week")
activity_chains = activity_chains[
activity_chains["TravDay"] == config.parameters.nts_day_of_week
]

# Generate random sample of days by household
get_chosen_day(config).to_parquet(
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Get a chosen day for each individual to represent a "sample" day given the configured days of the week and whether the household is configured to share a common day.

config.output_path / "interim" / "assigning" / "chosen_trav_day.parquet"
)

# Filter to chosen day
activity_chains = activity_chains_for_assignment(config, subset_to_chosen_day=True)

# --- Study area boundaries

Expand Down Expand Up @@ -74,7 +79,11 @@ def main(config_file):
logger.info("Creating estimated travel times matrix")
# Create a new travel time matrix based on distances between zones
travel_time_estimates = zones_to_time_matrix(
zones=boundaries, id_col=config.zone_id, time_units="m"
zones=boundaries,
id_col=config.zone_id,
time_units="m",
detour_factor=config.feasible_assignment.detour_factor,
decay_rate=config.feasible_assignment.decay_rate,
)
logger.info("Travel time estimates created")

Expand Down Expand Up @@ -203,6 +212,8 @@ def main(config_file):
time_tolerance=config.parameters.tolerance_edu
if config.parameters.tolerance_edu is not None
else 0.3,
detour_factor=config.feasible_assignment.detour_factor,
decay_rate=config.feasible_assignment.decay_rate,
)

logger.info("Saving feasible zones for education activities")
Expand Down Expand Up @@ -230,6 +241,8 @@ def main(config_file):
time_tolerance=config.parameters.tolerance_work
if config.parameters.tolerance_work is not None
else 0.3,
detour_factor=config.feasible_assignment.detour_factor,
decay_rate=config.feasible_assignment.decay_rate,
)

logger.info("Saving feasible zones for work activities")
Expand Down
5 changes: 1 addition & 4 deletions scripts/3.2.1_assign_primary_zone_edu.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,11 +51,8 @@ def main(config_file):
logger.info("Loading activity chains")

activity_chains = activity_chains_for_assignment(
config, columns=cols_for_assignment_edu()
config, columns=cols_for_assignment_edu(), subset_to_chosen_day=True
)
activity_chains = activity_chains[
activity_chains["TravDay"] == config.parameters.nts_day_of_week
]

logger.info("Filtering activity chains for trip purpose: education")
activity_chains_edu = activity_chains[activity_chains["dact"] == "education"]
Expand Down
16 changes: 8 additions & 8 deletions scripts/3.2.2_assign_primary_zone_work.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,11 +42,9 @@ def main(config_file):

# --- Activity chains
logger.info("Loading activity chains")

activity_chains = activity_chains_for_assignment(config, cols_for_assignment_work())
activity_chains = activity_chains[
activity_chains["TravDay"] == config.parameters.nts_day_of_week
]
activity_chains = activity_chains_for_assignment(
config, cols_for_assignment_work(), subset_to_chosen_day=True
)

logger.info("Filtering activity chains for trip purpose: work")
activity_chains_work = activity_chains[activity_chains["dact"] == "work"]
Expand Down Expand Up @@ -100,7 +98,7 @@ def main(config_file):

logger.info("Step 4: Filtering rows and dropping unnecessary columns")
travel_demand_clipped = travel_demand[
travel_demand["Place of work indicator (4 categories) code"].isin([1, 3])
travel_demand["Place of work indicator (4 categories) code"].isin([3])
]
travel_demand_clipped = travel_demand_clipped.drop(
columns=[
Expand Down Expand Up @@ -141,7 +139,7 @@ def main(config_file):

logger.info("Step 2: Filtering rows and dropping unnecessary columns")
travel_demand_clipped = travel_demand[
travel_demand["Place of work indicator (4 categories) code"].isin([1, 3])
travel_demand["Place of work indicator (4 categories) code"].isin([3])
]
travel_demand_clipped = travel_demand_clipped.drop(
columns=[
Expand Down Expand Up @@ -202,7 +200,9 @@ def main(config_file):
#### ASSIGN TO ZONE FROM FEASIBLE ZONES ####

zone_assignment = WorkZoneAssignment(
activities_to_assign=possible_zones_work, actual_flows=travel_demand_dict_nomode
activities_to_assign=possible_zones_work,
actual_flows=travel_demand_dict_nomode,
scaling=config.parameters.part_time_work_prob,
)

assignments_df = zone_assignment.select_work_zone_optimization(
Expand Down
5 changes: 1 addition & 4 deletions scripts/3.2.3_assign_secondary_zone.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,10 +38,7 @@ def main(config_file):
# --- Load in the data
logger.info("Loading: activity chains")

activity_chains = activity_chains_for_assignment(config)
activity_chains = activity_chains[
activity_chains["TravDay"] == config.parameters.nts_day_of_week
]
activity_chains = activity_chains_for_assignment(config, subset_to_chosen_day=True)

# TODO: remove obsolete comment
# --- Add OA21CD to the data
Expand Down
15 changes: 12 additions & 3 deletions scripts/3.3_assign_facility_all.py
Original file line number Diff line number Diff line change
Expand Up @@ -307,7 +307,10 @@ def main(config_file):
x_col="TripDisIncSW",
y_col="length",
x_label="Reported Travel Distance (km)",
y_label="Actual Distance - Euclidian (km)",
y_label="Actual Distance - Estimated (km)",
detour_factor=config.feasible_assignment.detour_factor,
decay_rate=config.feasible_assignment.decay_rate,
x_axis_max=50,
crs=f"EPSG:{config.output_crs}",
title_prefix=f"Scatter plot of TripDisIncSW vs. Length for {activity_type}",
save_dir=config.output_path / "plots/assigning/",
Expand All @@ -330,8 +333,11 @@ def main(config_file):
activity_type_col="destination activity",
x_col="TripTotalTime",
y_col="length",
x_label="Reported Travel TIme (min)",
y_label="Actual Distance - Euclidian (km)",
x_label="Reported Travel Time (min)",
y_label="Actual Distance - Estimated (km)",
detour_factor=config.feasible_assignment.detour_factor,
decay_rate=config.feasible_assignment.decay_rate,
x_axis_max=180,
crs=f"EPSG:{config.output_crs}",
title_prefix="Scatter plot of TripTotalTime vs. Length",
save_dir=config.output_path / "plots/assigning/",
Expand All @@ -355,6 +361,9 @@ def main(config_file):
y_col="time",
x_label="Reported Travel TIme (min)",
y_label="Modelled time (min)",
detour_factor=config.feasible_assignment.detour_factor,
decay_rate=config.feasible_assignment.decay_rate,
x_axis_max=180,
crs=f"EPSG:{config.output_crs}",
title_prefix="Scatter plot of TripTotalTime vs. Modelled time",
save_dir=config.output_path / "plots/assigning/",
Expand Down
4 changes: 3 additions & 1 deletion scripts/4_validation.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ def main(config_file):
# NTS data
legs_nts = pd.read_parquet(config.output_path / "nts_trips.parquet")

legs_nts = legs_nts[legs_nts["TravDay"] == config.parameters.nts_day_of_week]
legs_nts = legs_nts[legs_nts["TravDay"].isin(config.parameters.nts_days_of_week)]

# Model outputs
legs_acbm = pd.read_csv(config.output_path / "legs.csv")
Expand Down Expand Up @@ -217,6 +217,8 @@ def main(config_file):
start_wkt_col="start_location_geometry_wkt",
end_wkt_col="end_location_geometry_wkt",
crs_epsg=config.output_crs,
detour_factor=1.56,
decay_rate=0.0001,
)

# Plot: Aggregate
Expand Down
Loading
Loading