\n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "/tmp/ipykernel_3327128/1617488548.py:8: SettingWithCopyWarning: \n",
+ "A value is trying to be set on a copy of a slice from a DataFrame.\n",
+ "Try using .loc[row_indexer,col_indexer] = value instead\n",
+ "\n",
+ "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
+ " legs_geo[\"start_loc\"] = legs_geo[\"start_loc\"].apply(convert_to_point)\n",
+ "/tmp/ipykernel_3327128/1617488548.py:9: SettingWithCopyWarning: \n",
+ "A value is trying to be set on a copy of a slice from a DataFrame.\n",
+ "Try using .loc[row_indexer,col_indexer] = value instead\n",
+ "\n",
+ "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
+ " legs_geo[\"end_loc\"] = legs_geo[\"end_loc\"].apply(convert_to_point)\n"
+ ]
+ }
+ ],
"source": [
"# Function to convert to Point if not already a Point\n",
"def convert_to_point(value):\n",
@@ -2262,7 +3360,7 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 33,
"metadata": {},
"outputs": [],
"source": [
@@ -2328,9 +3426,21 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 34,
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "\n",
+ "Using simple trip based purpose parser, this assumes first activity is 'home'.\n",
+ "If you do not wish to assume this, try setting 'tour_based' = True (default).\n",
+ "\n",
+ "Using freq of 'None' for all trips.\n"
+ ]
+ }
+ ],
"source": [
"population = load_travel_diary(\n",
" trips=legs_geo,\n",
@@ -2344,9 +3454,23 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 35,
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Person: 200\n",
+ "{'hzone': 'E02002330', 'age': 38, 'sex': 'female', 'isPassenger': True, 'hasPTSubscription': False, 'isStudent': False, 'hhlIncome': 34167.0, 'CarAvailability': 'yes', 'BicycleAvailability': 'yes', 'hasLicence': True}\n",
+ "0:\tActivity(act:home, location:POINT (-167397.56531315646 7143640.45559949), time:00:00:00 --> 09:00:00, duration:9:00:00)\n",
+ "1:\tLeg(mode:car_passenger, area:POINT (-167397.56531315646 7143640.45559949) --> POINT (-171754.62143938692 7132283.384033667), time:09:00:00 --> 09:15:00, duration:0:15:00)\n",
+ "2:\tActivity(act:shop, location:POINT (-171754.62143938692 7132283.384033667), time:09:15:00 --> 09:45:00, duration:0:30:00)\n",
+ "3:\tLeg(mode:car_passenger, area:POINT (-171754.62143938692 7132283.384033667) --> POINT (-167397.56531315646 7143640.45559949), time:09:45:00 --> 10:00:00, duration:0:15:00)\n",
+ "4:\tActivity(act:home, location:POINT (-167397.56531315646 7143640.45559949), time:10:00:00 --> 00:00:00, duration:14:00:00)\n"
+ ]
+ }
+ ],
"source": [
"population[89][200].print()"
]
From 8d4b017e2b4a12e121c46c47f9d5496e352d9599 Mon Sep 17 00:00:00 2001
From: Hussein Mahfouz <45176416+Hussein-Mahfouz@users.noreply.github.com>
Date: Fri, 13 Dec 2024 16:42:17 +0100
Subject: [PATCH 6/8] add taxi mode
---
.../2_match_households_and_individuals.ipynb | 346 +++++++++---------
scripts/2_match_households_and_individuals.py | 2 +-
scripts/3.2.2_assign_primary_zone_work.py | 2 +-
scripts/run_pipeline.sh | 6 +-
src/acbm/assigning/select_zone_primary.py | 2 +-
src/acbm/assigning/utils.py | 2 +
6 files changed, 181 insertions(+), 179 deletions(-)
diff --git a/notebooks/2_match_households_and_individuals.ipynb b/notebooks/2_match_households_and_individuals.ipynb
index 94d53a1..50c7578 100644
--- a/notebooks/2_match_households_and_individuals.ipynb
+++ b/notebooks/2_match_households_and_individuals.ipynb
@@ -18,7 +18,7 @@
},
{
"cell_type": "code",
- "execution_count": 12,
+ "execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
@@ -64,17 +64,17 @@
},
{
"cell_type": "code",
- "execution_count": 2,
+ "execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
"# useful variables\n",
- "region = \"west-yorkshire\""
+ "region = \"leeds\""
]
},
{
"cell_type": "code",
- "execution_count": 3,
+ "execution_count": 5,
"metadata": {},
"outputs": [
{
@@ -144,121 +144,121 @@
" 0 | \n",
" 0 | \n",
" NaN | \n",
- " {'concert_f': 1.2791347489984115e-31, 'concert... | \n",
- " [1583, 13161] | \n",
- " [1582, 13160] | \n",
- " E02002183_0001_001 | \n",
- " 11291218 | \n",
- " 1 | \n",
- " 2905399 | \n",
- " E02002183 | \n",
- " E00053954 | \n",
- " [0] | \n",
- " 24.879356 | \n",
- " False | \n",
+ " {'concert_f': 3.6287833784528047e-16, 'concert... | \n",
+ " [954, 1037, 1234, 2981, 6290, 9535, 10385, 106... | \n",
+ " [955, 1036, 1235, 2980, 6291, 9536, 10384, 106... | \n",
+ " E02002330_0001_001 | \n",
+ " 34051017 | \n",
+ " 1 | \n",
+ " 2911721 | \n",
+ " E02002330 | \n",
+ " E00059034 | \n",
+ " [0, 1] | \n",
+ " 27.506445 | \n",
" False | \n",
" False | \n",
+ " True | \n",
" NaN | \n",
- " 3.0 | \n",
" 2.0 | \n",
- " J | \n",
- " 58.0 | \n",
- " 1115.0 | \n",
+ " 4.0 | \n",
+ " Q | \n",
+ " 88.0 | \n",
+ " 1184.0 | \n",
" 6 | \n",
" NaN | \n",
" NaN | \n",
- " E02002183_0001 | \n",
+ " E02002330_0001 | \n",
" 1.0 | \n",
- " 1.0 | \n",
- " NaN | \n",
" 2.0 | \n",
+ " NaN | \n",
+ " 6.0 | \n",
" True | \n",
" 2.0 | \n",
" 2 | \n",
" 1 | \n",
- " 86 | \n",
+ " 68 | \n",
" 1 | \n",
" 1.0 | \n",
" \n",
" \n",
" 1 | \n",
" 1 | \n",
- " 1 | \n",
+ " 0 | \n",
" NaN | \n",
- " {'concert_f': 9.743248151956307e-21, 'concert_... | \n",
- " [2900, 4948, 4972, 7424, 10284, 10586, 12199, ... | \n",
- " [2901, 4949, 4973, 7425, 10285, 10585, 12198, ... | \n",
- " E02002183_0002_001 | \n",
- " 17291219 | \n",
- " 1 | \n",
- " 2905308 | \n",
- " E02002183 | \n",
- " E00053953 | \n",
- " [1, 2] | \n",
- " 27.491207 | \n",
- " False | \n",
+ " {'concert_f': 9.903925281880971e-14, 'concert_... | \n",
+ " [3435, 6069, 13203, 14704] | \n",
+ " [3436, 6068, 13202, 14703] | \n",
+ " E02002330_0001_002 | \n",
+ " 21040818 | \n",
+ " 1 | \n",
+ " 2904618 | \n",
+ " E02002330 | \n",
+ " E00059034 | \n",
+ " [0, 1] | \n",
+ " 30.527805 | \n",
+ " True | \n",
" False | \n",
" True | \n",
- " NaN | \n",
+ " 7.0 | \n",
+ " 2.0 | \n",
" 3.0 | \n",
- " NaN | \n",
- " C | \n",
- " 25.0 | \n",
- " 1121.0 | \n",
+ " I | \n",
+ " 56.0 | \n",
+ " 5434.0 | \n",
" 6 | \n",
" NaN | \n",
" NaN | \n",
- " E02002183_0002 | \n",
+ " E02002330_0001 | \n",
" 1.0 | \n",
- " 3.0 | \n",
+ " 2.0 | \n",
" NaN | \n",
" 6.0 | \n",
" True | \n",
" 2.0 | \n",
" 2 | \n",
+ " 2 | \n",
+ " 65 | \n",
" 1 | \n",
- " 74 | \n",
- " 3 | \n",
- " 1.0 | \n",
+ " 6.0 | \n",
"
\n",
" \n",
" 2 | \n",
" 2 | \n",
" 1 | \n",
" NaN | \n",
- " {'concert_f': 8.46716103992468e-16, 'concert_f... | \n",
- " [3010, 6389, 9448, 10184, 11598] | \n",
- " [3011, 6388, 9447, 10183, 11599] | \n",
- " E02002183_0002_002 | \n",
- " 17070713 | \n",
- " 2 | \n",
- " 2907681 | \n",
- " E02002183 | \n",
- " E00053953 | \n",
- " [1, 2] | \n",
- " 17.310829 | \n",
+ " {'concert_f': 1.2791347489984115e-31, 'concert... | \n",
+ " [762, 5168, 6201, 8977] | \n",
+ " [761, 5169, 6200, 8976] | \n",
+ " E02002330_0002_001 | \n",
+ " 11131017 | \n",
+ " 1 | \n",
+ " 2902311 | \n",
+ " E02002330 | \n",
+ " E00059022 | \n",
+ " [2] | \n",
+ " 22.261669 | \n",
+ " False | \n",
+ " False | \n",
" False | \n",
- " True | \n",
- " True | \n",
" NaN | \n",
" 2.0 | \n",
- " 4.0 | \n",
- " P | \n",
- " 85.0 | \n",
- " 2311.0 | \n",
+ " 3.0 | \n",
+ " Q | \n",
+ " 86.0 | \n",
+ " 2211.0 | \n",
" 6 | \n",
" NaN | \n",
" NaN | \n",
- " E02002183_0002 | \n",
+ " E02002330_0002 | \n",
+ " 2.0 | \n",
" 1.0 | \n",
- " 3.0 | \n",
" NaN | \n",
- " 6.0 | \n",
+ " 5.0 | \n",
" True | \n",
- " 2.0 | \n",
- " 2 | \n",
+ " 1.0 | \n",
" 2 | \n",
- " 68 | \n",
+ " 1 | \n",
+ " 86 | \n",
" 1 | \n",
" 2.0 | \n",
"
\n",
@@ -266,40 +266,40 @@
" 3 | \n",
" 3 | \n",
" 2 | \n",
- " 56126.0 | \n",
- " {'concert_f': 1.8844366073608398, 'concert_fs'... | \n",
- " [366, 867, 2096, 3678, 5212, 5450, 8145, 9254,... | \n",
- " [365, 868, 2097, 3677, 5213, 5451, 8146, 9253,... | \n",
- " E02002183_0003_001 | \n",
- " 20310313 | \n",
- " 1 | \n",
- " 2902817 | \n",
- " E02002183 | \n",
- " E00053689 | \n",
- " [3, 4] | \n",
- " 20.852091 | \n",
+ " NaN | \n",
+ " {'concert_f': 7.754311082130982e-10, 'concert_... | \n",
+ " [1580, 5417, 5956, 12901] | \n",
+ " [1581, 5416, 5957, 12900] | \n",
+ " E02002330_0003_001 | \n",
+ " 15020311 | \n",
+ " 1 | \n",
+ " 2911131 | \n",
+ " E02002330 | \n",
+ " E00059022 | \n",
+ " [3, 4, 5] | \n",
+ " 21.434204 | \n",
" False | \n",
" False | \n",
" False | \n",
" NaN | \n",
- " 2.0 | \n",
" 1.0 | \n",
- " C | \n",
- " 31.0 | \n",
- " 3422.0 | \n",
- " 1 | \n",
- " 32857.859375 | \n",
- " 14.360952 | \n",
- " E02002183_0003 | \n",
- " 4.0 | \n",
" 3.0 | \n",
+ " O | \n",
+ " 84.0 | \n",
+ " 3314.0 | \n",
+ " 6 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " E02002330_0003 | \n",
+ " 4.0 | \n",
+ " 1.0 | \n",
" NaN | \n",
" 6.0 | \n",
" True | \n",
" 2.0 | \n",
" 1 | \n",
" 1 | \n",
- " 27 | \n",
+ " 58 | \n",
" 1 | \n",
" 4.0 | \n",
" \n",
@@ -307,42 +307,42 @@
" 4 | \n",
" 4 | \n",
" 2 | \n",
- " NaN | \n",
- " {'concert_f': 4.877435207366943, 'concert_fs':... | \n",
- " [1289, 12528, 12870] | \n",
- " [1288, 12529, 12871] | \n",
- " E02002183_0003_002 | \n",
- " 13010909 | \n",
- " 3 | \n",
- " 2900884 | \n",
- " E02002183 | \n",
- " E00053689 | \n",
- " [3, 4] | \n",
- " 20.032526 | \n",
- " False | \n",
+ " 508.0 | \n",
+ " {'concert_f': 2.1388457227544677e-08, 'concert... | \n",
+ " [318, 3145, 10496, 12819, 13943] | \n",
+ " [319, 3144, 10495, 12818, 13942] | \n",
+ " E02002330_0003_002 | \n",
+ " 20090607 | \n",
+ " 1 | \n",
+ " 2909582 | \n",
+ " E02002330 | \n",
+ " E00059022 | \n",
+ " [3, 4, 5] | \n",
+ " 12.644703 | \n",
" False | \n",
" False | \n",
- " 1.0 | \n",
- " 2.0 | \n",
+ " True | \n",
+ " NaN | \n",
" 3.0 | \n",
- " J | \n",
- " 62.0 | \n",
- " 7214.0 | \n",
+ " 2.0 | \n",
+ " O | \n",
+ " 84.0 | \n",
+ " 1131.0 | \n",
" 1 | \n",
- " 18162.451172 | \n",
- " 9.439944 | \n",
- " E02002183_0003 | \n",
+ " 36646.464844 | \n",
+ " 19.576103 | \n",
+ " E02002330_0003 | \n",
" 4.0 | \n",
- " 3.0 | \n",
+ " 1.0 | \n",
" NaN | \n",
" 6.0 | \n",
" True | \n",
" 2.0 | \n",
" 1 | \n",
" 2 | \n",
- " 26 | \n",
+ " 56 | \n",
" 1 | \n",
- " 6.0 | \n",
+ " 1.0 | \n",
" \n",
" \n",
"\n",
@@ -351,76 +351,76 @@
"text/plain": [
" id household workplace \\\n",
"0 0 0 NaN \n",
- "1 1 1 NaN \n",
+ "1 1 0 NaN \n",
"2 2 1 NaN \n",
- "3 3 2 56126.0 \n",
- "4 4 2 NaN \n",
+ "3 3 2 NaN \n",
+ "4 4 2 508.0 \n",
"\n",
" events \\\n",
- "0 {'concert_f': 1.2791347489984115e-31, 'concert... \n",
- "1 {'concert_f': 9.743248151956307e-21, 'concert_... \n",
- "2 {'concert_f': 8.46716103992468e-16, 'concert_f... \n",
- "3 {'concert_f': 1.8844366073608398, 'concert_fs'... \n",
- "4 {'concert_f': 4.877435207366943, 'concert_fs':... \n",
+ "0 {'concert_f': 3.6287833784528047e-16, 'concert... \n",
+ "1 {'concert_f': 9.903925281880971e-14, 'concert_... \n",
+ "2 {'concert_f': 1.2791347489984115e-31, 'concert... \n",
+ "3 {'concert_f': 7.754311082130982e-10, 'concert_... \n",
+ "4 {'concert_f': 2.1388457227544677e-08, 'concert... \n",
"\n",
" weekday_diaries \\\n",
- "0 [1583, 13161] \n",
- "1 [2900, 4948, 4972, 7424, 10284, 10586, 12199, ... \n",
- "2 [3010, 6389, 9448, 10184, 11598] \n",
- "3 [366, 867, 2096, 3678, 5212, 5450, 8145, 9254,... \n",
- "4 [1289, 12528, 12870] \n",
+ "0 [954, 1037, 1234, 2981, 6290, 9535, 10385, 106... \n",
+ "1 [3435, 6069, 13203, 14704] \n",
+ "2 [762, 5168, 6201, 8977] \n",
+ "3 [1580, 5417, 5956, 12901] \n",
+ "4 [318, 3145, 10496, 12819, 13943] \n",
"\n",
" weekend_diaries orig_pid \\\n",
- "0 [1582, 13160] E02002183_0001_001 \n",
- "1 [2901, 4949, 4973, 7425, 10285, 10585, 12198, ... E02002183_0002_001 \n",
- "2 [3011, 6388, 9447, 10183, 11599] E02002183_0002_002 \n",
- "3 [365, 868, 2097, 3677, 5213, 5451, 8146, 9253,... E02002183_0003_001 \n",
- "4 [1288, 12529, 12871] E02002183_0003_002 \n",
+ "0 [955, 1036, 1235, 2980, 6291, 9536, 10384, 106... E02002330_0001_001 \n",
+ "1 [3436, 6068, 13202, 14703] E02002330_0001_002 \n",
+ "2 [761, 5169, 6200, 8976] E02002330_0002_001 \n",
+ "3 [1581, 5416, 5957, 12900] E02002330_0003_001 \n",
+ "4 [319, 3144, 10495, 12818, 13942] E02002330_0003_002 \n",
"\n",
- " id_tus_hh id_tus_p pid_hs msoa11cd oa11cd members bmi \\\n",
- "0 11291218 1 2905399 E02002183 E00053954 [0] 24.879356 \n",
- "1 17291219 1 2905308 E02002183 E00053953 [1, 2] 27.491207 \n",
- "2 17070713 2 2907681 E02002183 E00053953 [1, 2] 17.310829 \n",
- "3 20310313 1 2902817 E02002183 E00053689 [3, 4] 20.852091 \n",
- "4 13010909 3 2900884 E02002183 E00053689 [3, 4] 20.032526 \n",
+ " id_tus_hh id_tus_p pid_hs msoa11cd oa11cd members bmi \\\n",
+ "0 34051017 1 2911721 E02002330 E00059034 [0, 1] 27.506445 \n",
+ "1 21040818 1 2904618 E02002330 E00059034 [0, 1] 30.527805 \n",
+ "2 11131017 1 2902311 E02002330 E00059022 [2] 22.261669 \n",
+ "3 15020311 1 2911131 E02002330 E00059022 [3, 4, 5] 21.434204 \n",
+ "4 20090607 1 2909582 E02002330 E00059022 [3, 4, 5] 12.644703 \n",
"\n",
" has_cardiovascular_disease has_diabetes has_high_blood_pressure \\\n",
- "0 False False False \n",
- "1 False False True \n",
- "2 False True True \n",
+ "0 False False True \n",
+ "1 True False True \n",
+ "2 False False False \n",
"3 False False False \n",
- "4 False False False \n",
+ "4 False False True \n",
"\n",
" number_medications self_assessed_health life_satisfaction sic1d2007 \\\n",
- "0 NaN 3.0 2.0 J \n",
- "1 NaN 3.0 NaN C \n",
- "2 NaN 2.0 4.0 P \n",
- "3 NaN 2.0 1.0 C \n",
- "4 1.0 2.0 3.0 J \n",
+ "0 NaN 2.0 4.0 Q \n",
+ "1 7.0 2.0 3.0 I \n",
+ "2 NaN 2.0 3.0 Q \n",
+ "3 NaN 1.0 3.0 O \n",
+ "4 NaN 3.0 2.0 O \n",
"\n",
" sic2d2007 soc2010 pwkstat salary_yearly salary_hourly hid \\\n",
- "0 58.0 1115.0 6 NaN NaN E02002183_0001 \n",
- "1 25.0 1121.0 6 NaN NaN E02002183_0002 \n",
- "2 85.0 2311.0 6 NaN NaN E02002183_0002 \n",
- "3 31.0 3422.0 1 32857.859375 14.360952 E02002183_0003 \n",
- "4 62.0 7214.0 1 18162.451172 9.439944 E02002183_0003 \n",
+ "0 88.0 1184.0 6 NaN NaN E02002330_0001 \n",
+ "1 56.0 5434.0 6 NaN NaN E02002330_0001 \n",
+ "2 86.0 2211.0 6 NaN NaN E02002330_0002 \n",
+ "3 84.0 3314.0 6 NaN NaN E02002330_0003 \n",
+ "4 84.0 1131.0 1 36646.464844 19.576103 E02002330_0003 \n",
"\n",
" nssec8 accommodation_type communal_type num_rooms central_heat tenure \\\n",
- "0 1.0 1.0 NaN 2.0 True 2.0 \n",
- "1 1.0 3.0 NaN 6.0 True 2.0 \n",
- "2 1.0 3.0 NaN 6.0 True 2.0 \n",
- "3 4.0 3.0 NaN 6.0 True 2.0 \n",
- "4 4.0 3.0 NaN 6.0 True 2.0 \n",
+ "0 1.0 2.0 NaN 6.0 True 2.0 \n",
+ "1 1.0 2.0 NaN 6.0 True 2.0 \n",
+ "2 2.0 1.0 NaN 5.0 True 1.0 \n",
+ "3 4.0 1.0 NaN 6.0 True 2.0 \n",
+ "4 4.0 1.0 NaN 6.0 True 2.0 \n",
"\n",
" num_cars sex age_years ethnicity nssec8_household \n",
- "0 2 1 86 1 1.0 \n",
- "1 2 1 74 3 1.0 \n",
- "2 2 2 68 1 2.0 \n",
- "3 1 1 27 1 4.0 \n",
- "4 1 2 26 1 6.0 "
+ "0 2 1 68 1 1.0 \n",
+ "1 2 2 65 1 6.0 \n",
+ "2 2 1 86 1 2.0 \n",
+ "3 1 1 58 1 4.0 \n",
+ "4 1 2 56 1 1.0 "
]
},
- "execution_count": 3,
+ "execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
@@ -433,7 +433,7 @@
},
{
"cell_type": "code",
- "execution_count": 4,
+ "execution_count": 6,
"metadata": {},
"outputs": [],
"source": [
@@ -447,7 +447,7 @@
},
{
"cell_type": "code",
- "execution_count": 5,
+ "execution_count": 7,
"metadata": {},
"outputs": [],
"source": [
@@ -470,7 +470,7 @@
},
{
"cell_type": "code",
- "execution_count": 16,
+ "execution_count": 8,
"metadata": {},
"outputs": [],
"source": [
@@ -487,7 +487,7 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 9,
"metadata": {},
"outputs": [],
"source": [
@@ -539,7 +539,7 @@
},
{
"cell_type": "code",
- "execution_count": 18,
+ "execution_count": 10,
"metadata": {},
"outputs": [],
"source": [
@@ -587,7 +587,7 @@
},
{
"cell_type": "code",
- "execution_count": 19,
+ "execution_count": null,
"metadata": {},
"outputs": [],
"source": [
@@ -635,11 +635,11 @@
},
{
"cell_type": "code",
- "execution_count": 30,
+ "execution_count": null,
"metadata": {},
"outputs": [],
"source": [
- "years = [2018, 2019, 2021, 2022]\n",
+ "years = [2019, 2021, 2022]\n",
"\n",
"nts_individuals = nts_filter_by_year(nts_individuals, psu, years)\n",
"nts_households = nts_filter_by_year(nts_households, psu, years)\n",
diff --git a/scripts/2_match_households_and_individuals.py b/scripts/2_match_households_and_individuals.py
index 476d429..864a999 100644
--- a/scripts/2_match_households_and_individuals.py
+++ b/scripts/2_match_households_and_individuals.py
@@ -1040,7 +1040,7 @@ def get_interim_path(
9: "pt", #'Non-local bus',
10: "pt", #'London Underground',
11: "pt", #'Surface Rail',
- 12: "car_passenger", #'Taxi/minicab',
+ 12: "taxi", #'Taxi/minicab',
13: "pt", #'Other public transport',
-10: "DEAD",
-8: "NA",
diff --git a/scripts/3.2.2_assign_primary_zone_work.py b/scripts/3.2.2_assign_primary_zone_work.py
index 66ad872..47b964f 100644
--- a/scripts/3.2.2_assign_primary_zone_work.py
+++ b/scripts/3.2.2_assign_primary_zone_work.py
@@ -101,7 +101,7 @@ def main(config_file):
"Train": "pt",
"Underground, metro, light rail, tram": "pt",
"On foot": "walk",
- "Taxi": "car_passenger",
+ "Taxi": "taxi",
"Other method of travel to work": "other",
"Bicycle": "cycle",
"Passenger in a car or van": "car_passenger",
diff --git a/scripts/run_pipeline.sh b/scripts/run_pipeline.sh
index 025ed8c..9da0845 100755
--- a/scripts/run_pipeline.sh
+++ b/scripts/run_pipeline.sh
@@ -2,9 +2,9 @@
set -e
-python scripts/0_preprocess_inputs.py --config_file $1
-python scripts/0.1_run_osmox.py --config_file $1
-python scripts/1_prep_synthpop.py --config_file $1
+# python scripts/0_preprocess_inputs.py --config_file $1
+# python scripts/0.1_run_osmox.py --config_file $1
+# python scripts/1_prep_synthpop.py --config_file $1
python scripts/2_match_households_and_individuals.py --config_file $1
python scripts/3.1_assign_primary_feasible_zones.py --config_file $1
python scripts/3.2.1_assign_primary_zone_edu.py --config_file $1
diff --git a/src/acbm/assigning/select_zone_primary.py b/src/acbm/assigning/select_zone_primary.py
index 7b63435..e2b5877 100644
--- a/src/acbm/assigning/select_zone_primary.py
+++ b/src/acbm/assigning/select_zone_primary.py
@@ -241,7 +241,7 @@ def _get_zones_using_time_estimate(
The zone that has the estimated time closest to the given time.
"""
- acceptable_modes = ["car", "car_passenger", "pt", "walk", "cycle"]
+ acceptable_modes = ["car", "car_passenger", "pt", "walk", "cycle", "taxi"]
if mode is not None and mode not in acceptable_modes:
error_message = f"Invalid mode: {mode}. Mode must be one of {acceptable_modes}."
diff --git a/src/acbm/assigning/utils.py b/src/acbm/assigning/utils.py
index 2f6caaf..27d9ce7 100644
--- a/src/acbm/assigning/utils.py
+++ b/src/acbm/assigning/utils.py
@@ -334,6 +334,7 @@ def zones_to_time_matrix(
mode_speeds_mps = {
"car": 20 * 1000 / 3600,
"car_passenger": 20 * 1000 / 3600,
+ "taxi": 20 * 1000 / 3600,
"pt": 15 * 1000 / 3600,
"cycle": 15 * 1000 / 3600,
"walk": 5 * 1000 / 3600,
@@ -452,6 +453,7 @@ def intrazone_time(zones: gpd.GeoDataFrame, key_column: str) -> dict:
mode_speeds_mps = {
"car": 20 * 1000 / 3600,
"car_passenger": 20 * 1000 / 3600,
+ "taxi": 20 * 1000 / 3600,
"pt": 15 * 1000 / 3600,
"cycle": 15 * 1000 / 3600,
"walk": 5 * 1000 / 3600,
From e7b375c0cf27bd272dddf9d1376ab67c14212406 Mon Sep 17 00:00:00 2001
From: Hussein Mahfouz <45176416+Hussein-Mahfouz@users.noreply.github.com>
Date: Fri, 13 Dec 2024 16:46:44 +0100
Subject: [PATCH 7/8] edit optional type
---
src/acbm/postprocessing/matsim.py | 6 ++----
1 file changed, 2 insertions(+), 4 deletions(-)
diff --git a/src/acbm/postprocessing/matsim.py b/src/acbm/postprocessing/matsim.py
index b8d67c2..fd99b4c 100644
--- a/src/acbm/postprocessing/matsim.py
+++ b/src/acbm/postprocessing/matsim.py
@@ -1,5 +1,3 @@
-from typing import Optional
-
import numpy as np
import pandas as pd
@@ -282,8 +280,8 @@ def get_pt_subscription(individuals: pd.DataFrame, age_threshold=60):
def get_students(
individuals: pd.DataFrame,
activities: pd.DataFrame,
- age_base_threshold: Optional[int] = None,
- age_upper_threshold: Optional[int] = None,
+ age_base_threshold: int | None = None,
+ age_upper_threshold: int | None = None,
activity: str = "education",
) -> pd.DataFrame:
"""
From 4cde9a3982a070ee9e0016cc01b6251eb3aabd65 Mon Sep 17 00:00:00 2001
From: Hussein Mahfouz <45176416+Hussein-Mahfouz@users.noreply.github.com>
Date: Fri, 13 Dec 2024 16:54:46 +0100
Subject: [PATCH 8/8] update config template
---
config/README.md | 63 ++++++++++++++++++++++++++++++++++++++++--------
1 file changed, 53 insertions(+), 10 deletions(-)
diff --git a/config/README.md b/config/README.md
index c1bf75e..8794cb9 100644
--- a/config/README.md
+++ b/config/README.md
@@ -3,31 +3,74 @@ The config.toml file has an explanation for each parameter. You can copy the tom
``` toml
[parameters]
seed = 0
-region = "leeds" # this is used to query poi data from osm and to load in SPC data
-number_of_households = 5000 # how many people from the SPC do we want to run the model for? Comment out if you want to run the analysis on the entire SPC populaiton
-zone_id = "OA21CD" # "OA21CD": OA level, "MSOA11CD": MSOA level
-travel_times = true # Only set to true if you have travel time matrix at the level specified in boundary_geography
-boundary_geography = "OA"
+ # this is used to query poi data from osm and to load in SPC data
+region = "leeds"
+# how many people from the SPC do we want to run the model for? Comment out if you want to run the analysis on the entire SPC population
+number_of_households = 2500
+# "OA21CD": OA level, "MSOA11CD": MSOA level
+zone_id = "MSOA21CD"
+ # Only set to true if you have travel time matrix at the level specified in boundary_geography
+travel_times = false
+boundary_geography = "MSOA"
+# NTS years to use
+nts_years = [2019, 2021, 2022]
+# NTS regions to use
+nts_regions = [
+ 'Yorkshire and the Humber',
+ 'North West',
+ 'North East',
+ 'East Midlands',
+ 'West Midlands',
+ 'East of England',
+ 'South East',
+ 'South West']
+# nts day of the week to use
+# 1: Monday, 2: Tuesday, 3: Wednesday, 4: Thursday, 5: Friday, 6: Saturday, 7: Sunday
+nts_day_of_week = 3
+# what crs do we want the output to be in? (just add the number, e.g. 3857)
+output_crs = 3857
[matching]
# for optional and required columns, see the [iterative_match_categorical](https://github.com/Urban-Analytics-Technology-Platform/acbm/blob/ca181c54d7484ebe44706ff4b43c26286b22aceb/src/acbm/matching.py#L110) function
# Do not add any column not listed below. You can only move a column from optional to require (or vise versa)
-required_columns = ["number_adults", "number_children"]
+required_columns = [
+ "number_adults",
+ "number_children",
+ "num_pension_age",
+]
optional_columns = [
"number_cars",
- "num_pension_age",
"rural_urban_2_categories",
"employment_status",
"tenure_status",
]
-n_matches = 10 # What is the maximum number of NTS matches we want for each SPC household?
+# What is the maximum number of NTS matches we want for each SPC household?
+n_matches = 10
[work_assignment]
-use_percentages = true # if true, optimization problem will try to minimize percentage difference at OD level (not absolute numbers). Recommended to set it to true
+commute_level = "MSOA"
+# if true, optimization problem will try to minimize percentage difference at OD level (not absolute numbers). Recommended to set it to true
+use_percentages = true
# weights to add for each objective in the optimization problem
weight_max_dev = 0.2
weight_total_dev = 0.8
-max_zones = 8 # maximum number of feasible zones to include in the optimization problem (less zones makes problem smaller - so faster, but at the cost of a better solution)
+# maximum number of feasible zones to include in the optimization problem (less zones makes problem smaller - so faster, but at the cost of a better solution)
+max_zones = 10
+[postprocessing]
+pam_jitter = 30
+pam_min_duration = 10
+# for get_pt_subscription: everyone above this age has a subscription (pensioners get free travel)
+# TODO: more sophisticated approach
+pt_subscription_age = 66
+# to define if a person is a student:
+# everyone below this age is a student
+student_age_base = 16
+# everyone below this age that has at least one "education" activity is a student
+student_age_upper = 30
+# everyone who uses one of the modes below is classified as a passenger (isPassenger = True)
+modes_passenger = ['car_passenger', 'taxi']
+# yearly state pension: for getting hhlIncome of pensioners
+state_pension = 11502
```