From 1e61347fe2a93267bd11bde48e714f38c4d3df1c Mon Sep 17 00:00:00 2001 From: Hussein Mahfouz <45176416+Hussein-Mahfouz@users.noreply.github.com> Date: Tue, 3 Dec 2024 16:04:59 +0100 Subject: [PATCH 1/8] add person attributes --- notebooks/6_acbm_to_matsim_xml.ipynb | 1025 +++++++++++++++++++++++++- scripts/5_acbm_to_matsim_xml.py | 42 ++ src/acbm/config.py | 4 + src/acbm/postprocessing/matsim.py | 120 +++ 4 files changed, 1187 insertions(+), 4 deletions(-) diff --git a/notebooks/6_acbm_to_matsim_xml.ipynb b/notebooks/6_acbm_to_matsim_xml.ipynb index 7e3f2c0..c2d8bd9 100644 --- a/notebooks/6_acbm_to_matsim_xml.ipynb +++ b/notebooks/6_acbm_to_matsim_xml.ipynb @@ -11,8 +11,21 @@ "import acbm\n", "from pam.read import load_travel_diary\n", "from pam import write\n", + "from typing import Optional\n", "\n", - "from shapely import wkt, Point\n" + "\n", + "from shapely import wkt, Point\n", + "\n", + "from acbm.postprocessing.matsim import (\n", + " # add_home_location_to_individuals,\n", + " # calculate_percentage_remaining,\n", + " # filter_by_pid,\n", + " # filter_no_location,\n", + " # log_row_count,\n", + " get_passengers,\n", + " get_pt_subscription,\n", + " get_students,\n", + ")\n" ] }, { @@ -32,7 +45,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 55, "metadata": {}, "outputs": [], "source": [ @@ -44,13 +57,299 @@ "legs_geo = pd.read_parquet(acbm.root_path / \"data/processed/activities_pam/legs_with_locations.parquet\")" ] }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
pidhidfreqhzoneage_years
019989NaNE0200233039
120089NaNE0200233038
2312139NaNE0200233064
3610283NaNE0200233067
4611283NaNE0200233064
\n", + "
" + ], + "text/plain": [ + " pid hid freq hzone age_years\n", + "0 199 89 NaN E02002330 39\n", + "1 200 89 NaN E02002330 38\n", + "2 312 139 NaN E02002330 64\n", + "3 610 283 NaN E02002330 67\n", + "4 611 283 NaN E02002330 64" + ] + }, + "execution_count": 15, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "individuals.head(5)" + ] + }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ - "activities.head(5)" + " # rename age_years to age in individuals\n", + "individuals.rename(columns={\"age_years\": \"age\"}, inplace=True)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
idhouseholdage_yearsOA11CDnts_hh_idnts_ind_ideducation_typeTripIDTravDayseqmodeoactdacttsttetTripDisIncSWTripTotalTimeIndividualID
01998939E000590312.019001e+092019001326education_university2.019018e+093.01.0carhomeshop540.0555.02.015.02.019001e+09
11998939E000590312.019001e+092019001326education_university2.019018e+094.01.0car_passengerhomeother450.0480.020.030.02.019001e+09
21998939E000590312.019001e+092019001326education_university2.019018e+092.02.0carworkhome960.01020.025.060.02.019001e+09
31998939E000590312.019001e+092019001326education_university2.019018e+094.03.0carhomeshop960.0975.02.015.02.019001e+09
41998939E000590312.019001e+092019001326education_university2.019018e+097.02.0carworkhome1005.01065.025.060.02.019001e+09
\n", + "
" + ], + "text/plain": [ + " id household age_years OA11CD nts_hh_id nts_ind_id \\\n", + "0 199 89 39 E00059031 2.019001e+09 2019001326 \n", + "1 199 89 39 E00059031 2.019001e+09 2019001326 \n", + "2 199 89 39 E00059031 2.019001e+09 2019001326 \n", + "3 199 89 39 E00059031 2.019001e+09 2019001326 \n", + "4 199 89 39 E00059031 2.019001e+09 2019001326 \n", + "\n", + " education_type TripID TravDay seq mode oact \\\n", + "0 education_university 2.019018e+09 3.0 1.0 car home \n", + "1 education_university 2.019018e+09 4.0 1.0 car_passenger home \n", + "2 education_university 2.019018e+09 2.0 2.0 car work \n", + "3 education_university 2.019018e+09 4.0 3.0 car home \n", + "4 education_university 2.019018e+09 7.0 2.0 car work \n", + "\n", + " dact tst tet TripDisIncSW TripTotalTime IndividualID \n", + "0 shop 540.0 555.0 2.0 15.0 2.019001e+09 \n", + "1 other 450.0 480.0 20.0 30.0 2.019001e+09 \n", + "2 home 960.0 1020.0 25.0 60.0 2.019001e+09 \n", + "3 shop 960.0 975.0 2.0 15.0 2.019001e+09 \n", + "4 home 1005.0 1065.0 25.0 60.0 2.019001e+09 " + ] + }, + "execution_count": 16, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "spc_with_nts = pd.read_parquet(acbm.root_path / \"data/interim/matching/spc_with_nts_trips.parquet\")\n", + "spc_with_nts.head(5)" ] }, { @@ -58,8 +357,726 @@ "execution_count": null, "metadata": {}, "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
idsex
00male
11female
22male
33male
44female
\n", + "
" + ], + "text/plain": [ + " id sex\n", + "0 0 male\n", + "1 1 female\n", + "2 2 male\n", + "3 3 male\n", + "4 4 female" + ] + }, + "execution_count": 61, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# add sex column to individuals\n", + "\n", + "spc = pd.read_parquet(acbm.root_path / \"data/external/spc_output/leeds_people_hh.parquet\", \n", + " columns=[\"id\", \"sex\"])\n", + "spc.head(5)\n", + "\n", + "# change spc[\"sex\"] column: 1 = male, 2 = female\n", + "\n", + "spc[\"sex\"] = spc[\"sex\"].map({1:'male',\n", + " 2: 'female'})\n", + "\n", + "spc.head(5)" + ] + }, + { + "cell_type": "code", + "execution_count": 63, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
pidhidfreqhzoneage_yearsisStudentsex
019989NaNE0200233039Falsemale
120089NaNE0200233038Falsefemale
2312139NaNE0200233064Falsefemale
3610283NaNE0200233067Falsefemale
4611283NaNE0200233064Falsemale
\n", + "
" + ], + "text/plain": [ + " pid hid freq hzone age_years isStudent sex\n", + "0 199 89 NaN E02002330 39 False male\n", + "1 200 89 NaN E02002330 38 False female\n", + "2 312 139 NaN E02002330 64 False female\n", + "3 610 283 NaN E02002330 67 False female\n", + "4 611 283 NaN E02002330 64 False male" + ] + }, + "execution_count": 63, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "individuals = individuals.merge(spc, left_on=\"pid\", right_on=\"id\", how=\"left\")\n", + "individuals = individuals.drop(columns=\"id\")\n", + "individuals.head(5)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
pidhidfreqhzoneage_yearsisPassenger
019989NaNE0200233039False
120089NaNE0200233038True
2312139NaNE0200233064False
3610283NaNE0200233067True
4611283NaNE0200233064False
5612283NaNE020023308True
6613283NaNE020023302True
72016892NaNE0200233049True
82017892NaNE0200233048False
92018892NaNE020023309True
\n", + "
" + ], + "text/plain": [ + " pid hid freq hzone age_years isPassenger\n", + "0 199 89 NaN E02002330 39 False\n", + "1 200 89 NaN E02002330 38 True\n", + "2 312 139 NaN E02002330 64 False\n", + "3 610 283 NaN E02002330 67 True\n", + "4 611 283 NaN E02002330 64 False\n", + "5 612 283 NaN E02002330 8 True\n", + "6 613 283 NaN E02002330 2 True\n", + "7 2016 892 NaN E02002330 49 True\n", + "8 2017 892 NaN E02002330 48 False\n", + "9 2018 892 NaN E02002330 9 True" + ] + }, + "execution_count": 23, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "individuals = get_passengers(\n", + " legs = legs, \n", + " individuals = individuals, \n", + " modes = ['car_passenger', 'taxi'])\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
pidhidfreqhzoneage_yearsisPassengerhasPTSubscription
019989NaNE0200233039FalseFalse
120089NaNE0200233038TrueFalse
2312139NaNE0200233064FalseTrue
3610283NaNE0200233067TrueTrue
4611283NaNE0200233064FalseTrue
5612283NaNE020023308TrueFalse
6613283NaNE020023302TrueFalse
72016892NaNE0200233049TrueFalse
82017892NaNE0200233048FalseFalse
92018892NaNE020023309TrueFalse
\n", + "
" + ], + "text/plain": [ + " pid hid freq hzone age_years isPassenger hasPTSubscription\n", + "0 199 89 NaN E02002330 39 False False\n", + "1 200 89 NaN E02002330 38 True False\n", + "2 312 139 NaN E02002330 64 False True\n", + "3 610 283 NaN E02002330 67 True True\n", + "4 611 283 NaN E02002330 64 False True\n", + "5 612 283 NaN E02002330 8 True False\n", + "6 613 283 NaN E02002330 2 True False\n", + "7 2016 892 NaN E02002330 49 True False\n", + "8 2017 892 NaN E02002330 48 False False\n", + "9 2018 892 NaN E02002330 9 True False" + ] + }, + "execution_count": 25, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "individuals = get_pt_subscription(individuals = individuals, age_threshold = 66)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
pidhidfreqhzoneage_yearsisStudent
019989NaNE0200233039False
120089NaNE0200233038False
2312139NaNE0200233064False
3610283NaNE0200233067False
4611283NaNE0200233064False
.....................
952340710597NaNE0200233313True
962484411334NaNE0200233316False
972543211574NaNE0200233360False
982547411594NaNE0200233330False
992547511594NaNE0200233320False
\n", + "

100 rows × 6 columns

\n", + "
" + ], + "text/plain": [ + " pid hid freq hzone age_years isStudent\n", + "0 199 89 NaN E02002330 39 False\n", + "1 200 89 NaN E02002330 38 False\n", + "2 312 139 NaN E02002330 64 False\n", + "3 610 283 NaN E02002330 67 False\n", + "4 611 283 NaN E02002330 64 False\n", + ".. ... ... ... ... ... ...\n", + "95 23407 10597 NaN E02002333 13 True\n", + "96 24844 11334 NaN E02002333 16 False\n", + "97 25432 11574 NaN E02002333 60 False\n", + "98 25474 11594 NaN E02002333 30 False\n", + "99 25475 11594 NaN E02002333 20 False\n", + "\n", + "[100 rows x 6 columns]" + ] + }, + "execution_count": 58, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "individuals = get_students(\n", + " individuals = individuals,\n", + " activities = activities,\n", + " age_base_threshold = 16,\n", + " #age_upper_threshold = 30,\n", + " activity = 'education')\n", + "\n", + "individuals.head(10)\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 48, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], "source": [ - "legs_geo.head(5)\n" + "# histogram of x by age and isStudent \n", + "individuals.groupby(['age', 'isStudent']).size().unstack().plot(kind='bar', stacked=True)\n" ] }, { diff --git a/scripts/5_acbm_to_matsim_xml.py b/scripts/5_acbm_to_matsim_xml.py index 2c63170..3c96e20 100644 --- a/scripts/5_acbm_to_matsim_xml.py +++ b/scripts/5_acbm_to_matsim_xml.py @@ -16,6 +16,9 @@ calculate_percentage_remaining, filter_by_pid, filter_no_location, + get_passengers, + get_pt_subscription, + get_students, log_row_count, ) @@ -50,6 +53,45 @@ def main(config_file): # rename age_years to age in individuals individuals.rename(columns={"age_years": "age"}, inplace=True) + # ----- Add some person attributes to the individuals dataframe + + # sex + + # get sex column from spc + # TODO: add sex column upstream in the beginning of the pipeline + spc = pd.read_parquet( + acbm.root_path / f"data/external/spc_output/{config.region}_people_hh.parquet", + columns=["id", "sex"], + ) + spc.head(5) + + # change spc["sex"] column: 1 = male, 2 = female + spc["sex"] = spc["sex"].map({1: "male", 2: "female"}) + # merge it on + individuals = individuals.merge(spc, left_on="pid", right_on="id", how="left") + individuals = individuals.drop(columns="id") + + # isStudent + + individuals = get_students( + individuals=individuals, + activities=activities, + age_base_threshold=config.postprocessing.student_age_base, + # age_upper_threshold = config.postprocessing.student_age_upper,, + activity="education", + ) + + # isPassenger + individuals = get_passengers( + legs=legs, individuals=individuals, modes=config.postprocessing.modes_passenger + ) + + # hasPTsubscription + + individuals = get_pt_subscription( + individuals=individuals, age_threshold=config.postprocessing.pt_subscription_age + ) + # We will be removing some rows in each planning operation. 
This function helps keep a # record of the number of rows in each table after each operation. diff --git a/src/acbm/config.py b/src/acbm/config.py index bcb8bee..980e881 100644 --- a/src/acbm/config.py +++ b/src/acbm/config.py @@ -42,6 +42,10 @@ class WorkAssignmentParams(BaseModel): class Postprocessing(BaseModel): pam_jitter: int pam_min_duration: int + student_age_base: int + student_age_upper: int + modes_passenger: list[str] + pt_subscription_age: int class Config(BaseModel): diff --git a/src/acbm/postprocessing/matsim.py b/src/acbm/postprocessing/matsim.py index cd414a0..458d7aa 100644 --- a/src/acbm/postprocessing/matsim.py +++ b/src/acbm/postprocessing/matsim.py @@ -1,3 +1,5 @@ +from typing import Optional + import pandas as pd @@ -220,3 +222,121 @@ def calculate_percentage_remaining( percentage_remaining.sort(key=lambda x: x[1]) return percentage_remaining + + +# FUNCTIONS TO ADD ATTRIBUTES TO INDIVIDUALS + + +def get_passengers( + legs: pd.DataFrame, individuals: pd.DataFrame, modes: list +) -> pd.DataFrame: + """ + Marks individuals as (car) passengers based on the mode of transportation in the legs DataFrame. + + Parameters + ---------- + legs : pd.DataFrame + DataFrame containing legs data with info on an activity leg. Needs a 'mode' column and a 'pid' column. + individuals : pd.DataFrame + DataFrame containing individual data with a 'pid' column. + modes : list + List of passenger modes. + + Returns + ------- + pd.DataFrame + Updated individuals DataFrame with an 'isPassenger' boolean column. 
+ """ + # Get a list of unique pids where mode matches the chosen list of modes + passenger_pids = legs[legs["mode"].isin(modes)]["pid"].unique() + + # Add a boolean column 'isPassenger' to the individuals DataFrame + individuals["isPassenger"] = individuals["pid"].isin(passenger_pids) + + return individuals + + +def get_pt_subscription(individuals: pd.DataFrame, age_threshold=60): + """ + Marks individuals as having a public transport subscription based on their age. + + Parameters + ---------- + individuals : pd.DataFrame + DataFrame containing individual data with an 'age' column. + age_threshold : int + Age threshold for public transport subscription. (normally the pension age) + + Returns + ------- + pd.DataFrame + Updated individuals DataFrame with an 'hasPTSubscription' boolean column. + """ + # Add a boolean column 'hasPTSubscription' to the individuals DataFrame + individuals["hasPTSubscription"] = individuals["age"] >= age_threshold + + return individuals + + +def get_students( + individuals: pd.DataFrame, + activities: pd.DataFrame, + age_base_threshold: Optional[int] = None, + age_upper_threshold: Optional[int] = None, + activity: str = "education", +) -> pd.DataFrame: + """ + Marks individuals as students based on whether they have an education activity, + and optionally whether they are also below certain age thresholds. + + Parameters + ---------- + individuals : pd.DataFrame + DataFrame containing individual data with a 'pid' column. + activities : pd.DataFrame + DataFrame containing activity data with a 'pid' column. + age_base_threshold : Optional[int] + If specified, anyone below this age is automatically a student + age_upper_threshold : Optional[int] + If specified, this is the age limit for people to be a student. If someone has an education + trip but is above this threshold, they are not a student + activity : str, optional + Activity type to consider for being a student. Default is 'education'. 
+ + Returns + ------- + pd.DataFrame + Updated individuals DataFrame with an 'isStudent' boolean column. + """ + + # Get a list of unique pids where the activity is 'education' + education_pids = activities[activities["activity"] == activity]["pid"].unique() + + if age_base_threshold is not None: + # Everyone below age_base_threshold should be assigned to student + base_students = individuals[individuals["age"] < age_base_threshold][ + "pid" + ].unique() + # Everyone below age_upper_threshold who has an education trip should also be a student + if age_upper_threshold is not None: + upper_students = individuals[ + (individuals["age"] < age_upper_threshold) + & (individuals["pid"].isin(education_pids)) + ]["pid"].unique() + student_pids = set(base_students).union(set(upper_students)) + else: + student_pids = set(base_students) + elif age_upper_threshold is not None: + # Everyone below age_upper_threshold who has an education trip should be a student + student_pids = individuals[ + (individuals["age"] < age_upper_threshold) + & (individuals["pid"].isin(education_pids)) + ]["pid"].unique() + else: + # Only people with an education trip should be students + student_pids = education_pids + + # Add a boolean column 'isStudent' to the individuals DataFrame + individuals["isStudent"] = individuals["pid"].isin(student_pids) + + return individuals From b497521ab4f3bb5b22c7abacd7982b58b87908b3 Mon Sep 17 00:00:00 2001 From: Hussein Mahfouz <45176416+Hussein-Mahfouz@users.noreply.github.com> Date: Tue, 3 Dec 2024 17:30:11 +0100 Subject: [PATCH 2/8] add household income --- notebooks/6_acbm_to_matsim_xml.ipynb | 1278 ++++++++------------------ scripts/5_acbm_to_matsim_xml.py | 19 +- src/acbm/config.py | 1 + src/acbm/postprocessing/matsim.py | 63 ++ 4 files changed, 484 insertions(+), 877 deletions(-) diff --git a/notebooks/6_acbm_to_matsim_xml.ipynb b/notebooks/6_acbm_to_matsim_xml.ipynb index c2d8bd9..47c2f8b 100644 --- a/notebooks/6_acbm_to_matsim_xml.ipynb +++ 
b/notebooks/6_acbm_to_matsim_xml.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "code", - "execution_count": null, + "execution_count": 16, "metadata": {}, "outputs": [], "source": [ @@ -11,8 +11,6 @@ "import acbm\n", "from pam.read import load_travel_diary\n", "from pam import write\n", - "from typing import Optional\n", - "\n", "\n", "from shapely import wkt, Point\n", "\n", @@ -25,6 +23,7 @@ " get_passengers,\n", " get_pt_subscription,\n", " get_students,\n", + " get_hhlIncome,\n", ")\n" ] }, @@ -45,7 +44,7 @@ }, { "cell_type": "code", - "execution_count": 55, + "execution_count": 17, "metadata": {}, "outputs": [], "source": [ @@ -59,92 +58,16 @@ }, { "cell_type": "code", - "execution_count": 15, + "execution_count": null, "metadata": {}, "outputs": [ { "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
pidhidfreqhzoneage_years
019989NaNE0200233039
120089NaNE0200233038
2312139NaNE0200233064
3610283NaNE0200233067
4611283NaNE0200233064
\n", - "
" - ], "text/plain": [ - " pid hid freq hzone age_years\n", - "0 199 89 NaN E02002330 39\n", - "1 200 89 NaN E02002330 38\n", - "2 312 139 NaN E02002330 64\n", - "3 610 283 NaN E02002330 67\n", - "4 611 283 NaN E02002330 64" + "1709" ] }, - "execution_count": 15, + "execution_count": 18, "metadata": {}, "output_type": "execute_result" } @@ -165,7 +88,17 @@ }, { "cell_type": "code", - "execution_count": 16, + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "spc_with_nts = pd.read_parquet(acbm.root_path / \"data/interim/matching/spc_with_nts_trips.parquet\")\n", + "spc_with_nts.head(5)" + ] + }, + { + "cell_type": "code", + "execution_count": 19, "metadata": {}, "outputs": [ { @@ -191,165 +124,400 @@ " \n", " id\n", " household\n", + " workplace\n", + " events\n", + " weekday_diaries\n", + " weekend_diaries\n", + " orig_pid\n", + " id_tus_hh\n", + " id_tus_p\n", + " pid_hs\n", + " ...\n", + " accommodation_type\n", + " communal_type\n", + " num_rooms\n", + " central_heat\n", + " tenure\n", + " num_cars\n", + " sex\n", " age_years\n", - " OA11CD\n", - " nts_hh_id\n", - " nts_ind_id\n", - " education_type\n", - " TripID\n", - " TravDay\n", - " seq\n", - " mode\n", - " oact\n", - " dact\n", - " tst\n", - " tet\n", - " TripDisIncSW\n", - " TripTotalTime\n", - " IndividualID\n", + " ethnicity\n", + " nssec8_household\n", " \n", " \n", " \n", " \n", " 0\n", - " 199\n", - " 89\n", - " 39\n", - " E00059031\n", - " 2.019001e+09\n", - " 2019001326\n", - " education_university\n", - " 2.019018e+09\n", - " 3.0\n", - " 1.0\n", - " car\n", - " home\n", - " shop\n", - " 540.0\n", - " 555.0\n", + " 0\n", + " 0\n", + " NaN\n", + " {'concert_f': 3.6287833784528047e-16, 'concert...\n", + " [954, 1037, 1234, 2981, 6290, 9535, 10385, 106...\n", + " [955, 1036, 1235, 2980, 6291, 9536, 10384, 106...\n", + " E02002330_0001_001\n", + " 34051017\n", + " 1\n", + " 2911721\n", + " ...\n", + " 2.0\n", + " NaN\n", + " 6.0\n", + " True\n", " 2.0\n", - " 15.0\n", - " 
2.019001e+09\n", + " 2\n", + " 1\n", + " 68\n", + " 1\n", + " 1.0\n", " \n", " \n", " 1\n", - " 199\n", - " 89\n", - " 39\n", - " E00059031\n", - " 2.019001e+09\n", - " 2019001326\n", - " education_university\n", - " 2.019018e+09\n", + " 1\n", + " 0\n", + " NaN\n", + " {'concert_f': 9.903925281880971e-14, 'concert_...\n", + " [3435, 6069, 13203, 14704]\n", + " [3436, 6068, 13202, 14703]\n", + " E02002330_0001_002\n", + " 21040818\n", + " 1\n", + " 2904618\n", + " ...\n", + " 2.0\n", + " NaN\n", + " 6.0\n", + " True\n", + " 2.0\n", + " 2\n", + " 2\n", + " 65\n", + " 1\n", + " 6.0\n", + " \n", + " \n", + " 2\n", + " 2\n", + " 1\n", + " NaN\n", + " {'concert_f': 1.2791347489984115e-31, 'concert...\n", + " [762, 5168, 6201, 8977]\n", + " [761, 5169, 6200, 8976]\n", + " E02002330_0002_001\n", + " 11131017\n", + " 1\n", + " 2902311\n", + " ...\n", + " 1.0\n", + " NaN\n", + " 5.0\n", + " True\n", + " 1.0\n", + " 2\n", + " 1\n", + " 86\n", + " 1\n", + " 2.0\n", + " \n", + " \n", + " 3\n", + " 3\n", + " 2\n", + " NaN\n", + " {'concert_f': 7.754311082130982e-10, 'concert_...\n", + " [1580, 5417, 5956, 12901]\n", + " [1581, 5416, 5957, 12900]\n", + " E02002330_0003_001\n", + " 15020311\n", + " 1\n", + " 2911131\n", + " ...\n", + " 1.0\n", + " NaN\n", + " 6.0\n", + " True\n", + " 2.0\n", + " 1\n", + " 1\n", + " 58\n", + " 1\n", " 4.0\n", + " \n", + " \n", + " 4\n", + " 4\n", + " 2\n", + " 508.0\n", + " {'concert_f': 2.1388457227544677e-08, 'concert...\n", + " [318, 3145, 10496, 12819, 13943]\n", + " [319, 3144, 10495, 12818, 13942]\n", + " E02002330_0003_002\n", + " 20090607\n", + " 1\n", + " 2909582\n", + " ...\n", + " 1.0\n", + " NaN\n", + " 6.0\n", + " True\n", + " 2.0\n", + " 1\n", + " 2\n", + " 56\n", + " 1\n", " 1.0\n", - " car_passenger\n", - " home\n", - " other\n", - " 450.0\n", - " 480.0\n", - " 20.0\n", - " 30.0\n", - " 2.019001e+09\n", " \n", " \n", - " 2\n", - " 199\n", - " 89\n", + " ...\n", + " ...\n", + " ...\n", + " ...\n", + " ...\n", + " ...\n", + " ...\n", 
+ " ...\n", + " ...\n", + " ...\n", + " ...\n", + " ...\n", + " ...\n", + " ...\n", + " ...\n", + " ...\n", + " ...\n", + " ...\n", + " ...\n", + " ...\n", + " ...\n", + " ...\n", + " \n", + " \n", + " 794634\n", + " 794634\n", + " 334848\n", + " NaN\n", + " {'concert_f': 0.030085181817412376, 'concert_f...\n", + " [253, 904, 960, 1258, 1666, 1827, 2990, 3158, ...\n", + " [252, 903, 961, 1259, 1667, 1826, 2991, 3159, ...\n", + " E02006876_3658_001\n", + " 15171109\n", + " 2\n", + " 2910202\n", + " ...\n", + " 3.0\n", + " NaN\n", + " 6.0\n", + " True\n", + " 1.0\n", + " 1\n", + " 1\n", " 39\n", - " E00059031\n", - " 2.019001e+09\n", - " 2019001326\n", - " education_university\n", - " 2.019018e+09\n", + " 1\n", + " 3.0\n", + " \n", + " \n", + " 794635\n", + " 794635\n", + " 334849\n", + " NaN\n", + " {'concert_f': 5.36953439222998e-06, 'concert_f...\n", + " [1611, 2074, 3331, 3973, 5305, 7241, 9500, 10413]\n", + " [1610, 2075, 3330, 3974, 5304, 7240, 9499, 10412]\n", + " E02006876_3659_001\n", + " 12080913\n", + " 2\n", + " 2903691\n", + " ...\n", " 2.0\n", + " NaN\n", + " 5.0\n", + " False\n", + " 1.0\n", + " 0\n", + " 2\n", + " 51\n", + " 1\n", + " 7.0\n", + " \n", + " \n", + " 794636\n", + " 794636\n", + " 334850\n", + " NaN\n", + " {'concert_f': 9.81540181321244e-28, 'concert_f...\n", + " [1768, 1833, 2004, 3538, 5690, 5693, 9177, 112...\n", + " [1767, 1832, 2005, 3537, 5691, 5692, 9176, 112...\n", + " E02006876_3660_001\n", + " 15291209\n", + " 1\n", + " 2905917\n", + " ...\n", + " 4.0\n", + " NaN\n", " 2.0\n", - " car\n", - " work\n", - " home\n", - " 960.0\n", - " 1020.0\n", - " 25.0\n", - " 60.0\n", - " 2.019001e+09\n", + " True\n", + " 5.0\n", + " 0\n", + " 1\n", + " 82\n", + " 1\n", + " NaN\n", " \n", " \n", - " 3\n", - " 199\n", - " 89\n", - " 39\n", - " E00059031\n", - " 2.019001e+09\n", - " 2019001326\n", - " education_university\n", - " 2.019018e+09\n", + " 794637\n", + " 794637\n", + " 334851\n", + " NaN\n", + " {'concert_f': 0.05208142101764679, 
'concert_fs...\n", + " [2173, 14404, 15340, 16376]\n", + " [2174, 14403, 15339, 16377]\n", + " E02006876_3661_001\n", + " 20280416\n", + " 1\n", + " 2907191\n", + " ...\n", " 4.0\n", + " NaN\n", " 3.0\n", - " car\n", - " home\n", - " shop\n", - " 960.0\n", - " 975.0\n", + " True\n", + " 5.0\n", + " 0\n", + " 1\n", + " 38\n", + " 1\n", " 2.0\n", - " 15.0\n", - " 2.019001e+09\n", " \n", " \n", - " 4\n", - " 199\n", - " 89\n", - " 39\n", - " E00059031\n", - " 2.019001e+09\n", - " 2019001326\n", - " education_university\n", - " 2.019018e+09\n", - " 7.0\n", + " 794638\n", + " 794638\n", + " 334851\n", + " 31329.0\n", + " {'concert_f': 5.096165657043457, 'concert_fs':...\n", + " [3393, 3729, 4300, 5119, 10052, 15867, 15970]\n", + " [3392, 3730, 4299, 5118, 10051, 15866, 15969]\n", + " E02006876_3661_002\n", + " 13031119\n", + " 1\n", + " 2907075\n", + " ...\n", + " 4.0\n", + " NaN\n", + " 3.0\n", + " True\n", + " 5.0\n", + " 0\n", + " 2\n", + " 22\n", + " 2\n", " 2.0\n", - " car\n", - " work\n", - " home\n", - " 1005.0\n", - " 1065.0\n", - " 25.0\n", - " 60.0\n", - " 2.019001e+09\n", " \n", " \n", "\n", + "

794639 rows × 38 columns

\n", "" ], "text/plain": [ - " id household age_years OA11CD nts_hh_id nts_ind_id \\\n", - "0 199 89 39 E00059031 2.019001e+09 2019001326 \n", - "1 199 89 39 E00059031 2.019001e+09 2019001326 \n", - "2 199 89 39 E00059031 2.019001e+09 2019001326 \n", - "3 199 89 39 E00059031 2.019001e+09 2019001326 \n", - "4 199 89 39 E00059031 2.019001e+09 2019001326 \n", + " id household workplace \\\n", + "0 0 0 NaN \n", + "1 1 0 NaN \n", + "2 2 1 NaN \n", + "3 3 2 NaN \n", + "4 4 2 508.0 \n", + "... ... ... ... \n", + "794634 794634 334848 NaN \n", + "794635 794635 334849 NaN \n", + "794636 794636 334850 NaN \n", + "794637 794637 334851 NaN \n", + "794638 794638 334851 31329.0 \n", + "\n", + " events \\\n", + "0 {'concert_f': 3.6287833784528047e-16, 'concert... \n", + "1 {'concert_f': 9.903925281880971e-14, 'concert_... \n", + "2 {'concert_f': 1.2791347489984115e-31, 'concert... \n", + "3 {'concert_f': 7.754311082130982e-10, 'concert_... \n", + "4 {'concert_f': 2.1388457227544677e-08, 'concert... \n", + "... ... \n", + "794634 {'concert_f': 0.030085181817412376, 'concert_f... \n", + "794635 {'concert_f': 5.36953439222998e-06, 'concert_f... \n", + "794636 {'concert_f': 9.81540181321244e-28, 'concert_f... \n", + "794637 {'concert_f': 0.05208142101764679, 'concert_fs... \n", + "794638 {'concert_f': 5.096165657043457, 'concert_fs':... \n", + "\n", + " weekday_diaries \\\n", + "0 [954, 1037, 1234, 2981, 6290, 9535, 10385, 106... \n", + "1 [3435, 6069, 13203, 14704] \n", + "2 [762, 5168, 6201, 8977] \n", + "3 [1580, 5417, 5956, 12901] \n", + "4 [318, 3145, 10496, 12819, 13943] \n", + "... ... \n", + "794634 [253, 904, 960, 1258, 1666, 1827, 2990, 3158, ... \n", + "794635 [1611, 2074, 3331, 3973, 5305, 7241, 9500, 10413] \n", + "794636 [1768, 1833, 2004, 3538, 5690, 5693, 9177, 112... 
\n", + "794637 [2173, 14404, 15340, 16376] \n", + "794638 [3393, 3729, 4300, 5119, 10052, 15867, 15970] \n", "\n", - " education_type TripID TravDay seq mode oact \\\n", - "0 education_university 2.019018e+09 3.0 1.0 car home \n", - "1 education_university 2.019018e+09 4.0 1.0 car_passenger home \n", - "2 education_university 2.019018e+09 2.0 2.0 car work \n", - "3 education_university 2.019018e+09 4.0 3.0 car home \n", - "4 education_university 2.019018e+09 7.0 2.0 car work \n", + " weekend_diaries orig_pid \\\n", + "0 [955, 1036, 1235, 2980, 6291, 9536, 10384, 106... E02002330_0001_001 \n", + "1 [3436, 6068, 13202, 14703] E02002330_0001_002 \n", + "2 [761, 5169, 6200, 8976] E02002330_0002_001 \n", + "3 [1581, 5416, 5957, 12900] E02002330_0003_001 \n", + "4 [319, 3144, 10495, 12818, 13942] E02002330_0003_002 \n", + "... ... ... \n", + "794634 [252, 903, 961, 1259, 1667, 1826, 2991, 3159, ... E02006876_3658_001 \n", + "794635 [1610, 2075, 3330, 3974, 5304, 7240, 9499, 10412] E02006876_3659_001 \n", + "794636 [1767, 1832, 2005, 3537, 5691, 5692, 9176, 112... E02006876_3660_001 \n", + "794637 [2174, 14403, 15339, 16377] E02006876_3661_001 \n", + "794638 [3392, 3730, 4299, 5118, 10051, 15866, 15969] E02006876_3661_002 \n", "\n", - " dact tst tet TripDisIncSW TripTotalTime IndividualID \n", - "0 shop 540.0 555.0 2.0 15.0 2.019001e+09 \n", - "1 other 450.0 480.0 20.0 30.0 2.019001e+09 \n", - "2 home 960.0 1020.0 25.0 60.0 2.019001e+09 \n", - "3 shop 960.0 975.0 2.0 15.0 2.019001e+09 \n", - "4 home 1005.0 1065.0 25.0 60.0 2.019001e+09 " + " id_tus_hh id_tus_p pid_hs ... accommodation_type communal_type \\\n", + "0 34051017 1 2911721 ... 2.0 NaN \n", + "1 21040818 1 2904618 ... 2.0 NaN \n", + "2 11131017 1 2902311 ... 1.0 NaN \n", + "3 15020311 1 2911131 ... 1.0 NaN \n", + "4 20090607 1 2909582 ... 1.0 NaN \n", + "... ... ... ... ... ... ... \n", + "794634 15171109 2 2910202 ... 3.0 NaN \n", + "794635 12080913 2 2903691 ... 2.0 NaN \n", + "794636 15291209 1 2905917 ... 
4.0 NaN \n", + "794637 20280416 1 2907191 ... 4.0 NaN \n", + "794638 13031119 1 2907075 ... 4.0 NaN \n", + "\n", + " num_rooms central_heat tenure num_cars sex age_years ethnicity \\\n", + "0 6.0 True 2.0 2 1 68 1 \n", + "1 6.0 True 2.0 2 2 65 1 \n", + "2 5.0 True 1.0 2 1 86 1 \n", + "3 6.0 True 2.0 1 1 58 1 \n", + "4 6.0 True 2.0 1 2 56 1 \n", + "... ... ... ... ... ... ... ... \n", + "794634 6.0 True 1.0 1 1 39 1 \n", + "794635 5.0 False 1.0 0 2 51 1 \n", + "794636 2.0 True 5.0 0 1 82 1 \n", + "794637 3.0 True 5.0 0 1 38 1 \n", + "794638 3.0 True 5.0 0 2 22 2 \n", + "\n", + " nssec8_household \n", + "0 1.0 \n", + "1 6.0 \n", + "2 2.0 \n", + "3 4.0 \n", + "4 1.0 \n", + "... ... \n", + "794634 3.0 \n", + "794635 7.0 \n", + "794636 NaN \n", + "794637 2.0 \n", + "794638 2.0 \n", + "\n", + "[794639 rows x 38 columns]" ] }, - "execution_count": 16, + "execution_count": 19, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "spc_with_nts = pd.read_parquet(acbm.root_path / \"data/interim/matching/spc_with_nts_trips.parquet\")\n", - "spc_with_nts.head(5)" + "spc = pd.read_parquet(acbm.root_path / \"data/external/spc_output/leeds_people_hh.parquet\")\n", + "spc" ] }, { @@ -357,203 +525,28 @@ "execution_count": null, "metadata": {}, "outputs": [], - "source": [] + "source": [ + "# add sex column to individuals\n", + "\n", + "spc = pd.read_parquet(acbm.root_path / \"data/external/spc_output/leeds_people_hh.parquet\", \n", + " columns=[\"id\", \"household\", \"age_years\", \"sex\", \"salary_yearly\"])\n", + "spc.head(5)\n", + "\n", + "# change spc[\"sex\"] column: 1 = male, 2 = female\n", + "\n", + "spc[\"sex\"] = spc[\"sex\"].map({1:'male',\n", + " 2: 'female'})\n", + "\n", + "spc.head(5)" + ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
idsex
00male
11female
22male
33male
44female
\n", - "
" - ], - "text/plain": [ - " id sex\n", - "0 0 male\n", - "1 1 female\n", - "2 2 male\n", - "3 3 male\n", - "4 4 female" - ] - }, - "execution_count": 61, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# add sex column to individuals\n", - "\n", - "spc = pd.read_parquet(acbm.root_path / \"data/external/spc_output/leeds_people_hh.parquet\", \n", - " columns=[\"id\", \"sex\"])\n", - "spc.head(5)\n", - "\n", - "# change spc[\"sex\"] column: 1 = male, 2 = female\n", - "\n", - "spc[\"sex\"] = spc[\"sex\"].map({1:'male',\n", - " 2: 'female'})\n", - "\n", - "spc.head(5)" - ] - }, - { - "cell_type": "code", - "execution_count": 63, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
pidhidfreqhzoneage_yearsisStudentsex
019989NaNE0200233039Falsemale
120089NaNE0200233038Falsefemale
2312139NaNE0200233064Falsefemale
3610283NaNE0200233067Falsefemale
4611283NaNE0200233064Falsemale
\n", - "
" - ], - "text/plain": [ - " pid hid freq hzone age_years isStudent sex\n", - "0 199 89 NaN E02002330 39 False male\n", - "1 200 89 NaN E02002330 38 False female\n", - "2 312 139 NaN E02002330 64 False female\n", - "3 610 283 NaN E02002330 67 False female\n", - "4 611 283 NaN E02002330 64 False male" - ] - }, - "execution_count": 63, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ - "individuals = individuals.merge(spc, left_on=\"pid\", right_on=\"id\", how=\"left\")\n", + "individuals = individuals.merge(spc[[\"id\", \"sex\"]], left_on=\"pid\", right_on=\"id\", how=\"left\")\n", "individuals = individuals.drop(columns=\"id\")\n", "individuals.head(5)" ] @@ -562,150 +555,7 @@ "cell_type": "code", "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
pidhidfreqhzoneage_yearsisPassenger
019989NaNE0200233039False
120089NaNE0200233038True
2312139NaNE0200233064False
3610283NaNE0200233067True
4611283NaNE0200233064False
5612283NaNE020023308True
6613283NaNE020023302True
72016892NaNE0200233049True
82017892NaNE0200233048False
92018892NaNE020023309True
\n", - "
" - ], - "text/plain": [ - " pid hid freq hzone age_years isPassenger\n", - "0 199 89 NaN E02002330 39 False\n", - "1 200 89 NaN E02002330 38 True\n", - "2 312 139 NaN E02002330 64 False\n", - "3 610 283 NaN E02002330 67 True\n", - "4 611 283 NaN E02002330 64 False\n", - "5 612 283 NaN E02002330 8 True\n", - "6 613 283 NaN E02002330 2 True\n", - "7 2016 892 NaN E02002330 49 True\n", - "8 2017 892 NaN E02002330 48 False\n", - "9 2018 892 NaN E02002330 9 True" - ] - }, - "execution_count": 23, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "individuals = get_passengers(\n", " legs = legs, \n", @@ -717,161 +567,7 @@ "cell_type": "code", "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
pidhidfreqhzoneage_yearsisPassengerhasPTSubscription
019989NaNE0200233039FalseFalse
120089NaNE0200233038TrueFalse
2312139NaNE0200233064FalseTrue
3610283NaNE0200233067TrueTrue
4611283NaNE0200233064FalseTrue
5612283NaNE020023308TrueFalse
6613283NaNE020023302TrueFalse
72016892NaNE0200233049TrueFalse
82017892NaNE0200233048FalseFalse
92018892NaNE020023309TrueFalse
\n", - "
" - ], - "text/plain": [ - " pid hid freq hzone age_years isPassenger hasPTSubscription\n", - "0 199 89 NaN E02002330 39 False False\n", - "1 200 89 NaN E02002330 38 True False\n", - "2 312 139 NaN E02002330 64 False True\n", - "3 610 283 NaN E02002330 67 True True\n", - "4 611 283 NaN E02002330 64 False True\n", - "5 612 283 NaN E02002330 8 True False\n", - "6 613 283 NaN E02002330 2 True False\n", - "7 2016 892 NaN E02002330 49 True False\n", - "8 2017 892 NaN E02002330 48 False False\n", - "9 2018 892 NaN E02002330 9 True False" - ] - }, - "execution_count": 25, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "individuals = get_pt_subscription(individuals = individuals, age_threshold = 66)" ] @@ -880,163 +576,7 @@ "cell_type": "code", "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
pidhidfreqhzoneage_yearsisStudent
019989NaNE0200233039False
120089NaNE0200233038False
2312139NaNE0200233064False
3610283NaNE0200233067False
4611283NaNE0200233064False
.....................
952340710597NaNE0200233313True
962484411334NaNE0200233316False
972543211574NaNE0200233360False
982547411594NaNE0200233330False
992547511594NaNE0200233320False
\n", - "

100 rows × 6 columns

\n", - "
" - ], - "text/plain": [ - " pid hid freq hzone age_years isStudent\n", - "0 199 89 NaN E02002330 39 False\n", - "1 200 89 NaN E02002330 38 False\n", - "2 312 139 NaN E02002330 64 False\n", - "3 610 283 NaN E02002330 67 False\n", - "4 611 283 NaN E02002330 64 False\n", - ".. ... ... ... ... ... ...\n", - "95 23407 10597 NaN E02002333 13 True\n", - "96 24844 11334 NaN E02002333 16 False\n", - "97 25432 11574 NaN E02002333 60 False\n", - "98 25474 11594 NaN E02002333 30 False\n", - "99 25475 11594 NaN E02002333 20 False\n", - "\n", - "[100 rows x 6 columns]" - ] - }, - "execution_count": 58, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "individuals = get_students(\n", " individuals = individuals,\n", @@ -1052,33 +592,27 @@ "cell_type": "code", "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "" - ] - }, - "execution_count": 48, - "metadata": {}, - "output_type": "execute_result" - }, - { - "data": { - "image/png": "", - "text/plain": [ - "
" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], + "outputs": [], "source": [ "# histogram of x by age and isStudent \n", "individuals.groupby(['age', 'isStudent']).size().unstack().plot(kind='bar', stacked=True)\n" ] }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "individuals = get_hhlIncome(\n", + " individuals = individuals,\n", + " individuals_with_salary = spc,\n", + " pension_age = 66,\n", + " pension = 13000)\n", + "\n", + "individuals.head(10)" + ] + }, { "cell_type": "markdown", "metadata": {}, diff --git a/scripts/5_acbm_to_matsim_xml.py b/scripts/5_acbm_to_matsim_xml.py index 3c96e20..a4807ea 100644 --- a/scripts/5_acbm_to_matsim_xml.py +++ b/scripts/5_acbm_to_matsim_xml.py @@ -16,6 +16,7 @@ calculate_percentage_remaining, filter_by_pid, filter_no_location, + get_hhlIncome, get_passengers, get_pt_subscription, get_students, @@ -61,23 +62,23 @@ def main(config_file): # TODO: add sex column upstream in the beginning of the pipeline spc = pd.read_parquet( acbm.root_path / f"data/external/spc_output/{config.region}_people_hh.parquet", - columns=["id", "sex"], + columns=["id", "household", "age_years", "sex", "salary_yearly"], ) - spc.head(5) # change spc["sex"] column: 1 = male, 2 = female spc["sex"] = spc["sex"].map({1: "male", 2: "female"}) # merge it on - individuals = individuals.merge(spc, left_on="pid", right_on="id", how="left") + individuals = individuals.merge( + spc[["id", "sex"]], left_on="pid", right_on="id", how="left" + ) individuals = individuals.drop(columns="id") # isStudent - individuals = get_students( individuals=individuals, activities=activities, age_base_threshold=config.postprocessing.student_age_base, - # age_upper_threshold = config.postprocessing.student_age_upper,, + # age_upper_threshold = config.postprocessing.student_age_upper, activity="education", ) @@ -92,6 +93,14 @@ def main(config_file): individuals=individuals, 
age_threshold=config.postprocessing.pt_subscription_age ) + ## hhlIncome + individuals = get_hhlIncome( + individuals=individuals, + individuals_with_salary=spc, + pension_age=config.postprocessing.pt_subscription_age, + pension=config.postprocessing.state_pension, + ) + # We will be removing some rows in each planning operation. This function helps keep a # record of the number of rows in each table after each operation. diff --git a/src/acbm/config.py b/src/acbm/config.py index 980e881..eaca0d6 100644 --- a/src/acbm/config.py +++ b/src/acbm/config.py @@ -46,6 +46,7 @@ class Postprocessing(BaseModel): student_age_upper: int modes_passenger: list[str] pt_subscription_age: int + state_pension: int class Config(BaseModel): diff --git a/src/acbm/postprocessing/matsim.py b/src/acbm/postprocessing/matsim.py index 458d7aa..b8d67c2 100644 --- a/src/acbm/postprocessing/matsim.py +++ b/src/acbm/postprocessing/matsim.py @@ -1,5 +1,6 @@ from typing import Optional +import numpy as np import pandas as pd @@ -340,3 +341,65 @@ def get_students( individuals["isStudent"] = individuals["pid"].isin(student_pids) return individuals + + +def get_hhlIncome( + individuals: pd.DataFrame, + individuals_with_salary: pd.DataFrame, + pension_age: int = 66, + pension: int = 13000, +) -> pd.DataFrame: + """ + Function to calculate the household level income from the individual level income data in the SPC + dataset. The function groups salary data by household and then merges the household level income + data back onto the individual level data. + + The salary data is missing for many individuals in the SPC dataset. It also does not include pension + We add the state pension if the person has reached the state pension age (66 years) and has no salary data. + + TODO: add student maintenance loan? 
+ + Parameters + ---------- + individuals : pd.DataFrame + The individual level data output from acbm + individuals_with_salary : pd.DataFrame + The original SPC dataset with the salary_yearly column + + Returns + ------- + pd.DataFrame + The individual level data with the hhlIncome column added + """ + individuals_income = individuals.copy() + + # If person is a pensioner, add pension, otherwise keep salary + individuals_with_salary["income_modeled"] = np.where( + (individuals_with_salary["salary_yearly"] == 0) + | (individuals_with_salary["salary_yearly"].isna()), + np.where(individuals_with_salary["age_years"] >= pension_age, pension, 0), + individuals_with_salary["salary_yearly"], + ) + + # Summarize the data by household to create the hhlIncome column + household_income = ( + individuals_with_salary.groupby("household")["income_modeled"] + .sum() + .reset_index() + ) + household_income.rename(columns={"income_modeled": "hhlIncome"}, inplace=True) + # round hhlIncome to the nearest whole number + household_income["hhlIncome"] = household_income["hhlIncome"].round() + + # Merge the household_income data onto the individuals data + individuals_income = pd.merge( + individuals_income, + household_income, + how="left", + left_on="hid", + right_on="household", + ) + + individuals_income.drop(columns="household", inplace=True) + + return individuals_income From 3fe1494fe9e67bcd1f0286eb04d0585d707b9339 Mon Sep 17 00:00:00 2001 From: Hussein Mahfouz <45176416+Hussein-Mahfouz@users.noreply.github.com> Date: Tue, 10 Dec 2024 11:59:36 +0100 Subject: [PATCH 3/8] get correct cycle ownership column --- notebooks/2_match_households_and_individuals.ipynb | 4 ++-- scripts/2_match_households_and_individuals.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/notebooks/2_match_households_and_individuals.ipynb b/notebooks/2_match_households_and_individuals.ipynb index 871225b..94d53a1 100644 --- a/notebooks/2_match_households_and_individuals.ipynb +++ 
b/notebooks/2_match_households_and_individuals.ipynb @@ -487,7 +487,7 @@ }, { "cell_type": "code", - "execution_count": 17, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -505,7 +505,7 @@ " 'EdAttn1_B01ID',\n", " 'EdAttn2_B01ID',\n", " 'EdAttn3_B01ID',\n", - " 'OwnCycle_B01ID', # Owns a cycle\n", + " 'OwnCycleN_B01ID', # Owns a cycle\n", " 'DrivLic_B02ID', # type of driving license\n", " 'CarAccess_B01ID',\n", " 'IndIncome2002_B02ID',\n", diff --git a/scripts/2_match_households_and_individuals.py b/scripts/2_match_households_and_individuals.py index e67deba..476d429 100644 --- a/scripts/2_match_households_and_individuals.py +++ b/scripts/2_match_households_and_individuals.py @@ -134,7 +134,7 @@ def get_interim_path( "EdAttn1_B01ID", "EdAttn2_B01ID", "EdAttn3_B01ID", - "OwnCycle_B01ID", # Owns a cycle + "OwnCycleN_B01ID", # Owns a cycle "DrivLic_B02ID", # type of driving license "CarAccess_B01ID", "IndIncome2002_B02ID", From 455e20183a42049db359314ed09e252488521483 Mon Sep 17 00:00:00 2001 From: Hussein Mahfouz <45176416+Hussein-Mahfouz@users.noreply.github.com> Date: Tue, 10 Dec 2024 14:06:09 +0100 Subject: [PATCH 4/8] add vehicle availability to individuals --- notebooks/6_acbm_to_matsim_xml.ipynb | 1428 ++++++++++++++++++++++++-- scripts/5_acbm_to_matsim_xml.py | 73 ++ 2 files changed, 1433 insertions(+), 68 deletions(-) diff --git a/notebooks/6_acbm_to_matsim_xml.ipynb b/notebooks/6_acbm_to_matsim_xml.ipynb index 47c2f8b..ef2713a 100644 --- a/notebooks/6_acbm_to_matsim_xml.ipynb +++ b/notebooks/6_acbm_to_matsim_xml.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "code", - "execution_count": 16, + "execution_count": 57, "metadata": {}, "outputs": [], "source": [ @@ -44,7 +44,7 @@ }, { "cell_type": "code", - "execution_count": 17, + "execution_count": 2, "metadata": {}, "outputs": [], "source": [ @@ -58,16 +58,92 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 3, "metadata": {}, "outputs": [ { "data": { + 
"text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
pidhidfreqhzoneage_years
019989NaNE0200233039
120089NaNE0200233038
2312139NaNE0200233064
3610283NaNE0200233067
4611283NaNE0200233064
\n", + "
" + ], "text/plain": [ - "1709" + " pid hid freq hzone age_years\n", + "0 199 89 NaN E02002330 39\n", + "1 200 89 NaN E02002330 38\n", + "2 312 139 NaN E02002330 64\n", + "3 610 283 NaN E02002330 67\n", + "4 611 283 NaN E02002330 64" ] }, - "execution_count": 18, + "execution_count": 3, "metadata": {}, "output_type": "execute_result" } @@ -78,7 +154,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 4, "metadata": {}, "outputs": [], "source": [ @@ -88,9 +164,188 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 5, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
idhouseholdage_yearsOA11CDnts_hh_idnts_ind_ideducation_typeTripIDTravDayseqmodeoactdacttsttetTripDisIncSWTripTotalTimeIndividualID
01998939E000590312.019001e+092019001326education_university2.019018e+093.01.0carhomeshop540.0555.02.015.02.019001e+09
11998939E000590312.019001e+092019001326education_university2.019018e+094.01.0car_passengerhomeother450.0480.020.030.02.019001e+09
21998939E000590312.019001e+092019001326education_university2.019018e+092.02.0carworkhome960.01020.025.060.02.019001e+09
31998939E000590312.019001e+092019001326education_university2.019018e+094.03.0carhomeshop960.0975.02.015.02.019001e+09
41998939E000590312.019001e+092019001326education_university2.019018e+097.02.0carworkhome1005.01065.025.060.02.019001e+09
\n", + "
" + ], + "text/plain": [ + " id household age_years OA11CD nts_hh_id nts_ind_id \\\n", + "0 199 89 39 E00059031 2.019001e+09 2019001326 \n", + "1 199 89 39 E00059031 2.019001e+09 2019001326 \n", + "2 199 89 39 E00059031 2.019001e+09 2019001326 \n", + "3 199 89 39 E00059031 2.019001e+09 2019001326 \n", + "4 199 89 39 E00059031 2.019001e+09 2019001326 \n", + "\n", + " education_type TripID TravDay seq mode oact \\\n", + "0 education_university 2.019018e+09 3.0 1.0 car home \n", + "1 education_university 2.019018e+09 4.0 1.0 car_passenger home \n", + "2 education_university 2.019018e+09 2.0 2.0 car work \n", + "3 education_university 2.019018e+09 4.0 3.0 car home \n", + "4 education_university 2.019018e+09 7.0 2.0 car work \n", + "\n", + " dact tst tet TripDisIncSW TripTotalTime IndividualID \n", + "0 shop 540.0 555.0 2.0 15.0 2.019001e+09 \n", + "1 other 450.0 480.0 20.0 30.0 2.019001e+09 \n", + "2 home 960.0 1020.0 25.0 60.0 2.019001e+09 \n", + "3 shop 960.0 975.0 2.0 15.0 2.019001e+09 \n", + "4 home 1005.0 1065.0 25.0 60.0 2.019001e+09 " + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "spc_with_nts = pd.read_parquet(acbm.root_path / \"data/interim/matching/spc_with_nts_trips.parquet\")\n", "spc_with_nts.head(5)" @@ -98,7 +353,7 @@ }, { "cell_type": "code", - "execution_count": 19, + "execution_count": 6, "metadata": {}, "outputs": [ { @@ -510,7 +765,7 @@ "[794639 rows x 38 columns]" ] }, - "execution_count": 19, + "execution_count": 6, "metadata": {}, "output_type": "execute_result" } @@ -522,9 +777,96 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 7, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
idhouseholdage_yearssexsalary_yearly
00068maleNaN
11065femaleNaN
22186maleNaN
33258maleNaN
44256female36646.464844
\n", + "
" + ], + "text/plain": [ + " id household age_years sex salary_yearly\n", + "0 0 0 68 male NaN\n", + "1 1 0 65 female NaN\n", + "2 2 1 86 male NaN\n", + "3 3 2 58 male NaN\n", + "4 4 2 56 female 36646.464844" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "# add sex column to individuals\n", "\n", @@ -542,67 +884,568 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "individuals = individuals.merge(spc[[\"id\", \"sex\"]], left_on=\"pid\", right_on=\"id\", how=\"left\")\n", - "individuals = individuals.drop(columns=\"id\")\n", - "individuals.head(5)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "individuals = get_passengers(\n", - " legs = legs, \n", - " individuals = individuals, \n", - " modes = ['car_passenger', 'taxi'])\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "individuals = get_pt_subscription(individuals = individuals, age_threshold = 66)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "individuals = get_students(\n", - " individuals = individuals,\n", - " activities = activities,\n", - " age_base_threshold = 16,\n", - " #age_upper_threshold = 30,\n", - " activity = 'education')\n", - "\n", - "individuals.head(10)\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, + "execution_count": 8, "metadata": {}, - "outputs": [], - "source": [ - "# histogram of x by age and isStudent \n", - "individuals.groupby(['age', 'isStudent']).size().unstack().plot(kind='bar', stacked=True)\n" - ] + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
pidhidfreqhzoneagesex
019989NaNE0200233039male
120089NaNE0200233038female
2312139NaNE0200233064female
3610283NaNE0200233067female
4611283NaNE0200233064male
\n", + "
" + ], + "text/plain": [ + " pid hid freq hzone age sex\n", + "0 199 89 NaN E02002330 39 male\n", + "1 200 89 NaN E02002330 38 female\n", + "2 312 139 NaN E02002330 64 female\n", + "3 610 283 NaN E02002330 67 female\n", + "4 611 283 NaN E02002330 64 male" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "individuals = individuals.merge(spc[[\"id\", \"sex\"]], left_on=\"pid\", right_on=\"id\", how=\"left\")\n", + "individuals = individuals.drop(columns=\"id\")\n", + "individuals.head(5)" + ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 9, + "metadata": {}, + "outputs": [], + "source": [ + "individuals = get_passengers(\n", + " legs = legs, \n", + " individuals = individuals, \n", + " modes = ['car_passenger', 'taxi'])\n" + ] + }, + { + "cell_type": "code", + "execution_count": 10, "metadata": {}, "outputs": [], + "source": [ + "individuals = get_pt_subscription(individuals = individuals, age_threshold = 66)" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
pidhidfreqhzoneagesexisPassengerhasPTSubscriptionisStudent
019989NaNE0200233039maleFalseFalseFalse
120089NaNE0200233038femaleTrueFalseFalse
2312139NaNE0200233064femaleFalseFalseFalse
3610283NaNE0200233067femaleTrueTrueFalse
4611283NaNE0200233064maleFalseFalseFalse
5612283NaNE020023308maleTrueFalseTrue
6613283NaNE020023302femaleTrueFalseTrue
72016892NaNE0200233049maleTrueFalseFalse
82017892NaNE0200233048femaleFalseFalseFalse
92018892NaNE020023309maleTrueFalseTrue
\n", + "
" + ], + "text/plain": [ + " pid hid freq hzone age sex isPassenger hasPTSubscription \\\n", + "0 199 89 NaN E02002330 39 male False False \n", + "1 200 89 NaN E02002330 38 female True False \n", + "2 312 139 NaN E02002330 64 female False False \n", + "3 610 283 NaN E02002330 67 female True True \n", + "4 611 283 NaN E02002330 64 male False False \n", + "5 612 283 NaN E02002330 8 male True False \n", + "6 613 283 NaN E02002330 2 female True False \n", + "7 2016 892 NaN E02002330 49 male True False \n", + "8 2017 892 NaN E02002330 48 female False False \n", + "9 2018 892 NaN E02002330 9 male True False \n", + "\n", + " isStudent \n", + "0 False \n", + "1 False \n", + "2 False \n", + "3 False \n", + "4 False \n", + "5 True \n", + "6 True \n", + "7 False \n", + "8 False \n", + "9 True " + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "individuals = get_students(\n", + " individuals = individuals,\n", + " activities = activities,\n", + " age_base_threshold = 16,\n", + " #age_upper_threshold = 30,\n", + " activity = 'education')\n", + "\n", + "individuals.head(10)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "# histogram of x by age and isStudent \n", + "individuals.groupby(['age', 'isStudent']).size().unstack().plot(kind='bar', stacked=True)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
pidhidfreqhzoneagesexisPassengerhasPTSubscriptionisStudenthhlIncome
019989NaNE0200233039maleFalseFalseFalse34167.0
120089NaNE0200233038femaleTrueFalseFalse34167.0
2312139NaNE0200233064femaleFalseFalseFalse22677.0
3610283NaNE0200233067femaleTrueTrueFalse13000.0
4611283NaNE0200233064maleFalseFalseFalse13000.0
5612283NaNE020023308maleTrueFalseTrue13000.0
6613283NaNE020023302femaleTrueFalseTrue13000.0
72016892NaNE0200233049maleTrueFalseFalse66666.0
82017892NaNE0200233048femaleFalseFalseFalse66666.0
92018892NaNE020023309maleTrueFalseTrue66666.0
\n", + "
" + ], + "text/plain": [ + " pid hid freq hzone age sex isPassenger hasPTSubscription \\\n", + "0 199 89 NaN E02002330 39 male False False \n", + "1 200 89 NaN E02002330 38 female True False \n", + "2 312 139 NaN E02002330 64 female False False \n", + "3 610 283 NaN E02002330 67 female True True \n", + "4 611 283 NaN E02002330 64 male False False \n", + "5 612 283 NaN E02002330 8 male True False \n", + "6 613 283 NaN E02002330 2 female True False \n", + "7 2016 892 NaN E02002330 49 male True False \n", + "8 2017 892 NaN E02002330 48 female False False \n", + "9 2018 892 NaN E02002330 9 male True False \n", + "\n", + " isStudent hhlIncome \n", + "0 False 34167.0 \n", + "1 False 34167.0 \n", + "2 False 22677.0 \n", + "3 False 13000.0 \n", + "4 False 13000.0 \n", + "5 True 13000.0 \n", + "6 True 13000.0 \n", + "7 False 66666.0 \n", + "8 False 66666.0 \n", + "9 True 66666.0 " + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "individuals = get_hhlIncome(\n", " individuals = individuals,\n", @@ -617,13 +1460,462 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "# Clean the data" + "### Add attributes to do with vehicle ownership \n", + "\n", + "These attributes are in the nts dataset. We will merge them onto the processed dataset\n", + "#TODO: move this upstream so that it is part of the individuals.csv output" + ] + }, + { + "cell_type": "code", + "execution_count": 76, + "metadata": {}, + "outputs": [], + "source": [ + "nts_individuals = pd.read_parquet(acbm.root_path / \"data/external/nts/filtered/nts_individuals.parquet\")\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Create a df with the vehicle ownership data (from the nts)" + ] + }, + { + "cell_type": "code", + "execution_count": 79, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
IndividualIDCarAvailabilityBicycleAvailabilityhasLicence
02019002187yesyesTrue
12019002187yesyesTrue
22019002187yesyesTrue
32019002187yesyesTrue
42019002187yesyesTrue
52019002187yesyesTrue
62019002187yesyesTrue
72019002187yesyesTrue
82019002187yesyesTrue
92019002187yesyesTrue
\n", + "
" + ], + "text/plain": [ + " IndividualID CarAvailability BicycleAvailability hasLicence\n", + "0 2019002187 yes yes True\n", + "1 2019002187 yes yes True\n", + "2 2019002187 yes yes True\n", + "3 2019002187 yes yes True\n", + "4 2019002187 yes yes True\n", + "5 2019002187 yes yes True\n", + "6 2019002187 yes yes True\n", + "7 2019002187 yes yes True\n", + "8 2019002187 yes yes True\n", + "9 2019002187 yes yes True" + ] + }, + "execution_count": 79, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "nts_individuals = nts_individuals[[\"IndividualID\", \"OwnCycleN_B01ID\", \"DrivLic_B02ID\", \"CarAccess_B01ID\"]]\n", + "\n", + "# Create CarAvailability colum\n", + "\n", + "car_availability_mapping = {\n", + " 1: \"yes\", # Main driver of company car\n", + " 2: \"yes\", # Other main driver\n", + " 3: \"some\", # Not main driver of household car\n", + " }\n", + "\n", + "nts_individuals['CarAvailability'] = nts_individuals['CarAccess_B01ID'].map(car_availability_mapping).fillna('no')\n", + "# Create BicycleAvailability column\n", + "\n", + "bicycle_availability_mapping = {\n", + " 1: \"yes\", # Own a pedal cycle yourself\n", + " 2: \"some\", # Have use of household pedal cycle\n", + " 3: \"no\", # Have use of non-household pedal cycle\n", + " }\n", + "\n", + "nts_individuals['BicycleAvailability'] = nts_individuals['OwnCycleN_B01ID'].map(bicycle_availability_mapping).fillna('no')\n", + "\n", + "# Create hasLicence column\n", + "# 1: Full licence, 2: Provisional licence, 3: Other or none\n", + "nts_individuals[\"hasLicence\"] = nts_individuals[\"DrivLic_B02ID\"].apply(lambda x: x == 1)\n", + "\n", + "# Keep only the columns we created\n", + "nts_individuals = nts_individuals[[\"IndividualID\", \"CarAvailability\", \"BicycleAvailability\", \"hasLicence\"]]\n", + "nts_individuals.head(10)\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Merge the data onto the individuals df" ] }, { "cell_type": "code", 
"execution_count": null, "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
pidhidfreqhzoneagesexisPassengerhasPTSubscriptionisStudenthhlIncomeCarAvailabilityBicycleAvailabilityhasLicence
019989NaNE0200233039maleFalseFalseFalse34167.0yesyesTrue
119989NaNE0200233039maleFalseFalseFalse34167.0yesyesTrue
219989NaNE0200233039maleFalseFalseFalse34167.0yesyesTrue
319989NaNE0200233039maleFalseFalseFalse34167.0yesyesTrue
419989NaNE0200233039maleFalseFalseFalse34167.0yesyesTrue
519989NaNE0200233039maleFalseFalseFalse34167.0yesyesTrue
619989NaNE0200233039maleFalseFalseFalse34167.0yesyesTrue
719989NaNE0200233039maleFalseFalseFalse34167.0yesyesTrue
819989NaNE0200233039maleFalseFalseFalse34167.0yesyesTrue
919989NaNE0200233039maleFalseFalseFalse34167.0yesyesTrue
\n", + "
" + ], + "text/plain": [ + " pid hid freq hzone age sex isPassenger hasPTSubscription \\\n", + "0 199 89 NaN E02002330 39 male False False \n", + "1 199 89 NaN E02002330 39 male False False \n", + "2 199 89 NaN E02002330 39 male False False \n", + "3 199 89 NaN E02002330 39 male False False \n", + "4 199 89 NaN E02002330 39 male False False \n", + "5 199 89 NaN E02002330 39 male False False \n", + "6 199 89 NaN E02002330 39 male False False \n", + "7 199 89 NaN E02002330 39 male False False \n", + "8 199 89 NaN E02002330 39 male False False \n", + "9 199 89 NaN E02002330 39 male False False \n", + "\n", + " isStudent hhlIncome CarAvailability BicycleAvailability hasLicence \n", + "0 False 34167.0 yes yes True \n", + "1 False 34167.0 yes yes True \n", + "2 False 34167.0 yes yes True \n", + "3 False 34167.0 yes yes True \n", + "4 False 34167.0 yes yes True \n", + "5 False 34167.0 yes yes True \n", + "6 False 34167.0 yes yes True \n", + "7 False 34167.0 yes yes True \n", + "8 False 34167.0 yes yes True \n", + "9 False 34167.0 yes yes True " + ] + }, + "execution_count": 80, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# get spc id from spc_with_nts\n", + "nts_individuals = nts_individuals.merge(\n", + " spc_with_nts[[\"id\", \"nts_ind_id\"]], \n", + " left_on='IndividualID',\n", + " right_on='nts_ind_id', \n", + " how='inner').drop(columns=['nts_ind_id'])\n", + "\n", + "nts_individuals.rename(columns={\"id\": \"spc_id\"}, inplace=True)\n", + "nts_individuals.head(10)\n", + "\n", + "# merge vehicle ownership data onto individuals\n", + "individuals = individuals.merge(\n", + " nts_individuals, \n", + " left_on=\"pid\", \n", + " right_on=\"spc_id\", \n", + " how=\"left\").drop(columns= [\"spc_id\", \"IndividualID\"])\n", + "\n", + "individuals.head(10)\n", + "\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Clean the data" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, 
"outputs": [], "source": [ "# We will be removing some rows in each planning operation. This function helps keep a \n", diff --git a/scripts/5_acbm_to_matsim_xml.py b/scripts/5_acbm_to_matsim_xml.py index a4807ea..6837aca 100644 --- a/scripts/5_acbm_to_matsim_xml.py +++ b/scripts/5_acbm_to_matsim_xml.py @@ -101,6 +101,79 @@ def main(config_file): pension=config.postprocessing.state_pension, ) + # ----- Add vehicle ownership attributes (car, bicycle) to the individuals dataframe + # TODO: move this upstream + + # a. spc to nts match (used to get nts_id: spc_id match) + spc_with_nts = pd.read_parquet( + acbm.root_path / "data/interim/matching/spc_with_nts_trips.parquet" + ) + nts_individuals = pd.read_parquet( + acbm.root_path / "data/external/nts/filtered/nts_individuals.parquet" + ) + + # b. Create a df with the vehicle ownership data (from the nts) + + nts_individuals = nts_individuals[ + ["IndividualID", "OwnCycleN_B01ID", "DrivLic_B02ID", "CarAccess_B01ID"] + ] + + # Create CarAvailability colum + + car_availability_mapping = { + 1: "yes", # Main driver of company car + 2: "yes", # Other main driver + 3: "some", # Not main driver of household car + } + + nts_individuals["CarAvailability"] = ( + nts_individuals["CarAccess_B01ID"].map(car_availability_mapping).fillna("no") + ) + # Create BicycleAvailability column + + bicycle_availability_mapping = { + 1: "yes", # Own a pedal cycle yourself + 2: "some", # Have use of household pedal cycle + 3: "no", # Have use of non-household pedal cycle + } + + nts_individuals["BicycleAvailability"] = ( + nts_individuals["OwnCycleN_B01ID"] + .map(bicycle_availability_mapping) + .fillna("no") + ) + + # Create hasLicence column + # 1: Full licence, 2: Provisional licence, 3: Other or none + nts_individuals["hasLicence"] = nts_individuals["DrivLic_B02ID"].apply( + lambda x: x == 1 + ) + + # Keep only the columns we created + nts_individuals = nts_individuals[ + ["IndividualID", "CarAvailability", "BicycleAvailability", 
"hasLicence"] + ] + nts_individuals.head(10) + + # c. add spc id to nts_individuals + + # create a df with spc_id and nts_id + spcid_to_ntsid = spc_with_nts[ + ["id", "nts_ind_id"] + ].drop_duplicates() # spc_with_nts has one row per travel day + + # add the spc_id column + nts_individuals = nts_individuals.merge( + spcid_to_ntsid, left_on="IndividualID", right_on="nts_ind_id", how="inner" + ).drop(columns=["nts_ind_id"]) + + nts_individuals.rename(columns={"id": "spc_id"}, inplace=True) + + # d. merge nts_individuals with individuals to get the vehicle ownership data + individuals = individuals.merge( + nts_individuals, left_on="pid", right_on="spc_id", how="left" + ).drop(columns=["spc_id", "IndividualID"]) + # We will be removing some rows in each planning operation. This function helps keep a # record of the number of rows in each table after each operation. From d6dc8a52060f7ee157d5291bb3919f47171bb020 Mon Sep 17 00:00:00 2001 From: Hussein Mahfouz <45176416+Hussein-Mahfouz@users.noreply.github.com> Date: Tue, 10 Dec 2024 14:32:41 +0100 Subject: [PATCH 5/8] add individual attributes example --- notebooks/6_acbm_to_matsim_xml.ipynb | 1468 +++++++++++++++++++++++--- 1 file changed, 1296 insertions(+), 172 deletions(-) diff --git a/notebooks/6_acbm_to_matsim_xml.ipynb b/notebooks/6_acbm_to_matsim_xml.ipynb index ef2713a..1d941ad 100644 --- a/notebooks/6_acbm_to_matsim_xml.ipynb +++ b/notebooks/6_acbm_to_matsim_xml.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "code", - "execution_count": 57, + "execution_count": 1, "metadata": {}, "outputs": [], "source": [ @@ -1468,7 +1468,7 @@ }, { "cell_type": "code", - "execution_count": 76, + "execution_count": 14, "metadata": {}, "outputs": [], "source": [ @@ -1484,7 +1484,7 @@ }, { "cell_type": "code", - "execution_count": 79, + "execution_count": 15, "metadata": {}, "outputs": [ { @@ -1516,71 +1516,71 @@ " \n", " \n", " \n", - " 0\n", - " 2019002187\n", - " yes\n", - " yes\n", - " True\n", + " 340877\n", + " 
2019006027\n", + " no\n", + " no\n", + " False\n", " \n", " \n", - " 1\n", - " 2019002187\n", - " yes\n", - " yes\n", - " True\n", + " 340878\n", + " 2019006028\n", + " no\n", + " no\n", + " False\n", " \n", " \n", - " 2\n", - " 2019002187\n", - " yes\n", - " yes\n", - " True\n", + " 340879\n", + " 2019006029\n", + " no\n", + " no\n", + " False\n", " \n", " \n", - " 3\n", - " 2019002187\n", - " yes\n", + " 340882\n", + " 2019002185\n", + " some\n", " yes\n", " True\n", " \n", " \n", - " 4\n", - " 2019002187\n", - " yes\n", + " 340883\n", + " 2019002186\n", + " some\n", " yes\n", " True\n", " \n", " \n", - " 5\n", + " 340884\n", " 2019002187\n", " yes\n", " yes\n", " True\n", " \n", " \n", - " 6\n", - " 2019002187\n", + " 340885\n", + " 2019002188\n", " yes\n", " yes\n", " True\n", " \n", " \n", - " 7\n", - " 2019002187\n", - " yes\n", - " yes\n", - " True\n", + " 340886\n", + " 2019002189\n", + " no\n", + " no\n", + " False\n", " \n", " \n", - " 8\n", - " 2019002187\n", + " 340887\n", + " 2019004751\n", " yes\n", " yes\n", " True\n", " \n", " \n", - " 9\n", - " 2019002187\n", + " 340888\n", + " 2019004752\n", " yes\n", " yes\n", " True\n", @@ -1590,20 +1590,20 @@ "" ], "text/plain": [ - " IndividualID CarAvailability BicycleAvailability hasLicence\n", - "0 2019002187 yes yes True\n", - "1 2019002187 yes yes True\n", - "2 2019002187 yes yes True\n", - "3 2019002187 yes yes True\n", - "4 2019002187 yes yes True\n", - "5 2019002187 yes yes True\n", - "6 2019002187 yes yes True\n", - "7 2019002187 yes yes True\n", - "8 2019002187 yes yes True\n", - "9 2019002187 yes yes True" + " IndividualID CarAvailability BicycleAvailability hasLicence\n", + "340877 2019006027 no no False\n", + "340878 2019006028 no no False\n", + "340879 2019006029 no no False\n", + "340882 2019002185 some yes True\n", + "340883 2019002186 some yes True\n", + "340884 2019002187 yes yes True\n", + "340885 2019002188 yes yes True\n", + "340886 2019002189 no no False\n", + "340887 2019004751 yes yes 
True\n", + "340888 2019004752 yes yes True" ] }, - "execution_count": 79, + "execution_count": 15, "metadata": {}, "output_type": "execute_result" } @@ -1646,9 +1646,334 @@ "### Merge the data onto the individuals df" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Create a 1:1 mapping of spc_id to nts_id" + ] + }, { "cell_type": "code", - "execution_count": null, + "execution_count": 16, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
idnts_ind_id
01992019001326
202002019001325
362012019001327
443122019003730
783132019003731
804252019014984
814262019014985
826102019002009
936112019002008
1276122019002010
1456132019002011
15720162019012822
17120172019012823
18320182019012824
19522702019004682
21722712019004683
23025612019014624
24425622019014625
25925822019006176
27825832019006177
\n", + "
" + ], + "text/plain": [ + " id nts_ind_id\n", + "0 199 2019001326\n", + "20 200 2019001325\n", + "36 201 2019001327\n", + "44 312 2019003730\n", + "78 313 2019003731\n", + "80 425 2019014984\n", + "81 426 2019014985\n", + "82 610 2019002009\n", + "93 611 2019002008\n", + "127 612 2019002010\n", + "145 613 2019002011\n", + "157 2016 2019012822\n", + "171 2017 2019012823\n", + "183 2018 2019012824\n", + "195 2270 2019004682\n", + "217 2271 2019004683\n", + "230 2561 2019014624\n", + "244 2562 2019014625\n", + "259 2582 2019006176\n", + "278 2583 2019006177" + ] + }, + "execution_count": 16, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "spcid_to_ntsid = spc_with_nts[['id', 'nts_ind_id']].drop_duplicates()\n", + "spcid_to_ntsid.head(20)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
IndividualIDCarAvailabilityBicycleAvailabilityhasLicencespc_id
02019004751yesyesTrue111243
12019004752yesyesTrue111244
22019004753noyesFalse111245
32019004775yesnoTrue319239
42019004776yesnoTrue319240
52019004777yesnoTrue110871
62019004778yesnoTrue110870
72019004779nonoFalse110872
82019004780yesnoTrue659043
92019004781yesyesTrue611325
\n", + "
" + ], + "text/plain": [ + " IndividualID CarAvailability BicycleAvailability hasLicence spc_id\n", + "0 2019004751 yes yes True 111243\n", + "1 2019004752 yes yes True 111244\n", + "2 2019004753 no yes False 111245\n", + "3 2019004775 yes no True 319239\n", + "4 2019004776 yes no True 319240\n", + "5 2019004777 yes no True 110871\n", + "6 2019004778 yes no True 110870\n", + "7 2019004779 no no False 110872\n", + "8 2019004780 yes no True 659043\n", + "9 2019004781 yes yes True 611325" + ] + }, + "execution_count": 17, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# add spc id to nts_individuals\n", + "nts_individuals = nts_individuals.merge(\n", + " spcid_to_ntsid, \n", + " left_on='IndividualID',\n", + " right_on='nts_ind_id', \n", + " how='inner').drop(columns=['nts_ind_id'])\n", + "\n", + "nts_individuals.rename(columns={\"id\": \"spc_id\"}, inplace=True)\n", + "nts_individuals.head(10)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 18, "metadata": {}, "outputs": [ { @@ -1706,13 +2031,13 @@ " \n", " \n", " 1\n", - " 199\n", + " 200\n", " 89\n", " NaN\n", " E02002330\n", - " 39\n", - " male\n", - " False\n", + " 38\n", + " female\n", + " True\n", " False\n", " False\n", " 34167.0\n", @@ -1722,178 +2047,168 @@ " \n", " \n", " 2\n", - " 199\n", - " 89\n", + " 312\n", + " 139\n", " NaN\n", " E02002330\n", - " 39\n", - " male\n", + " 64\n", + " female\n", " False\n", " False\n", " False\n", - " 34167.0\n", - " yes\n", + " 22677.0\n", " yes\n", + " no\n", " True\n", " \n", " \n", " 3\n", - " 199\n", - " 89\n", + " 610\n", + " 283\n", " NaN\n", " E02002330\n", - " 39\n", - " male\n", - " False\n", + " 67\n", + " female\n", + " True\n", + " True\n", " False\n", + " 13000.0\n", + " no\n", + " no\n", " False\n", - " 34167.0\n", - " yes\n", - " yes\n", - " True\n", " \n", " \n", " 4\n", - " 199\n", - " 89\n", + " 611\n", + " 283\n", " NaN\n", " E02002330\n", - " 39\n", + " 64\n", " male\n", " False\n", " False\n", " False\n", - 
" 34167.0\n", + " 13000.0\n", " yes\n", " yes\n", " True\n", " \n", " \n", " 5\n", - " 199\n", - " 89\n", + " 612\n", + " 283\n", " NaN\n", " E02002330\n", - " 39\n", + " 8\n", " male\n", + " True\n", " False\n", - " False\n", - " False\n", - " 34167.0\n", - " yes\n", - " yes\n", " True\n", + " 13000.0\n", + " no\n", + " yes\n", + " False\n", " \n", " \n", " 6\n", - " 199\n", - " 89\n", + " 613\n", + " 283\n", " NaN\n", " E02002330\n", - " 39\n", - " male\n", - " False\n", - " False\n", + " 2\n", + " female\n", + " True\n", " False\n", - " 34167.0\n", - " yes\n", - " yes\n", " True\n", + " 13000.0\n", + " no\n", + " no\n", + " False\n", " \n", " \n", " 7\n", - " 199\n", - " 89\n", + " 2016\n", + " 892\n", " NaN\n", " E02002330\n", - " 39\n", + " 49\n", " male\n", + " True\n", " False\n", " False\n", - " False\n", - " 34167.0\n", + " 66666.0\n", " yes\n", " yes\n", " True\n", " \n", " \n", " 8\n", - " 199\n", - " 89\n", + " 2017\n", + " 892\n", " NaN\n", " E02002330\n", - " 39\n", - " male\n", + " 48\n", + " female\n", " False\n", " False\n", " False\n", - " 34167.0\n", - " yes\n", - " yes\n", - " True\n", + " 66666.0\n", + " no\n", + " no\n", + " False\n", " \n", " \n", " 9\n", - " 199\n", - " 89\n", + " 2018\n", + " 892\n", " NaN\n", " E02002330\n", - " 39\n", + " 9\n", " male\n", + " True\n", " False\n", - " False\n", - " False\n", - " 34167.0\n", - " yes\n", - " yes\n", " True\n", + " 66666.0\n", + " no\n", + " no\n", + " False\n", " \n", " \n", "\n", "" ], "text/plain": [ - " pid hid freq hzone age sex isPassenger hasPTSubscription \\\n", - "0 199 89 NaN E02002330 39 male False False \n", - "1 199 89 NaN E02002330 39 male False False \n", - "2 199 89 NaN E02002330 39 male False False \n", - "3 199 89 NaN E02002330 39 male False False \n", - "4 199 89 NaN E02002330 39 male False False \n", - "5 199 89 NaN E02002330 39 male False False \n", - "6 199 89 NaN E02002330 39 male False False \n", - "7 199 89 NaN E02002330 39 male False False \n", - "8 199 89 NaN 
E02002330 39 male False False \n", - "9 199 89 NaN E02002330 39 male False False \n", + " pid hid freq hzone age sex isPassenger hasPTSubscription \\\n", + "0 199 89 NaN E02002330 39 male False False \n", + "1 200 89 NaN E02002330 38 female True False \n", + "2 312 139 NaN E02002330 64 female False False \n", + "3 610 283 NaN E02002330 67 female True True \n", + "4 611 283 NaN E02002330 64 male False False \n", + "5 612 283 NaN E02002330 8 male True False \n", + "6 613 283 NaN E02002330 2 female True False \n", + "7 2016 892 NaN E02002330 49 male True False \n", + "8 2017 892 NaN E02002330 48 female False False \n", + "9 2018 892 NaN E02002330 9 male True False \n", "\n", - " isStudent hhlIncome CarAvailability BicycleAvailability hasLicence \n", - "0 False 34167.0 yes yes True \n", - "1 False 34167.0 yes yes True \n", - "2 False 34167.0 yes yes True \n", - "3 False 34167.0 yes yes True \n", - "4 False 34167.0 yes yes True \n", - "5 False 34167.0 yes yes True \n", - "6 False 34167.0 yes yes True \n", - "7 False 34167.0 yes yes True \n", - "8 False 34167.0 yes yes True \n", - "9 False 34167.0 yes yes True " + " isStudent hhlIncome CarAvailability BicycleAvailability hasLicence \n", + "0 False 34167.0 yes yes True \n", + "1 False 34167.0 yes yes True \n", + "2 False 22677.0 yes no True \n", + "3 False 13000.0 no no False \n", + "4 False 13000.0 yes yes True \n", + "5 True 13000.0 no yes False \n", + "6 True 13000.0 no no False \n", + "7 False 66666.0 yes yes True \n", + "8 False 66666.0 no no False \n", + "9 True 66666.0 no no False " ] }, - "execution_count": 80, + "execution_count": 18, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "# get spc id from spc_with_nts\n", - "nts_individuals = nts_individuals.merge(\n", - " spc_with_nts[[\"id\", \"nts_ind_id\"]], \n", - " left_on='IndividualID',\n", - " right_on='nts_ind_id', \n", - " how='inner').drop(columns=['nts_ind_id'])\n", - "\n", - "nts_individuals.rename(columns={\"id\": \"spc_id\"}, 
inplace=True)\n", - "nts_individuals.head(10)\n", - "\n", "# merge vehicle ownership data onto individuals\n", "individuals = individuals.merge(\n", " nts_individuals, \n", @@ -1901,8 +2216,7 @@ " right_on=\"spc_id\", \n", " how=\"left\").drop(columns= [\"spc_id\", \"IndividualID\"])\n", "\n", - "individuals.head(10)\n", - "\n" + "individuals.head(10)" ] }, { @@ -1914,7 +2228,7 @@ }, { "cell_type": "code", - "execution_count": 14, + "execution_count": 19, "metadata": {}, "outputs": [], "source": [ @@ -1937,7 +2251,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 20, "metadata": {}, "outputs": [], "source": [ @@ -1950,9 +2264,24 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 21, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "[('0_initial', 'individuals', 2960),\n", + " ('0_initial', 'households', 1709),\n", + " ('0_initial', 'activities', 11357),\n", + " ('0_initial', 'legs', 8397),\n", + " ('0_initial', 'legs_geo', 8397)]" + ] + }, + "execution_count": 21, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "row_counts" ] @@ -1968,7 +2297,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 22, "metadata": {}, "outputs": [], "source": [ @@ -2015,7 +2344,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 23, "metadata": {}, "outputs": [], "source": [ @@ -2028,27 +2357,395 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 24, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "[('0_initial', 'individuals', 2960),\n", + " ('0_initial', 'households', 1709),\n", + " ('0_initial', 'activities', 11357),\n", + " ('0_initial', 'legs', 8397),\n", + " ('0_initial', 'legs_geo', 8397),\n", + " ('1_filter_by_pid', 'individuals', 2931),\n", + " ('1_filter_by_pid', 'households', 1696),\n", + " ('1_filter_by_pid', 'activities', 11328),\n", + " 
('1_filter_by_pid', 'legs', 8397),\n", + " ('1_filter_by_pid', 'legs_geo', 8397)]" + ] + }, + "execution_count": 24, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "row_counts" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 25, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "[('0_initial', 'individuals', 2960),\n", + " ('0_initial', 'households', 1709),\n", + " ('0_initial', 'activities', 11357),\n", + " ('0_initial', 'legs', 8397),\n", + " ('0_initial', 'legs_geo', 8397),\n", + " ('1_filter_by_pid', 'individuals', 2931),\n", + " ('1_filter_by_pid', 'households', 1696),\n", + " ('1_filter_by_pid', 'activities', 11328),\n", + " ('1_filter_by_pid', 'legs', 8397),\n", + " ('1_filter_by_pid', 'legs_geo', 8397)]" + ] + }, + "execution_count": 25, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "sorted(row_counts, key=lambda x: x[0])" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 26, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
pidhidozonedzonepurporigin activitydestination activitymodeseqtsttetdurationstart_location_idend_location_idstart_location_geometry_wktend_location_geometry_wkt
5945492107E02002330E02006875otherhomeotherwalk1.01900-01-01 08:32:001900-01-01 09:05:000:33:00None156862412NonePOINT (-172747.9714739804 7132434.416198847)
6045502107E02002330E02002390otherhomeotherwalk1.01900-01-01 08:32:001900-01-01 09:05:000:33:00None859863886NonePOINT (-164145.12529813123 7133893.434519876)
9554862499E02002330E02002330homeeducationhomecar_passenger2.01900-01-01 15:25:001900-01-01 15:29:000:04:00None1328971154NonePOINT (-167258.92084511882 7143476.774006749)
136139755990E02002332E02002374shophomeshopcar_passenger1.01900-01-01 12:20:001900-01-01 12:25:000:05:00None944325256NonePOINT (-176622.00253619973 7134449.399056955)
138146256309E02002332E02002427otherhomeotherwalk1.01900-01-01 07:30:001900-01-01 08:00:000:30:00None1392368056NonePOINT (-164873.8980231174 7122957.632887748)
...................................................
8146750434320294NoneE02006852homeeducationhomewalk2.01900-01-01 15:30:001900-01-01 15:50:000:20:00None400341812NonePOINT (-175859.81928684 7137067.487501181)
8226757296322904NoneE02006852homeeducationhomecar_passenger2.01900-01-01 16:30:001900-01-01 16:33:000:03:00None893100964NonePOINT (-177193.1389936416 7138848.613389791)
8246761165324230NoneE02006861homeworkhomecar_passenger2.01900-01-01 16:30:001900-01-01 16:35:000:05:00None894561802NonePOINT (-175350.8653553922 7135901.867221266)
8281768117326237E02006861E02006875shophomeshoppt1.01900-01-01 11:56:001900-01-01 12:30:000:34:00None310161982NonePOINT (-170556.39694620587 7131821.287018999)
8329784710330759E02006875E02002347otherhomeotherwalk1.01900-01-01 09:00:001900-01-01 09:20:000:20:00None1083335384NonePOINT (-173477.85368187493 7141276.259348786)
\n", + "

364 rows × 16 columns

\n", + "
" + ], + "text/plain": [ + " pid hid ozone dzone purp origin activity \\\n", + "59 4549 2107 E02002330 E02006875 other home \n", + "60 4550 2107 E02002330 E02002390 other home \n", + "95 5486 2499 E02002330 E02002330 home education \n", + "136 13975 5990 E02002332 E02002374 shop home \n", + "138 14625 6309 E02002332 E02002427 other home \n", + "... ... ... ... ... ... ... \n", + "8146 750434 320294 None E02006852 home education \n", + "8226 757296 322904 None E02006852 home education \n", + "8246 761165 324230 None E02006861 home work \n", + "8281 768117 326237 E02006861 E02006875 shop home \n", + "8329 784710 330759 E02006875 E02002347 other home \n", + "\n", + " destination activity mode seq tst \\\n", + "59 other walk 1.0 1900-01-01 08:32:00 \n", + "60 other walk 1.0 1900-01-01 08:32:00 \n", + "95 home car_passenger 2.0 1900-01-01 15:25:00 \n", + "136 shop car_passenger 1.0 1900-01-01 12:20:00 \n", + "138 other walk 1.0 1900-01-01 07:30:00 \n", + "... ... ... ... ... \n", + "8146 home walk 2.0 1900-01-01 15:30:00 \n", + "8226 home car_passenger 2.0 1900-01-01 16:30:00 \n", + "8246 home car_passenger 2.0 1900-01-01 16:30:00 \n", + "8281 shop pt 1.0 1900-01-01 11:56:00 \n", + "8329 other walk 1.0 1900-01-01 09:00:00 \n", + "\n", + " tet duration start_location_id end_location_id \\\n", + "59 1900-01-01 09:05:00 0:33:00 None 156862412 \n", + "60 1900-01-01 09:05:00 0:33:00 None 859863886 \n", + "95 1900-01-01 15:29:00 0:04:00 None 1328971154 \n", + "136 1900-01-01 12:25:00 0:05:00 None 944325256 \n", + "138 1900-01-01 08:00:00 0:30:00 None 1392368056 \n", + "... ... ... ... ... 
\n", + "8146 1900-01-01 15:50:00 0:20:00 None 400341812 \n", + "8226 1900-01-01 16:33:00 0:03:00 None 893100964 \n", + "8246 1900-01-01 16:35:00 0:05:00 None 894561802 \n", + "8281 1900-01-01 12:30:00 0:34:00 None 310161982 \n", + "8329 1900-01-01 09:20:00 0:20:00 None 1083335384 \n", + "\n", + " start_location_geometry_wkt \\\n", + "59 None \n", + "60 None \n", + "95 None \n", + "136 None \n", + "138 None \n", + "... ... \n", + "8146 None \n", + "8226 None \n", + "8246 None \n", + "8281 None \n", + "8329 None \n", + "\n", + " end_location_geometry_wkt \n", + "59 POINT (-172747.9714739804 7132434.416198847) \n", + "60 POINT (-164145.12529813123 7133893.434519876) \n", + "95 POINT (-167258.92084511882 7143476.774006749) \n", + "136 POINT (-176622.00253619973 7134449.399056955) \n", + "138 POINT (-164873.8980231174 7122957.632887748) \n", + "... ... \n", + "8146 POINT (-175859.81928684 7137067.487501181) \n", + "8226 POINT (-177193.1389936416 7138848.613389791) \n", + "8246 POINT (-175350.8653553922 7135901.867221266) \n", + "8281 POINT (-170556.39694620587 7131821.287018999) \n", + "8329 POINT (-173477.85368187493 7141276.259348786) \n", + "\n", + "[364 rows x 16 columns]" + ] + }, + "execution_count": 26, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "# all rows where start_location_geometry_wkt is null\n", "legs_geo[legs_geo['start_location_geometry_wkt'].isnull()]\n", @@ -2066,9 +2763,337 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 27, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
pidhidozonedzonepurporigin activitydestination activitymodeseqtsttetdurationstart_location_idend_location_idstart_locend_loc
019989E02002330E02006875shophomeshopcar1.01900-01-01 09:00:001900-01-01 09:15:000:15:001328970162408195016POINT (-167397.56531315646 7143640.45559949)POINT (-171371.01028764987 7131169.312723003)
119989E02006875E02002330homeshophomecar2.01900-01-01 09:45:001900-01-01 10:00:000:15:004081950161328970162POINT (-171371.01028764987 7131169.312723003)POINT (-167397.56531315646 7143640.45559949)
220089E02002330E02006875shophomeshopcar_passenger1.01900-01-01 09:00:001900-01-01 09:15:000:15:00132897016254957076POINT (-167397.56531315646 7143640.45559949)POINT (-171754.62143938692 7132283.384033667)
320089E02006875E02002330homeshophomecar_passenger2.01900-01-01 09:45:001900-01-01 10:00:000:15:00549570761328970162POINT (-171754.62143938692 7132283.384033667)POINT (-167397.56531315646 7143640.45559949)
4312139E02002330E02002402otherhomeothercar1.01900-01-01 09:00:001900-01-01 09:07:000:07:00136415719823376925POINT (-165837.93994546734 7146289.198739764)POINT (-158590.10429323144 7132494.796008638)
...................................................
8392791889333558E02006876E02006875shophomeshopwalk1.01900-01-01 11:00:001900-01-01 11:45:000:45:0012344842261904725674POINT (-171040.97173284873 7127547.111372213)POINT (-172391.3534696827 7131595.359505144)
8393791889333558E02006875E02006876homeshophomecar_passenger2.01900-01-01 13:30:001900-01-01 13:50:000:20:0019047256741234484226POINT (-172391.3534696827 7131595.359505144)POINT (-171040.97173284873 7127547.111372213)
8394794272334647E02006876E02006875otherhomeothercar1.01900-01-01 11:00:001900-01-01 11:15:000:15:00123890187646375628POINT (-170465.43907988604 7128930.59006113)POINT (-172078.9593115899 7132361.310596656)
8395794272334647E02006875E02006876homeotherhomecar2.01900-01-01 12:40:001900-01-01 12:55:000:15:00463756281238901876POINT (-172078.9593115899 7132361.310596656)POINT (-170465.43907988604 7128930.59006113)
8396794273334647E02006876E02002337workhomeworkcar1.01900-01-01 09:20:001900-01-01 10:45:001:25:00123890187611990461POINT (-170465.43907988604 7128930.59006113)POINT (-190466.878660929 7146766.5424680635)
\n", + "

8397 rows × 16 columns

\n", + "
" + ], + "text/plain": [ + " pid hid ozone dzone purp origin activity \\\n", + "0 199 89 E02002330 E02006875 shop home \n", + "1 199 89 E02006875 E02002330 home shop \n", + "2 200 89 E02002330 E02006875 shop home \n", + "3 200 89 E02006875 E02002330 home shop \n", + "4 312 139 E02002330 E02002402 other home \n", + "... ... ... ... ... ... ... \n", + "8392 791889 333558 E02006876 E02006875 shop home \n", + "8393 791889 333558 E02006875 E02006876 home shop \n", + "8394 794272 334647 E02006876 E02006875 other home \n", + "8395 794272 334647 E02006875 E02006876 home other \n", + "8396 794273 334647 E02006876 E02002337 work home \n", + "\n", + " destination activity mode seq tst \\\n", + "0 shop car 1.0 1900-01-01 09:00:00 \n", + "1 home car 2.0 1900-01-01 09:45:00 \n", + "2 shop car_passenger 1.0 1900-01-01 09:00:00 \n", + "3 home car_passenger 2.0 1900-01-01 09:45:00 \n", + "4 other car 1.0 1900-01-01 09:00:00 \n", + "... ... ... ... ... \n", + "8392 shop walk 1.0 1900-01-01 11:00:00 \n", + "8393 home car_passenger 2.0 1900-01-01 13:30:00 \n", + "8394 other car 1.0 1900-01-01 11:00:00 \n", + "8395 home car 2.0 1900-01-01 12:40:00 \n", + "8396 work car 1.0 1900-01-01 09:20:00 \n", + "\n", + " tet duration start_location_id end_location_id \\\n", + "0 1900-01-01 09:15:00 0:15:00 1328970162 408195016 \n", + "1 1900-01-01 10:00:00 0:15:00 408195016 1328970162 \n", + "2 1900-01-01 09:15:00 0:15:00 1328970162 54957076 \n", + "3 1900-01-01 10:00:00 0:15:00 54957076 1328970162 \n", + "4 1900-01-01 09:07:00 0:07:00 1364157198 23376925 \n", + "... ... ... ... ... 
\n", + "8392 1900-01-01 11:45:00 0:45:00 1234484226 1904725674 \n", + "8393 1900-01-01 13:50:00 0:20:00 1904725674 1234484226 \n", + "8394 1900-01-01 11:15:00 0:15:00 1238901876 46375628 \n", + "8395 1900-01-01 12:55:00 0:15:00 46375628 1238901876 \n", + "8396 1900-01-01 10:45:00 1:25:00 1238901876 11990461 \n", + "\n", + " start_loc \\\n", + "0 POINT (-167397.56531315646 7143640.45559949) \n", + "1 POINT (-171371.01028764987 7131169.312723003) \n", + "2 POINT (-167397.56531315646 7143640.45559949) \n", + "3 POINT (-171754.62143938692 7132283.384033667) \n", + "4 POINT (-165837.93994546734 7146289.198739764) \n", + "... ... \n", + "8392 POINT (-171040.97173284873 7127547.111372213) \n", + "8393 POINT (-172391.3534696827 7131595.359505144) \n", + "8394 POINT (-170465.43907988604 7128930.59006113) \n", + "8395 POINT (-172078.9593115899 7132361.310596656) \n", + "8396 POINT (-170465.43907988604 7128930.59006113) \n", + "\n", + " end_loc \n", + "0 POINT (-171371.01028764987 7131169.312723003) \n", + "1 POINT (-167397.56531315646 7143640.45559949) \n", + "2 POINT (-171754.62143938692 7132283.384033667) \n", + "3 POINT (-167397.56531315646 7143640.45559949) \n", + "4 POINT (-158590.10429323144 7132494.796008638) \n", + "... ... 
\n", + "8392 POINT (-172391.3534696827 7131595.359505144) \n", + "8393 POINT (-171040.97173284873 7127547.111372213) \n", + "8394 POINT (-172078.9593115899 7132361.310596656) \n", + "8395 POINT (-170465.43907988604 7128930.59006113) \n", + "8396 POINT (-190466.878660929 7146766.5424680635) \n", + "\n", + "[8397 rows x 16 columns]" + ] + }, + "execution_count": 27, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "# TODO rename in 3.3_assign_facility_all script\n", "# rename start_location_geometry_wkt and end_location_geometry_wkt to start_loc and end_loc\n", @@ -2085,7 +3110,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 28, "metadata": {}, "outputs": [], "source": [ @@ -2155,7 +3180,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 29, "metadata": {}, "outputs": [], "source": [ @@ -2168,18 +3193,65 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 30, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "[('0_initial', 'individuals', 2960),\n", + " ('0_initial', 'households', 1709),\n", + " ('0_initial', 'activities', 11357),\n", + " ('0_initial', 'legs', 8397),\n", + " ('0_initial', 'legs_geo', 8397),\n", + " ('1_filter_by_pid', 'individuals', 2931),\n", + " ('1_filter_by_pid', 'households', 1696),\n", + " ('1_filter_by_pid', 'activities', 11328),\n", + " ('1_filter_by_pid', 'legs', 8397),\n", + " ('1_filter_by_pid', 'legs_geo', 8397),\n", + " ('2_filter_no_location', 'individuals', 2623),\n", + " ('2_filter_no_location', 'households', 1585),\n", + " ('2_filter_no_location', 'activities', 10258),\n", + " ('2_filter_no_location', 'legs', 7635),\n", + " ('2_filter_no_location', 'legs_geo', 7635)]" + ] + }, + "execution_count": 30, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "row_counts" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 31, "metadata": {}, - "outputs": 
[], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "0_initial - activities - 11357 rows: 100.0% rows remaining\n", + "1_filter_by_pid - activities - 11328 rows: 99.7% rows remaining\n", + "2_filter_no_location - activities - 10258 rows: 90.3% rows remaining\n", + "0_initial - households - 1709 rows: 100.0% rows remaining\n", + "1_filter_by_pid - households - 1696 rows: 99.2% rows remaining\n", + "2_filter_no_location - households - 1585 rows: 92.7% rows remaining\n", + "0_initial - individuals - 2960 rows: 100.0% rows remaining\n", + "1_filter_by_pid - individuals - 2931 rows: 99.0% rows remaining\n", + "2_filter_no_location - individuals - 2623 rows: 88.6% rows remaining\n", + "0_initial - legs - 8397 rows: 100.0% rows remaining\n", + "1_filter_by_pid - legs - 8397 rows: 100.0% rows remaining\n", + "2_filter_no_location - legs - 7635 rows: 90.9% rows remaining\n", + "0_initial - legs_geo - 8397 rows: 100.0% rows remaining\n", + "1_filter_by_pid - legs_geo - 8397 rows: 100.0% rows remaining\n", + "2_filter_no_location - legs_geo - 7635 rows: 90.9% rows remaining\n" + ] + } + ], "source": [ "def calculate_percentage_remaining(row_counts):\n", " \"\"\"\n", @@ -2232,9 +3304,35 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 32, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/tmp/ipykernel_3327128/1617488548.py:8: SettingWithCopyWarning: \n", + "A value is trying to be set on a copy of a slice from a DataFrame.\n", + "Try using .loc[row_indexer,col_indexer] = value instead\n", + "\n", + "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n", + " legs_geo[\"start_loc\"] = legs_geo[\"start_loc\"].apply(convert_to_point)\n", + "/tmp/ipykernel_3327128/1617488548.py:9: 
SettingWithCopyWarning: \n", + "A value is trying to be set on a copy of a slice from a DataFrame.\n", + "Try using .loc[row_indexer,col_indexer] = value instead\n", + "\n", + "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n", + " legs_geo[\"end_loc\"] = legs_geo[\"end_loc\"].apply(convert_to_point)\n" + ] + } + ], "source": [ "# Function to convert to Point if not already a Point\n", "def convert_to_point(value):\n", @@ -2262,7 +3360,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 33, "metadata": {}, "outputs": [], "source": [ @@ -2328,9 +3426,21 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 34, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + "Using simple trip based purpose parser, this assumes first activity is 'home'.\n", + "If you do not wish to assume this, try setting 'tour_based' = True (default).\n", + "\n", + "Using freq of 'None' for all trips.\n" + ] + } + ], "source": [ "population = load_travel_diary(\n", " trips=legs_geo,\n", @@ -2344,9 +3454,23 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 35, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Person: 200\n", + "{'hzone': 'E02002330', 'age': 38, 'sex': 'female', 'isPassenger': True, 'hasPTSubscription': False, 'isStudent': False, 'hhlIncome': 34167.0, 'CarAvailability': 'yes', 'BicycleAvailability': 'yes', 'hasLicence': True}\n", + "0:\tActivity(act:home, location:POINT (-167397.56531315646 7143640.45559949), time:00:00:00 --> 09:00:00, duration:9:00:00)\n", + "1:\tLeg(mode:car_passenger, area:POINT (-167397.56531315646 7143640.45559949) --> POINT (-171754.62143938692 7132283.384033667), time:09:00:00 --> 09:15:00, duration:0:15:00)\n", + "2:\tActivity(act:shop, location:POINT 
(-171754.62143938692 7132283.384033667), time:09:15:00 --> 09:45:00, duration:0:30:00)\n", + "3:\tLeg(mode:car_passenger, area:POINT (-171754.62143938692 7132283.384033667) --> POINT (-167397.56531315646 7143640.45559949), time:09:45:00 --> 10:00:00, duration:0:15:00)\n", + "4:\tActivity(act:home, location:POINT (-167397.56531315646 7143640.45559949), time:10:00:00 --> 00:00:00, duration:14:00:00)\n" + ] + } + ], "source": [ "population[89][200].print()" ] From 8d4b017e2b4a12e121c46c47f9d5496e352d9599 Mon Sep 17 00:00:00 2001 From: Hussein Mahfouz <45176416+Hussein-Mahfouz@users.noreply.github.com> Date: Fri, 13 Dec 2024 16:42:17 +0100 Subject: [PATCH 6/8] add taxi mode --- .../2_match_households_and_individuals.ipynb | 346 +++++++++--------- scripts/2_match_households_and_individuals.py | 2 +- scripts/3.2.2_assign_primary_zone_work.py | 2 +- scripts/run_pipeline.sh | 6 +- src/acbm/assigning/select_zone_primary.py | 2 +- src/acbm/assigning/utils.py | 2 + 6 files changed, 181 insertions(+), 179 deletions(-) diff --git a/notebooks/2_match_households_and_individuals.ipynb b/notebooks/2_match_households_and_individuals.ipynb index 94d53a1..50c7578 100644 --- a/notebooks/2_match_households_and_individuals.ipynb +++ b/notebooks/2_match_households_and_individuals.ipynb @@ -18,7 +18,7 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": 1, "metadata": {}, "outputs": [], "source": [ @@ -64,17 +64,17 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 4, "metadata": {}, "outputs": [], "source": [ "# useful variables\n", - "region = \"west-yorkshire\"" + "region = \"leeds\"" ] }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 5, "metadata": {}, "outputs": [ { @@ -144,121 +144,121 @@ " 0\n", " 0\n", " NaN\n", - " {'concert_f': 1.2791347489984115e-31, 'concert...\n", - " [1583, 13161]\n", - " [1582, 13160]\n", - " E02002183_0001_001\n", - " 11291218\n", - " 1\n", - " 2905399\n", - " E02002183\n", - " E00053954\n", 
- " [0]\n", - " 24.879356\n", - " False\n", + " {'concert_f': 3.6287833784528047e-16, 'concert...\n", + " [954, 1037, 1234, 2981, 6290, 9535, 10385, 106...\n", + " [955, 1036, 1235, 2980, 6291, 9536, 10384, 106...\n", + " E02002330_0001_001\n", + " 34051017\n", + " 1\n", + " 2911721\n", + " E02002330\n", + " E00059034\n", + " [0, 1]\n", + " 27.506445\n", " False\n", " False\n", + " True\n", " NaN\n", - " 3.0\n", " 2.0\n", - " J\n", - " 58.0\n", - " 1115.0\n", + " 4.0\n", + " Q\n", + " 88.0\n", + " 1184.0\n", " 6\n", " NaN\n", " NaN\n", - " E02002183_0001\n", + " E02002330_0001\n", " 1.0\n", - " 1.0\n", - " NaN\n", " 2.0\n", + " NaN\n", + " 6.0\n", " True\n", " 2.0\n", " 2\n", " 1\n", - " 86\n", + " 68\n", " 1\n", " 1.0\n", " \n", " \n", " 1\n", " 1\n", - " 1\n", + " 0\n", " NaN\n", - " {'concert_f': 9.743248151956307e-21, 'concert_...\n", - " [2900, 4948, 4972, 7424, 10284, 10586, 12199, ...\n", - " [2901, 4949, 4973, 7425, 10285, 10585, 12198, ...\n", - " E02002183_0002_001\n", - " 17291219\n", - " 1\n", - " 2905308\n", - " E02002183\n", - " E00053953\n", - " [1, 2]\n", - " 27.491207\n", - " False\n", + " {'concert_f': 9.903925281880971e-14, 'concert_...\n", + " [3435, 6069, 13203, 14704]\n", + " [3436, 6068, 13202, 14703]\n", + " E02002330_0001_002\n", + " 21040818\n", + " 1\n", + " 2904618\n", + " E02002330\n", + " E00059034\n", + " [0, 1]\n", + " 30.527805\n", + " True\n", " False\n", " True\n", - " NaN\n", + " 7.0\n", + " 2.0\n", " 3.0\n", - " NaN\n", - " C\n", - " 25.0\n", - " 1121.0\n", + " I\n", + " 56.0\n", + " 5434.0\n", " 6\n", " NaN\n", " NaN\n", - " E02002183_0002\n", + " E02002330_0001\n", " 1.0\n", - " 3.0\n", + " 2.0\n", " NaN\n", " 6.0\n", " True\n", " 2.0\n", " 2\n", + " 2\n", + " 65\n", " 1\n", - " 74\n", - " 3\n", - " 1.0\n", + " 6.0\n", " \n", " \n", " 2\n", " 2\n", " 1\n", " NaN\n", - " {'concert_f': 8.46716103992468e-16, 'concert_f...\n", - " [3010, 6389, 9448, 10184, 11598]\n", - " [3011, 6388, 9447, 10183, 11599]\n", - " 
E02002183_0002_002\n", - " 17070713\n", - " 2\n", - " 2907681\n", - " E02002183\n", - " E00053953\n", - " [1, 2]\n", - " 17.310829\n", + " {'concert_f': 1.2791347489984115e-31, 'concert...\n", + " [762, 5168, 6201, 8977]\n", + " [761, 5169, 6200, 8976]\n", + " E02002330_0002_001\n", + " 11131017\n", + " 1\n", + " 2902311\n", + " E02002330\n", + " E00059022\n", + " [2]\n", + " 22.261669\n", + " False\n", + " False\n", " False\n", - " True\n", - " True\n", " NaN\n", " 2.0\n", - " 4.0\n", - " P\n", - " 85.0\n", - " 2311.0\n", + " 3.0\n", + " Q\n", + " 86.0\n", + " 2211.0\n", " 6\n", " NaN\n", " NaN\n", - " E02002183_0002\n", + " E02002330_0002\n", + " 2.0\n", " 1.0\n", - " 3.0\n", " NaN\n", - " 6.0\n", + " 5.0\n", " True\n", - " 2.0\n", - " 2\n", + " 1.0\n", " 2\n", - " 68\n", + " 1\n", + " 86\n", " 1\n", " 2.0\n", " \n", @@ -266,40 +266,40 @@ " 3\n", " 3\n", " 2\n", - " 56126.0\n", - " {'concert_f': 1.8844366073608398, 'concert_fs'...\n", - " [366, 867, 2096, 3678, 5212, 5450, 8145, 9254,...\n", - " [365, 868, 2097, 3677, 5213, 5451, 8146, 9253,...\n", - " E02002183_0003_001\n", - " 20310313\n", - " 1\n", - " 2902817\n", - " E02002183\n", - " E00053689\n", - " [3, 4]\n", - " 20.852091\n", + " NaN\n", + " {'concert_f': 7.754311082130982e-10, 'concert_...\n", + " [1580, 5417, 5956, 12901]\n", + " [1581, 5416, 5957, 12900]\n", + " E02002330_0003_001\n", + " 15020311\n", + " 1\n", + " 2911131\n", + " E02002330\n", + " E00059022\n", + " [3, 4, 5]\n", + " 21.434204\n", " False\n", " False\n", " False\n", " NaN\n", - " 2.0\n", " 1.0\n", - " C\n", - " 31.0\n", - " 3422.0\n", - " 1\n", - " 32857.859375\n", - " 14.360952\n", - " E02002183_0003\n", - " 4.0\n", " 3.0\n", + " O\n", + " 84.0\n", + " 3314.0\n", + " 6\n", + " NaN\n", + " NaN\n", + " E02002330_0003\n", + " 4.0\n", + " 1.0\n", " NaN\n", " 6.0\n", " True\n", " 2.0\n", " 1\n", " 1\n", - " 27\n", + " 58\n", " 1\n", " 4.0\n", " \n", @@ -307,42 +307,42 @@ " 4\n", " 4\n", " 2\n", - " NaN\n", - " {'concert_f': 
4.877435207366943, 'concert_fs':...\n", - " [1289, 12528, 12870]\n", - " [1288, 12529, 12871]\n", - " E02002183_0003_002\n", - " 13010909\n", - " 3\n", - " 2900884\n", - " E02002183\n", - " E00053689\n", - " [3, 4]\n", - " 20.032526\n", - " False\n", + " 508.0\n", + " {'concert_f': 2.1388457227544677e-08, 'concert...\n", + " [318, 3145, 10496, 12819, 13943]\n", + " [319, 3144, 10495, 12818, 13942]\n", + " E02002330_0003_002\n", + " 20090607\n", + " 1\n", + " 2909582\n", + " E02002330\n", + " E00059022\n", + " [3, 4, 5]\n", + " 12.644703\n", " False\n", " False\n", - " 1.0\n", - " 2.0\n", + " True\n", + " NaN\n", " 3.0\n", - " J\n", - " 62.0\n", - " 7214.0\n", + " 2.0\n", + " O\n", + " 84.0\n", + " 1131.0\n", " 1\n", - " 18162.451172\n", - " 9.439944\n", - " E02002183_0003\n", + " 36646.464844\n", + " 19.576103\n", + " E02002330_0003\n", " 4.0\n", - " 3.0\n", + " 1.0\n", " NaN\n", " 6.0\n", " True\n", " 2.0\n", " 1\n", " 2\n", - " 26\n", + " 56\n", " 1\n", - " 6.0\n", + " 1.0\n", " \n", " \n", "\n", @@ -351,76 +351,76 @@ "text/plain": [ " id household workplace \\\n", "0 0 0 NaN \n", - "1 1 1 NaN \n", + "1 1 0 NaN \n", "2 2 1 NaN \n", - "3 3 2 56126.0 \n", - "4 4 2 NaN \n", + "3 3 2 NaN \n", + "4 4 2 508.0 \n", "\n", " events \\\n", - "0 {'concert_f': 1.2791347489984115e-31, 'concert... \n", - "1 {'concert_f': 9.743248151956307e-21, 'concert_... \n", - "2 {'concert_f': 8.46716103992468e-16, 'concert_f... \n", - "3 {'concert_f': 1.8844366073608398, 'concert_fs'... \n", - "4 {'concert_f': 4.877435207366943, 'concert_fs':... \n", + "0 {'concert_f': 3.6287833784528047e-16, 'concert... \n", + "1 {'concert_f': 9.903925281880971e-14, 'concert_... \n", + "2 {'concert_f': 1.2791347489984115e-31, 'concert... \n", + "3 {'concert_f': 7.754311082130982e-10, 'concert_... \n", + "4 {'concert_f': 2.1388457227544677e-08, 'concert... \n", "\n", " weekday_diaries \\\n", - "0 [1583, 13161] \n", - "1 [2900, 4948, 4972, 7424, 10284, 10586, 12199, ... 
\n", - "2 [3010, 6389, 9448, 10184, 11598] \n", - "3 [366, 867, 2096, 3678, 5212, 5450, 8145, 9254,... \n", - "4 [1289, 12528, 12870] \n", + "0 [954, 1037, 1234, 2981, 6290, 9535, 10385, 106... \n", + "1 [3435, 6069, 13203, 14704] \n", + "2 [762, 5168, 6201, 8977] \n", + "3 [1580, 5417, 5956, 12901] \n", + "4 [318, 3145, 10496, 12819, 13943] \n", "\n", " weekend_diaries orig_pid \\\n", - "0 [1582, 13160] E02002183_0001_001 \n", - "1 [2901, 4949, 4973, 7425, 10285, 10585, 12198, ... E02002183_0002_001 \n", - "2 [3011, 6388, 9447, 10183, 11599] E02002183_0002_002 \n", - "3 [365, 868, 2097, 3677, 5213, 5451, 8146, 9253,... E02002183_0003_001 \n", - "4 [1288, 12529, 12871] E02002183_0003_002 \n", + "0 [955, 1036, 1235, 2980, 6291, 9536, 10384, 106... E02002330_0001_001 \n", + "1 [3436, 6068, 13202, 14703] E02002330_0001_002 \n", + "2 [761, 5169, 6200, 8976] E02002330_0002_001 \n", + "3 [1581, 5416, 5957, 12900] E02002330_0003_001 \n", + "4 [319, 3144, 10495, 12818, 13942] E02002330_0003_002 \n", "\n", - " id_tus_hh id_tus_p pid_hs msoa11cd oa11cd members bmi \\\n", - "0 11291218 1 2905399 E02002183 E00053954 [0] 24.879356 \n", - "1 17291219 1 2905308 E02002183 E00053953 [1, 2] 27.491207 \n", - "2 17070713 2 2907681 E02002183 E00053953 [1, 2] 17.310829 \n", - "3 20310313 1 2902817 E02002183 E00053689 [3, 4] 20.852091 \n", - "4 13010909 3 2900884 E02002183 E00053689 [3, 4] 20.032526 \n", + " id_tus_hh id_tus_p pid_hs msoa11cd oa11cd members bmi \\\n", + "0 34051017 1 2911721 E02002330 E00059034 [0, 1] 27.506445 \n", + "1 21040818 1 2904618 E02002330 E00059034 [0, 1] 30.527805 \n", + "2 11131017 1 2902311 E02002330 E00059022 [2] 22.261669 \n", + "3 15020311 1 2911131 E02002330 E00059022 [3, 4, 5] 21.434204 \n", + "4 20090607 1 2909582 E02002330 E00059022 [3, 4, 5] 12.644703 \n", "\n", " has_cardiovascular_disease has_diabetes has_high_blood_pressure \\\n", - "0 False False False \n", - "1 False False True \n", - "2 False True True \n", + "0 False False True \n", + "1 True 
False True \n", + "2 False False False \n", "3 False False False \n", - "4 False False False \n", + "4 False False True \n", "\n", " number_medications self_assessed_health life_satisfaction sic1d2007 \\\n", - "0 NaN 3.0 2.0 J \n", - "1 NaN 3.0 NaN C \n", - "2 NaN 2.0 4.0 P \n", - "3 NaN 2.0 1.0 C \n", - "4 1.0 2.0 3.0 J \n", + "0 NaN 2.0 4.0 Q \n", + "1 7.0 2.0 3.0 I \n", + "2 NaN 2.0 3.0 Q \n", + "3 NaN 1.0 3.0 O \n", + "4 NaN 3.0 2.0 O \n", "\n", " sic2d2007 soc2010 pwkstat salary_yearly salary_hourly hid \\\n", - "0 58.0 1115.0 6 NaN NaN E02002183_0001 \n", - "1 25.0 1121.0 6 NaN NaN E02002183_0002 \n", - "2 85.0 2311.0 6 NaN NaN E02002183_0002 \n", - "3 31.0 3422.0 1 32857.859375 14.360952 E02002183_0003 \n", - "4 62.0 7214.0 1 18162.451172 9.439944 E02002183_0003 \n", + "0 88.0 1184.0 6 NaN NaN E02002330_0001 \n", + "1 56.0 5434.0 6 NaN NaN E02002330_0001 \n", + "2 86.0 2211.0 6 NaN NaN E02002330_0002 \n", + "3 84.0 3314.0 6 NaN NaN E02002330_0003 \n", + "4 84.0 1131.0 1 36646.464844 19.576103 E02002330_0003 \n", "\n", " nssec8 accommodation_type communal_type num_rooms central_heat tenure \\\n", - "0 1.0 1.0 NaN 2.0 True 2.0 \n", - "1 1.0 3.0 NaN 6.0 True 2.0 \n", - "2 1.0 3.0 NaN 6.0 True 2.0 \n", - "3 4.0 3.0 NaN 6.0 True 2.0 \n", - "4 4.0 3.0 NaN 6.0 True 2.0 \n", + "0 1.0 2.0 NaN 6.0 True 2.0 \n", + "1 1.0 2.0 NaN 6.0 True 2.0 \n", + "2 2.0 1.0 NaN 5.0 True 1.0 \n", + "3 4.0 1.0 NaN 6.0 True 2.0 \n", + "4 4.0 1.0 NaN 6.0 True 2.0 \n", "\n", " num_cars sex age_years ethnicity nssec8_household \n", - "0 2 1 86 1 1.0 \n", - "1 2 1 74 3 1.0 \n", - "2 2 2 68 1 2.0 \n", - "3 1 1 27 1 4.0 \n", - "4 1 2 26 1 6.0 " + "0 2 1 68 1 1.0 \n", + "1 2 2 65 1 6.0 \n", + "2 2 1 86 1 2.0 \n", + "3 1 1 58 1 4.0 \n", + "4 1 2 56 1 1.0 " ] }, - "execution_count": 3, + "execution_count": 5, "metadata": {}, "output_type": "execute_result" } @@ -433,7 +433,7 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 6, "metadata": {}, "outputs": [], "source": [ @@ 
-447,7 +447,7 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 7, "metadata": {}, "outputs": [], "source": [ @@ -470,7 +470,7 @@ }, { "cell_type": "code", - "execution_count": 16, + "execution_count": 8, "metadata": {}, "outputs": [], "source": [ @@ -487,7 +487,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 9, "metadata": {}, "outputs": [], "source": [ @@ -539,7 +539,7 @@ }, { "cell_type": "code", - "execution_count": 18, + "execution_count": 10, "metadata": {}, "outputs": [], "source": [ @@ -587,7 +587,7 @@ }, { "cell_type": "code", - "execution_count": 19, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -635,11 +635,11 @@ }, { "cell_type": "code", - "execution_count": 30, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ - "years = [2018, 2019, 2021, 2022]\n", + "years = [2019, 2021, 2022]\n", "\n", "nts_individuals = nts_filter_by_year(nts_individuals, psu, years)\n", "nts_households = nts_filter_by_year(nts_households, psu, years)\n", diff --git a/scripts/2_match_households_and_individuals.py b/scripts/2_match_households_and_individuals.py index 476d429..864a999 100644 --- a/scripts/2_match_households_and_individuals.py +++ b/scripts/2_match_households_and_individuals.py @@ -1040,7 +1040,7 @@ def get_interim_path( 9: "pt", #'Non-local bus', 10: "pt", #'London Underground', 11: "pt", #'Surface Rail', - 12: "car_passenger", #'Taxi/minicab', + 12: "taxi", #'Taxi/minicab', 13: "pt", #'Other public transport', -10: "DEAD", -8: "NA", diff --git a/scripts/3.2.2_assign_primary_zone_work.py b/scripts/3.2.2_assign_primary_zone_work.py index 66ad872..47b964f 100644 --- a/scripts/3.2.2_assign_primary_zone_work.py +++ b/scripts/3.2.2_assign_primary_zone_work.py @@ -101,7 +101,7 @@ def main(config_file): "Train": "pt", "Underground, metro, light rail, tram": "pt", "On foot": "walk", - "Taxi": "car_passenger", + "Taxi": "taxi", "Other method of travel to work": "other", 
"Bicycle": "cycle", "Passenger in a car or van": "car_passenger", diff --git a/scripts/run_pipeline.sh b/scripts/run_pipeline.sh index 025ed8c..9da0845 100755 --- a/scripts/run_pipeline.sh +++ b/scripts/run_pipeline.sh @@ -2,9 +2,9 @@ set -e -python scripts/0_preprocess_inputs.py --config_file $1 -python scripts/0.1_run_osmox.py --config_file $1 -python scripts/1_prep_synthpop.py --config_file $1 +# python scripts/0_preprocess_inputs.py --config_file $1 +# python scripts/0.1_run_osmox.py --config_file $1 +# python scripts/1_prep_synthpop.py --config_file $1 python scripts/2_match_households_and_individuals.py --config_file $1 python scripts/3.1_assign_primary_feasible_zones.py --config_file $1 python scripts/3.2.1_assign_primary_zone_edu.py --config_file $1 diff --git a/src/acbm/assigning/select_zone_primary.py b/src/acbm/assigning/select_zone_primary.py index 7b63435..e2b5877 100644 --- a/src/acbm/assigning/select_zone_primary.py +++ b/src/acbm/assigning/select_zone_primary.py @@ -241,7 +241,7 @@ def _get_zones_using_time_estimate( The zone that has the estimated time closest to the given time. """ - acceptable_modes = ["car", "car_passenger", "pt", "walk", "cycle"] + acceptable_modes = ["car", "car_passenger", "pt", "walk", "cycle", "taxi"] if mode is not None and mode not in acceptable_modes: error_message = f"Invalid mode: {mode}. Mode must be one of {acceptable_modes}." 
diff --git a/src/acbm/assigning/utils.py b/src/acbm/assigning/utils.py index 2f6caaf..27d9ce7 100644 --- a/src/acbm/assigning/utils.py +++ b/src/acbm/assigning/utils.py @@ -334,6 +334,7 @@ def zones_to_time_matrix( mode_speeds_mps = { "car": 20 * 1000 / 3600, "car_passenger": 20 * 1000 / 3600, + "taxi": 20 * 1000 / 3600, "pt": 15 * 1000 / 3600, "cycle": 15 * 1000 / 3600, "walk": 5 * 1000 / 3600, @@ -452,6 +453,7 @@ def intrazone_time(zones: gpd.GeoDataFrame, key_column: str) -> dict: mode_speeds_mps = { "car": 20 * 1000 / 3600, "car_passenger": 20 * 1000 / 3600, + "taxi": 20 * 1000 / 3600, "pt": 15 * 1000 / 3600, "cycle": 15 * 1000 / 3600, "walk": 5 * 1000 / 3600, From e7b375c0cf27bd272dddf9d1376ab67c14212406 Mon Sep 17 00:00:00 2001 From: Hussein Mahfouz <45176416+Hussein-Mahfouz@users.noreply.github.com> Date: Fri, 13 Dec 2024 16:46:44 +0100 Subject: [PATCH 7/8] edit optional type --- src/acbm/postprocessing/matsim.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/src/acbm/postprocessing/matsim.py b/src/acbm/postprocessing/matsim.py index b8d67c2..fd99b4c 100644 --- a/src/acbm/postprocessing/matsim.py +++ b/src/acbm/postprocessing/matsim.py @@ -1,5 +1,3 @@ -from typing import Optional - import numpy as np import pandas as pd @@ -282,8 +280,8 @@ def get_pt_subscription(individuals: pd.DataFrame, age_threshold=60): def get_students( individuals: pd.DataFrame, activities: pd.DataFrame, - age_base_threshold: Optional[int] = None, - age_upper_threshold: Optional[int] = None, + age_base_threshold: int | None = None, + age_upper_threshold: int | None = None, activity: str = "education", ) -> pd.DataFrame: """ From 4cde9a3982a070ee9e0016cc01b6251eb3aabd65 Mon Sep 17 00:00:00 2001 From: Hussein Mahfouz <45176416+Hussein-Mahfouz@users.noreply.github.com> Date: Fri, 13 Dec 2024 16:54:46 +0100 Subject: [PATCH 8/8] update config template --- config/README.md | 63 ++++++++++++++++++++++++++++++++++++++++-------- 1 file changed, 53 insertions(+), 10 
deletions(-) diff --git a/config/README.md b/config/README.md index c1bf75e..8794cb9 100644 --- a/config/README.md +++ b/config/README.md @@ -3,31 +3,74 @@ The config.toml file has an explanation for each parameter. You can copy the tom ``` toml [parameters] seed = 0 -region = "leeds" # this is used to query poi data from osm and to load in SPC data -number_of_households = 5000 # how many people from the SPC do we want to run the model for? Comment out if you want to run the analysis on the entire SPC populaiton -zone_id = "OA21CD" # "OA21CD": OA level, "MSOA11CD": MSOA level -travel_times = true # Only set to true if you have travel time matrix at the level specified in boundary_geography -boundary_geography = "OA" + # this is used to query poi data from osm and to load in SPC data +region = "leeds" +# how many people from the SPC do we want to run the model for? Comment out if you want to run the analysis on the entire SPC population +number_of_households = 2500 +# "OA21CD": OA level, "MSOA11CD": MSOA level +zone_id = "MSOA21CD" + # Only set to true if you have travel time matrix at the level specified in boundary_geography +travel_times = false +boundary_geography = "MSOA" +# NTS years to use +nts_years = [2019, 2021, 2022] +# NTS regions to use +nts_regions = [ + 'Yorkshire and the Humber', + 'North West', + 'North East', + 'East Midlands', + 'West Midlands', + 'East of England', + 'South East', + 'South West'] +# nts day of the week to use +# 1: Monday, 2: Tuesday, 3: Wednesday, 4: Thursday, 5: Friday, 6: Saturday, 7: Sunday +nts_day_of_week = 3 +# what crs do we want the output to be in? (just add the number, e.g. 3857) +output_crs = 3857 [matching] # for optional and required columns, see the [iterative_match_categorical](https://github.com/Urban-Analytics-Technology-Platform/acbm/blob/ca181c54d7484ebe44706ff4b43c26286b22aceb/src/acbm/matching.py#L110) function # Do not add any column not listed below.
You can only move a column from optional to require (or vise versa) -required_columns = ["number_adults", "number_children"] +required_columns = [ + "number_adults", + "number_children", + "num_pension_age", +] optional_columns = [ "number_cars", - "num_pension_age", "rural_urban_2_categories", "employment_status", "tenure_status", ] -n_matches = 10 # What is the maximum number of NTS matches we want for each SPC household? +# What is the maximum number of NTS matches we want for each SPC household? +n_matches = 10 [work_assignment] -use_percentages = true # if true, optimization problem will try to minimize percentage difference at OD level (not absolute numbers). Recommended to set it to true +commute_level = "MSOA" +# if true, optimization problem will try to minimize percentage difference at OD level (not absolute numbers). Recommended to set it to true +use_percentages = true # weights to add for each objective in the optimization problem weight_max_dev = 0.2 weight_total_dev = 0.8 -max_zones = 8 # maximum number of feasible zones to include in the optimization problem (less zones makes problem smaller - so faster, but at the cost of a better solution) +# maximum number of feasible zones to include in the optimization problem (fewer zones make the problem smaller - so faster, but at the cost of a better solution) +max_zones = 10 +[postprocessing] +pam_jitter = 30 +pam_min_duration = 10 +# for get_pt_subscription: everyone above this age has a subscription (pensioners get free travel) +# TODO: more sophisticated approach +pt_subscription_age = 66 +# to define if a person is a student: +# everyone below this age is a student +student_age_base = 16 +# everyone below this age that has at least one "education" activity is a student +student_age_upper = 30 +# everyone who uses one of the modes below is classified as a passenger (isPassenger = True) +modes_passenger = ['car_passenger', 'taxi'] +# yearly state pension: for getting hhlIncome of pensioners +state_pension =
11502 ```