Merge pull request #71 from Urban-Analytics-Technology-Platform/53-paths
* Revise the filepath structure for pipeline outputs, add `@property` methods to the config that provide the paths, and refactor scripts to use them

* Add a `Population` class for reading pipeline outputs (see the sketch after this list)

* Fallback download option when pyrosm fails (e.g. for West Yorkshire)

* Configurable time tolerance

* Rewrite of the run pipeline script in Python

* Multiprocessing for script 3.3
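
The `Population` class itself is not shown in this summary. Purely as a hypothetical illustration of reading pipeline outputs under the revised layout (the output file names come from the README tree below; the class shape and method names are assumptions, not the actual acbm API):

```python
from pathlib import Path

import pandas as pd


class Population:
    """Hypothetical reader for outputs under data/outputs/<config_id>/ (illustrative only)."""

    def __init__(self, outputs_dir: str | Path, config_id: str):
        self.root = Path(outputs_dir) / config_id

    @property
    def people(self) -> pd.DataFrame:
        return pd.read_csv(self.root / "people.csv")

    @property
    def households(self) -> pd.DataFrame:
        return pd.read_csv(self.root / "households.csv")

    @property
    def legs(self) -> pd.DataFrame:
        return pd.read_csv(self.root / "legs.csv")

    @property
    def legs_with_locations(self) -> pd.DataFrame:
        return pd.read_parquet(self.root / "legs_with_locations.parquet")


# Usage sketch; the config id is left as a placeholder
pop = Population("data/outputs", "<config_id>")
print(pop.people.head())
```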
sgreenbury authored Jan 30, 2025
2 parents 1cec3f7 + 256ebde commit cdb1389
Showing 36 changed files with 5,523 additions and 703 deletions.
58 changes: 29 additions & 29 deletions README.md
@@ -89,37 +89,37 @@ The pipeline is a series of scripts that are run in sequence to generate the act
│   │   │   ├── trip_eul_2002-2022.tab
│   │   │   └── <other_nts_tables>.tab
│   │   ├── travel_times
│   │   │   ├── oa
│   │   │   │   └── travel_time_matrix.parquet
│   │   │   └── msoa
│   │   │       └── travel_time_matrix.parquet
│   │   ├── oa
│   │   │   └── travel_time_matrix.parquet
│   │   └── msoa
│   │       └── travel_time_matrix.parquet
│   │   ├── ODWP01EW_OA.zip
│   │   ├── ODWP15EW_MSOA_v1.zip
│   │   └── spc_output
│   │       ├── <region>_people_hh.parquet (Generated in Script 1)
│   │       ├── <region>_people_tu.parquet (Generated in Script 1)
│   │       └── raw
│   │           ├── <region>_households.parquet
│   │           ├── <region>_info_per_msoa.json
│   │           ├── <region>.pb
│   │           ├── <region>_people.parquet
│   │           ├── <region>_time_use_diaries.parquet
│   │           ├── <region>_venues.parquet
│   │           └── README.md
│   ├── interim
│   │   ├── assigning (Generated in Script 3)
│   │   └── matching (Generated in Script 2)
│   ├── processed
│   │   └── acbm_<config_name>_<date>
│   │       ├── activities.csv
│   │       ├── households.csv
│   │       ├── legs.csv
│   │       ├── legs_with_locations.parquet
│   │       ├── people.csv
│   │       └── plans.xml
│   ├── plots
│   │   ├── assigning
│   │   └── validation
│   │   └── spc_output
│   │       └── raw
│   │           ├── <region>_households.parquet
│   │           ├── <region>_info_per_msoa.json
│   │           ├── <region>.pb
│   │           ├── <region>_people.parquet
│   │           ├── <region>_time_use_diaries.parquet
│   │           ├── <region>_venues.parquet
│   │           └── README.md
│   ├── outputs
│   │   └── <config_id>
│   │       ├── interim
│   │       │   ├── <region>_people_hh.parquet (Generated in Script 1)
│   │       │   ├── assigning (Generated in Script 3)
│   │       │   └── matching (Generated in Script 2)
│   │       ├── activities.csv
│   │       ├── households.csv
│   │       ├── legs.csv
│   │       ├── legs_with_locations.parquet
│   │       ├── people.csv
│   │       ├── plans.xml
│   │       └── plots
│   │           ├── assigning
│   │           └── validation
```
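
The commit message above mentions `@property` methods on the config that provide these paths. A minimal sketch of that idea, assuming a simple wrapper class (the names here are illustrative, not the actual acbm API):

```python
from pathlib import Path


class ConfigPaths:
    """Sketch of config-provided output paths; attribute and property names are assumptions."""

    def __init__(self, config_id: str, data_dir: str | Path = "data"):
        self.id = config_id
        self.data_dir = Path(data_dir)

    @property
    def output_path(self) -> Path:
        return self.data_dir / "outputs" / self.id

    @property
    def interim_path(self) -> Path:
        return self.output_path / "interim"

    @property
    def plots_path(self) -> Path:
        return self.output_path / "plots"


paths = ConfigPaths("<config_id>")
print(paths.interim_path)  # data/outputs/<config_id>/interim
```

Scripts can then build every read and write from these properties instead of hard-coding the previous `data/interim` and `data/processed` locations.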

## Step 1: Prepare Data Inputs
12 changes: 12 additions & 0 deletions config/base.toml
@@ -48,3 +48,15 @@ max_zones = 8 # maximum number of feasible zones to include in the opti
[postprocessing]
pam_jitter = 30
pam_min_duration = 10
# for get_pt_subscription: everyone above this age has a subscription (pensioners get free travel)
# TODO: more sophisticated approach
pt_subscription_age = 66
# to define if a person is a student:
# everyone below this age is a student
student_age_base = 16
# everyone below this age that has at least one "education" activity is a student
student_age_upper = 30
# everyone who uses one of the modes below is classified as a passenger (isPassenger = True)
modes_passenger = ['car_passenger', 'taxi']
# yearly state pension: for getting hhlIncome of pensioners
state_pension = 11502
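
As a rough illustration of how these postprocessing parameters could be applied to an individual (the function and field names are assumptions, not the pipeline's actual code):

```python
# Hypothetical values mirroring the config above
PT_SUBSCRIPTION_AGE = 66
STUDENT_AGE_BASE = 16
STUDENT_AGE_UPPER = 30
MODES_PASSENGER = ["car_passenger", "taxi"]


def classify_person(age: int, has_education_activity: bool, modes: list[str]) -> dict:
    """Illustrative classification following the commented rules above."""
    return {
        # everyone above pt_subscription_age gets a subscription (free travel for pensioners)
        "hasPTSubscription": age > PT_SUBSCRIPTION_AGE,
        # under student_age_base: always a student;
        # up to student_age_upper: a student only if they have an "education" activity
        "isStudent": age < STUDENT_AGE_BASE
        or (age < STUDENT_AGE_UPPER and has_education_activity),
        # anyone travelling by one of modes_passenger is flagged as a passenger
        "isPassenger": any(m in MODES_PASSENGER for m in modes),
    }


print(classify_person(70, False, ["pt"]))
# {'hasPTSubscription': True, 'isStudent': False, 'isPassenger': False}
```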
33 changes: 33 additions & 0 deletions config/greater-london.toml
@@ -0,0 +1,33 @@
[parameters]
seed = 0
region = "greater-london"
zone_id = "MSOA21CD"
travel_times = false
boundary_geography = "MSOA"
nts_years = [2019, 2021, 2022]
nts_regions = ["London"]
nts_day_of_week = 3
output_crs = 4326

[work_assignment]
use_percentages = true
weight_max_dev = 0.0
weight_total_dev = 1.0
max_zones = 4
commute_level = "MSOA"

[matching]
required_columns = ["number_adults", "number_children"]
optional_columns = [
"number_cars",
"num_pension_age",
"rural_urban_2_categories",
"employment_status",
"tenure_status",
]
n_matches = 10
chunk_size = 50000

[postprocessing]
pam_jitter = 30
pam_min_duration = 10
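
These region configs are plain TOML; a minimal sketch of reading one with the standard library (the project itself depends on `tomlkit`, but the resulting data has the same shape):

```python
import tomllib  # stdlib in Python 3.11+; on 3.10 the `tomli` package provides the same API

with open("config/greater-london.toml", "rb") as f:
    config = tomllib.load(f)

params = config["parameters"]
print(params["region"], params["zone_id"], params["nts_years"])
# greater-london MSOA21CD [2019, 2021, 2022]
```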
42 changes: 42 additions & 0 deletions config/leeds.toml
@@ -0,0 +1,42 @@
[parameters]
seed = 0
region = "leeds"
zone_id = "OA21CD"
travel_times = false
boundary_geography = "OA"
nts_years = [2019, 2021, 2022]
nts_regions = [
'Yorkshire and the Humber',
'North West',
'North East',
'East Midlands',
'West Midlands',
'East of England',
'South East',
'South West',
]
nts_day_of_week = 3
output_crs = 4326

[work_assignment]
use_percentages = false
weight_max_dev = 0.0
weight_total_dev = 1.0
max_zones = 4
commute_level = "OA"

[matching]
required_columns = ["number_adults", "number_children"]
optional_columns = [
"number_cars",
"num_pension_age",
"rural_urban_2_categories",
"employment_status",
"tenure_status",
]
n_matches = 10
chunk_size = 50000

[postprocessing]
pam_jitter = 30
pam_min_duration = 10
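
The Leeds configs also fix `seed = 0`; a generic sketch of how a seed like this keeps random sampling reproducible (not acbm's actual code, and the zone codes are made up):

```python
import numpy as np

seed = 0  # [parameters].seed from config/leeds.toml
rng = np.random.default_rng(seed)

# e.g. reproducibly sample two candidate zones
candidates = ["E00056750", "E00056751", "E00056752", "E00056753"]
print(rng.choice(candidates, size=2, replace=False))
```

Running this twice with the same seed gives the same draw.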
42 changes: 42 additions & 0 deletions config/leeds_with_travel_times.toml
@@ -0,0 +1,42 @@
[parameters]
seed = 0
region = "leeds"
zone_id = "OA21CD"
travel_times = true
boundary_geography = "OA"
nts_years = [2019, 2021, 2022]
nts_regions = [
'Yorkshire and the Humber',
'North West',
'North East',
'East Midlands',
'West Midlands',
'East of England',
'South East',
'South West',
]
nts_day_of_week = 3
output_crs = 4326

[work_assignment]
use_percentages = false
weight_max_dev = 0.0
weight_total_dev = 1.0
max_zones = 4
commute_level = "OA"

[matching]
required_columns = ["number_adults", "number_children"]
optional_columns = [
"number_cars",
"num_pension_age",
"rural_urban_2_categories",
"employment_status",
"tenure_status",
]
n_matches = 10
chunk_size = 50000

[postprocessing]
pam_jitter = 30
pam_min_duration = 10
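
This file differs from `config/leeds.toml` only in `travel_times = true`, which — judging by the name and the `travel_times` folder in the README tree — switches the pipeline to a precomputed travel time matrix. A hedged sketch of what that switch might look like (paths follow the README tree; the function and fallback behaviour are assumptions):

```python
from pathlib import Path

import pandas as pd


def load_travel_times(
    external_dir: Path, commute_level: str, use_matrix: bool
) -> pd.DataFrame | None:
    """Illustrative only: return the precomputed matrix when travel_times is enabled."""
    if not use_matrix:
        # travel_times = false: no matrix is read and the pipeline
        # presumably estimates times by other means
        return None
    path = (
        external_dir / "travel_times" / commute_level.lower() / "travel_time_matrix.parquet"
    )
    return pd.read_parquet(path)


tt = load_travel_times(Path("data/external"), commute_level="OA", use_matrix=True)
```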
4,406 changes: 4,406 additions & 0 deletions notebooks/Validation_AcBM_with_Cencus.ipynb

Large diffs are not rendered by default.

15 changes: 13 additions & 2 deletions poetry.lock

Some generated files are not rendered by default.

71 changes: 31 additions & 40 deletions pyproject.toml
@@ -4,9 +4,7 @@ build-backend = "poetry.core.masonry.api"
[tool.poetry]
name = "acbm"
version = "0.1.0"
authors = [
"Hussein Mahfouz <[email protected]>",
]
authors = ["Hussein Mahfouz <[email protected]>"]
homepage = "https://github.com/alan-turing-institute/acbm"
repository = "https://github.com/alan-turing-institute/acbm"
license = "Apache-2.0"
@@ -28,7 +26,7 @@ python = "^3.10"
pytest = { version = ">=6", optional = true }
pytest-cov = { version = ">=3", optional = true }
pandas = "^2.2.0"
uatk-spc = {git = "https://github.com/alan-turing-institute/uatk-spc.git", subdirectory = "python"}
uatk-spc = { git = "https://github.com/alan-turing-institute/uatk-spc.git", subdirectory = "python" }
geopandas = "^0.14.3"
matplotlib = "^3.8.3"
scikit-learn = "^1.4.1.post1"
@@ -43,9 +41,10 @@ tomlkit = "^0.13.0"
cml-pam = "0.3.2"
gdal = "<=3.8.4"
pandera = "^0.20.4"
osmox = {git = "https://github.com/arup-group/osmox"}
osmox = { git = "https://github.com/arup-group/osmox" }
pyrosm = "^0.6.2"
jsonschema = "^4.23.0"
jcs = "^0.2.1"

[tool.poetry.dev-dependencies]
pytest = ">= 6"
@@ -62,22 +61,13 @@ ipykernel = "^6.29.4"
minversion = "6.0"
addopts = ["-ra", "--showlocals", "--strict-markers", "--strict-config"]
xfail_strict = true
filterwarnings = [
"error",
]
filterwarnings = ["error"]
log_cli_level = "INFO"
testpaths = [
"tests",
]
testpaths = ["tests"]

[tool.coverage]
run.source = ["acbm"]
port.exclude_lines = [
'pragma: no cover',
'\.\.\.',
'if typing.TYPE_CHECKING:',
]

port.exclude_lines = ['pragma: no cover', '\.\.\.', 'if typing.TYPE_CHECKING:']


[tool.ruff]
@@ -86,29 +76,30 @@ exclude = []
line-length = 88 # how long you want lines to be

[tool.ruff.format]
docstring-code-format = true # code snippets in docstrings will be formatted
docstring-code-format = true # code snippets in docstrings will be formatted

[tool.ruff.lint]
select = [
"E", "F", "W", # flake8
"B", # flake8-bugbear
"I", # isort
"ARG", # flake8-unused-arguments
"C4", # flake8-comprehensions
"EM", # flake8-errmsg
"ICN", # flake8-import-conventions
"ISC", # flake8-implicit-str-concat
"G", # flake8-logging-format
"PGH", # pygrep-hooks
"PIE", # flake8-pie
"PL", # pylint
"PT", # flake8-pytest-style
"RET", # flake8-return
"RUF", # Ruff-specific
"SIM", # flake8-simplify
"UP", # pyupgrade
"YTT", # flake8-2020
"EXE", # flake8-executable
"E",
"F",
"W", # flake8
"B", # flake8-bugbear
"I", # isort
"ARG", # flake8-unused-arguments
"C4", # flake8-comprehensions
"EM", # flake8-errmsg
"ICN", # flake8-import-conventions
"ISC", # flake8-implicit-str-concat
"G", # flake8-logging-format
"PGH", # pygrep-hooks
"PIE", # flake8-pie
"PL", # pylint
"RET", # flake8-return
"RUF", # Ruff-specific
"SIM", # flake8-simplify
"UP", # pyupgrade
"YTT", # flake8-2020
"EXE", # flake8-executable
]

ignore = [
@@ -118,7 +109,7 @@ ignore = [
"G004", # Logging statement uses f-string, not necessary here
]
unfixable = [
"F401", # Would remove unused imports
"F841", # Would remove unused variables
"F401", # Would remove unused imports
"F841", # Would remove unused variables
]
flake8-unused-arguments.ignore-variadic-names = true # allow unused *args/**kwargs
flake8-unused-arguments.ignore-variadic-names = true # allow unused *args/**kwargs