diff --git a/docker/pgosm_flex.py b/docker/pgosm_flex.py index f02d884..803031f 100644 --- a/docker/pgosm_flex.py +++ b/docker/pgosm_flex.py @@ -51,7 +51,6 @@ def get_today(): help='Amount of RAM in GB available on the server running this process. Used to determine appropriate osm2pgsql command via osm2pgsql-tuner.com API.') @click.option('--region', required=False, show_default="north-america/us", - default="north-america/us", help='Region name matching the filename for data sourced from Geofabrik. e.g. north-america/us. Optional when --input-file is specified, otherwise required.') @click.option('--subregion', required=False, show_default="district-of-columbia", @@ -96,9 +95,9 @@ def run_pgosm_flex(layerset, layerset_path, ram, region, subregion, srid, skip_dump, debug, basepath, input_file): """Logic to run PgOSM Flex within Docker. """ - if region is None and input_file is None: - raise ValueError("either region or input_file must be provided") + validate_region_inputs(region, subregion, input_file) + # Ensure always a region name if region is None and input_file: region = input_file @@ -161,6 +160,27 @@ def run_pgosm_flex(layerset, layerset_path, ram, region, subregion, srid, logger.info('PgOSM Flex complete!') +def validate_region_inputs(region, subregion, input_file): + """Ensures the combination of region, subregion and input_file is valid. + + No return, raises error when invalid. + + Parameters + ----------------------- + region : str + subregion : str + input_file : str + """ + if region is None and input_file is None: + raise ValueError('Either --region or --input-file must be provided') + + if region is None and subregion is not None: + raise ValueError('Cannot use --subregion without --region') + + if region is not None: + if '/' in region and subregion is None: + raise ValueError('Region provided appears to include subregion. 
The portion after the final "/" in the Geofabrik URL should be the --subregion.') + def set_env_vars(region, subregion, srid, language, pgosm_date, layerset, layerset_path): diff --git a/docs/DOCKER-RUN.md b/docs/DOCKER-RUN.md index 6c3274f..b4918de 100644 --- a/docs/DOCKER-RUN.md +++ b/docs/DOCKER-RUN.md @@ -48,22 +48,54 @@ docker ps -a | grep pgosm ## Run PgOSM-Flex The following `docker exec` command runs PgOSM Flex to load the District of Columbia -region - +region. The command `python3 docker/pgosm_flex.py` runs the full process. The -script uses a region (`north-america/us`) and sub-region (`district-of-columbia`) -that must match values in URLs from the Geofabrik download server. -The 3rd parameter tells the script the server has 8 GB RAM available for osm2pgsql, Postgres, and the OS. The PgOSM-Flex layer set is defined (`default`). +script uses a region (`--region=north-america/us`) and +sub-region (`--subregion=district-of-columbia`). +The region/subregion values must match the URL pattern used by the Geofabrik download server, +see the [Regions and Subregions](#regions-and-subregions) section. + +The `--ram=8` parameter defines the total system RAM available and is used by +internal logic to determine the best osm2pgsql options to use. +When running on hardware dedicated to this process it is safe to define the total +system RAM. If the process is on a computer with other responsibilities, such +as your laptop, feel free to lower this value. ```bash docker exec -it \ pgosm python3 docker/pgosm_flex.py \ - --layerset=default \ --ram=8 \ --region=north-america/us \ - --subregion=district-of-columbia \ - &> ~/pgosm-data/pgosm-flex.log + --subregion=district-of-columbia +``` + +For the best in-Docker performance you will need to +[tune the internal Postgres config](#configure-postgres-in-docker) appropriately +for your hardware. 
+See the [osm2pgsql documentation](https://osm2pgsql.org/doc/manual.html#tuning-the-postgresql-server) for more on tuning Postgres for this +process. + + +## Regions and Subregions + +The `--region` and `--subregion` definitions must match +the Geofabrik URL scheme. This can be a bit confusing +as larger subregions can contain smaller subregions. + +The example above to process the `district-of-columbia` subregion defines +`--region=north-america/us`. You cannot, unfortunately, drop off +the `--subregion` to load the U.S. subregion. Attempting this results +in a `ValueError`. + +To load the U.S. subregion, the `us` portion drops out of `--region` +and moves to `--subregion`. + +```bash +docker exec -it pgosm python3 docker/pgosm_flex.py \ + --ram=8 \ + --region=north-america \ + --subregion=us ```