diff --git a/Dockerfile b/Dockerfile index 7ec98ea..062a691 100644 --- a/Dockerfile +++ b/Dockerfile @@ -20,6 +20,7 @@ RUN apt-get update \ curl unzip \ postgresql-16-pgrouting \ nlohmann-json3-dev \ + osmium-tool \ && rm -rf /var/lib/apt/lists/* RUN wget https://luarocks.org/releases/luarocks-3.9.2.tar.gz \ diff --git a/db/deploy/osm_pgosm_flex.sql b/db/deploy/osm_pgosm_flex.sql index 4c64ff4..029e871 100644 --- a/db/deploy/osm_pgosm_flex.sql +++ b/db/deploy/osm_pgosm_flex.sql @@ -5,7 +5,7 @@ BEGIN; CREATE TABLE IF NOT EXISTS {schema_name}.pgosm_flex ( id BIGINT NOT NULL GENERATED BY DEFAULT AS IDENTITY, imported TIMESTAMPTZ NOT NULL DEFAULT NOW(), - osm_date date NOT NULL, + osm_date TIMESTAMPTZ NOT NULL, region text NOT NULL, layerset TEXT NULL, srid text NOT NULL, @@ -43,10 +43,12 @@ ALTER TABLE {schema_name}.pgosm_flex ALTER TABLE {schema_name}.pgosm_flex DROP COLUMN IF EXISTS project_url; ALTER TABLE {schema_name}.pgosm_flex DROP COLUMN IF EXISTS default_date; +ALTER TABLE {schema_name}.pgosm_flex ALTER COLUMN osm_date TYPE TIMESTAMPTZ; + COMMENT ON TABLE {schema_name}.pgosm_flex IS 'Provides meta information on the PgOSM-Flex project including version and SRID used during the import. One row per import.'; COMMENT ON COLUMN {schema_name}.pgosm_flex.imported IS 'Indicates when the import was ran.'; -COMMENT ON COLUMN {schema_name}.pgosm_flex.osm_date IS 'Indicates the date of the OpenStreetMap data loaded. Recommended to set PGOSM_DATE env var at runtime, otherwise defaults to the date PgOSM-Flex was run.'; +COMMENT ON COLUMN {schema_name}.pgosm_flex.osm_date IS 'Indicates the date of the OpenStreetMap data loaded. Uses timestamp from PBF file metadata when available. 
If metadata not available this represents --osm-date at runtime, or the date of today in timezone based on computer running import.';
 COMMENT ON COLUMN {schema_name}.pgosm_flex.srid IS 'SRID of imported data.';
 COMMENT ON COLUMN {schema_name}.pgosm_flex.pgosm_flex_version IS 'Version of PgOSM-Flex used to generate schema.';
 COMMENT ON COLUMN {schema_name}.pgosm_flex.osm2pgsql_version IS 'Version of osm2pgsql used to load data.';
diff --git a/docker/db.py b/docker/db.py
index 0bf69a7..f0956bf 100644
--- a/docker/db.py
+++ b/docker/db.py
@@ -626,21 +626,36 @@ def fix_pg_dump_create_public(export_path):
 def log_import_message(import_id, msg, schema_name):
     """Logs msg to database in osm.pgosm_flex for import_uuid.
 
+    Also overwrites `osm_date` with the `PBF_TIMESTAMP` environment variable
+    when it is set, otherwise with `PGOSM_DATE`.  When neither is set the
+    existing `osm_date` is left untouched.
+
     Parameters
     -------------------------------
     import_id : int
+        `id` of the row in `{schema_name}.pgosm_flex` to update.
     msg : str
+        Value stored in `import_status`.
     schema_name: str
+        Schema containing the `pgosm_flex` table.
     """
+    # Use .get() so a missing (or empty) variable becomes NULL in the query
+    # instead of raising KeyError; COALESCE() then keeps the stored osm_date.
+    pbf_timestamp = (os.environ.get('PBF_TIMESTAMP')
+                     or os.environ.get('PGOSM_DATE')
+                     or None)
     sql_raw = """
 UPDATE {schema_name}.pgosm_flex
-    SET import_status = %(msg)s
+    SET import_status = %(msg)s ,
+        osm_date = COALESCE( %(pbf_timestamp)s , osm_date)
     WHERE id = %(import_id)s
 ;
 """
     sql_raw = sql_raw.format(schema_name=schema_name)
     with get_db_conn(conn_string=os.environ['PGOSM_CONN']) as conn:
-        params = {'import_id': import_id, 'msg': msg}
+        params = {'import_id': import_id,
+                  'msg': msg,
+                  'pbf_timestamp': pbf_timestamp}
         cur = conn.cursor()
         cur.execute(sql_raw, params=params)
 
diff --git a/docker/geofabrik.py b/docker/geofabrik.py
index b7ecf02..957a26e 100644
--- a/docker/geofabrik.py
+++ b/docker/geofabrik.py
@@ -1,6 +1,7 @@
 """This module handles the auto-file handling using Geofabrik's download service.
""" import logging +import json import os import shutil import subprocess @@ -67,10 +68,45 @@ def prepare_data(out_path: str) -> str: md5_file_with_date) helpers.verify_checksum(md5_file, out_path) + set_date_from_metadata(pbf_file=pbf_file) return pbf_file +def set_date_from_metadata(pbf_file: str): + """Use `osmium fileinfo` to set a more accurate date to represent when it was + extracted from OpenStreetMap. + + Parameters + --------------------- + pbf_file : str + Full path to the `.osm.pbf` file. + """ + logger = logging.getLogger('pgosm-flex') + osmium_cmd = f'osmium fileinfo {pbf_file} --json' + output = [] + returncode = helpers.run_command_via_subprocess(cmd=osmium_cmd.split(), + cwd=None, + output_lines=output, + print=False) + if returncode != 0: + logger.error(f'osmium fileinfo failed. Output: {output}') + + output_joined = json.loads(''.join(output)) + meta_options = output_joined['header']['option'] + + try: + meta_timestamp = meta_options['timestamp'] + except KeyError: + try: + meta_timestamp = meta_options['osmosis_replication_timestamp'] + except KeyError: + meta_timestamp = None + + logger.info(f'PBF Meta timestamp: {meta_timestamp}') + os.environ['PBF_TIMESTAMP'] = meta_timestamp + + def pbf_download_needed(pbf_file_with_date: str, md5_file_with_date: str, pgosm_date: str) -> bool: """Decides if the PBF/MD5 files need to be downloaded. diff --git a/docker/helpers.py b/docker/helpers.py index a04643c..0400ae7 100644 --- a/docker/helpers.py +++ b/docker/helpers.py @@ -309,7 +309,7 @@ def okay_to_run(self, prior_import: dict) -> bool: # Check git version against latest. # If current version is lower than prior version from latest import, stop. prior_import_version = prior_import['pgosm_flex_version_no_hash'] - git_tag = helpers.get_git_info(tag_only=True) + git_tag = get_git_info(tag_only=True) if git_tag == '-- (version unknown) --': msg = 'Unable to detect PgOSM Flex version from Git.' 
diff --git a/docker/tests/test_osm2pgsql_recommendation.py b/docker/tests/test_osm2pgsql_recommendation.py index 9f41569..06f6607 100644 --- a/docker/tests/test_osm2pgsql_recommendation.py +++ b/docker/tests/test_osm2pgsql_recommendation.py @@ -3,7 +3,7 @@ import unittest import osm2pgsql_recommendation -from import_mode import ImportMode +from helpers import ImportMode class Osm2pgsqlRecommendationTests(unittest.TestCase):