From 22c20f11969de836cdafcc58b1a2f425eb653ba1 Mon Sep 17 00:00:00 2001 From: Emilio Mariscal Date: Wed, 29 Jan 2025 14:39:26 -0300 Subject: [PATCH 1/6] refactor: Multiple custom visualizations for custom/HDX exports --- API/api_worker.py | 2 +- requirements.txt | 2 +- src/app.py | 9 ++++- src/post_processing/geojson_stats.py | 44 ++------------------- src/post_processing/processor.py | 42 +++++++++++++------- src/post_processing/stats_building_tpl.html | 18 ++++++--- src/post_processing/stats_highway_tpl.html | 18 ++++++--- src/post_processing/stats_railway_tpl.html | 18 ++++++--- src/post_processing/stats_tpl.html | 18 ++++++--- src/post_processing/stats_waterway_tpl.html | 18 ++++++--- 10 files changed, 100 insertions(+), 89 deletions(-) diff --git a/API/api_worker.py b/API/api_worker.py index 096f2f0f..11754919 100644 --- a/API/api_worker.py +++ b/API/api_worker.py @@ -54,7 +54,7 @@ # celery.conf.result_serializer = "json" # celery.conf.accept_content = ["application/json", "application/x-python-serialize"] celery.conf.task_track_started = True -celery.conf.update(result_extended=True) +celery.conf.update(result_extended=True, worker_pool_restarts=True) # celery.conf.task_reject_on_worker_lost = True # celery.conf.task_acks_late = True diff --git a/requirements.txt b/requirements.txt index e1930f50..eb03b4ac 100644 --- a/requirements.txt +++ b/requirements.txt @@ -27,7 +27,7 @@ humanize==4.9.0 python-slugify==8.0.1 geomet==1.1.0 PyYAML==6.0.1 -geojson-stats==0.2.5 +geojson-stats==0.2.6 transliterate==1.10.2 ## documentation diff --git a/src/app.py b/src/app.py index 1b313d8c..295b5708 100644 --- a/src/app.py +++ b/src/app.py @@ -1299,6 +1299,7 @@ def __init__(self, params, uid=None): self.default_export_base_name = ( self.iso3.upper() if self.iso3 else self.params.dataset.dataset_prefix ) + self.default_export_path = os.path.join( export_path, self.uuid, @@ -1307,6 +1308,7 @@ def __init__(self, params, uid=None): ) if os.path.exists(self.default_export_path): shutil.rmtree(self.default_export_path, ignore_errors=True) + os.makedirs(self.default_export_path) if USE_DUCK_DB_FOR_CUSTOM_EXPORTS is True: @@ -1941,8 +1943,13 @@ def add_resource(self, resource_meta): # Add customviz if available if resource_meta.get("stats_html"): + dataset_customviz = self.dataset.get("customviz") + if not dataset_customviz: + dataset_customviz = [{"name": resource_meta["name"], "url": resource_meta["stats_html"]}] + else: + dataset_customviz.append({"name": resource_meta["name"], "url": resource_meta["stats_html"]}) self.dataset.update( - {"customviz": [{"url": resource_meta["stats_html"]}]} + {"customviz": dataset_customviz} ) def upload_dataset(self, dump_config_to_s3=False): diff --git a/src/post_processing/geojson_stats.py b/src/post_processing/geojson_stats.py index 9ca74d68..9ea65eb3 100644 --- a/src/post_processing/geojson_stats.py +++ b/src/post_processing/geojson_stats.py @@ -1,12 +1,8 @@ from geojson_stats.stats import Stats from geojson_stats.html import Html -CONFIG_AREA = ["building"] -CONFIG_LENGTH = ["highway", "waterway"] - - class GeoJSONStats(Stats): - """Used for collecting stats while processing GeoJSON files line by line""" + """Used for collecting stats while processing GeoJSON files""" def __init__(self, filters, *args, **kwargs): super().__init__(*args, **kwargs) @@ -14,48 +10,14 @@ def __init__(self, filters, *args, **kwargs): self.config.clean = True self.config.properties_prop = "properties.tags" - if filters and filters.tags: - for tag in CONFIG_AREA: - if self.check_filter(filters.tags, tag): - self.config.keys.append(tag) - self.config.value_keys.append(tag) - self.config.area = True - - for tag in CONFIG_LENGTH: - if self.check_filter(filters.tags, tag): - self.config.keys.append(tag) - self.config.value_keys.append(tag) - self.config.length = True - - def check_filter(self, tags, tag): - """ - Check if a tag is present in tag filters - """ - - if tags.all_geometry: - if tags.all_geometry.join_or and tag in tags.all_geometry.join_or: - return True - if tags.all_geometry.join_and and tag in tags.all_geometry.join_and: - return True - if tags.polygon: - if tags.polygon.join_or and tag in tags.polygon.join_or: - return True - if tags.polygon.join_and and tag in tags.polygon.join_and: - return True - if tags.line: - if tags.line.join_or and tag in tags.line.join_or: - return True - if tags.line.join_and and tag in tags.line.join_and: - return True - def raw_data_line_stats(self, json_object: dict): """ Process a GeoJSON line (for getting stats) and return that line """ self.get_object_stats(json_object) - def html(self, tpl): + def html(self, tpl, tpl_params): """ Returns stats Html object, generated from stats data using a template """ - return Html(tpl, self) + return Html(tpl, self, tpl_params) diff --git a/src/post_processing/processor.py b/src/post_processing/processor.py index c7b416f2..98b092b6 100644 --- a/src/post_processing/processor.py +++ b/src/post_processing/processor.py @@ -4,9 +4,26 @@ import os import pathlib +CATEGORIES_CONFIG = { + "roads": { + "tag": "highway", "length": True, "area": False + }, + "buildings": { + "tag": "building", "length": False, "area": True + }, + "waterways": { + "tag": "waterway", "length": True, "area": False + }, + "railways": { + "tag": "railway", "length": True, "area": False + }, + "default": { + "tag": None, "length": False, "area": False + }, +} class PostProcessor: - """Used for posst-process data while processing GeoJSON files line by line""" + """Used for post-process GeoJSON files""" options = {} filters = {} @@ -26,6 +43,10 @@ def post_process_line(self, line: str): fn(line_object) return json.dumps(line_object) + + def get_categories_config(self, category_name): + config = CATEGORIES_CONFIG.get(category_name) + return config if config else CATEGORIES_CONFIG["default"] def custom( self, category_name, export_format_path, export_filename, file_export_path @@ -35,19 +56,10 @@ def custom( """ self.geoJSONStats.config.properties_prop = "properties" - category_tag = "" - if category_name == "roads": - category_tag = "highway" - self.geoJSONStats.config.length = True - elif category_name == "buildings": - category_tag = "building" - self.geoJSONStats.config.area = True - elif category_name == "waterways": - category_tag = "waterway" - self.geoJSONStats.config.length = True - elif category_name == "railways": - category_tag = "railway" - self.geoJSONStats.config.length = True + category_config = self.get_categories_config(category_name) + category_tag = category_config["tag"] + self.geoJSONStats.config.length = category_config["length"] + self.geoJSONStats.config.area = category_config["area"] if self.options["include_stats"]: if category_tag: @@ -102,7 +114,7 @@ def custom( project_root, "{tpl}_tpl.html".format(tpl=tpl), ) - geojson_stats_html = self.geoJSONStats.html(tpl_path).build() + geojson_stats_html = self.geoJSONStats.html(tpl_path, {"title": f"{export_filename}.geojson"}).build() upload_html_path = os.path.join(file_export_path, "stats-summary.html") with open(upload_html_path, "w") as f: f.write(geojson_stats_html) diff --git a/src/post_processing/stats_building_tpl.html b/src/post_processing/stats_building_tpl.html index 97c3f659..e3d0dbd0 100644 --- a/src/post_processing/stats_building_tpl.html +++ b/src/post_processing/stats_building_tpl.html @@ -3,31 +3,31 @@ - HOT Export Stats