From 7ebe2b7674e4c80d621021ffcddc27039f65e615 Mon Sep 17 00:00:00 2001 From: sujan Date: Mon, 29 Jul 2024 16:41:31 +0545 Subject: [PATCH 1/7] feat: updated generate_project_files to bulk upload entities --- src/backend/app/db/postgis_utils.py | 11 ++-- src/backend/app/projects/project_crud.py | 59 +++++++++------------ src/backend/app/projects/project_schemas.py | 37 ++++++++++++- 3 files changed, 67 insertions(+), 40 deletions(-) diff --git a/src/backend/app/db/postgis_utils.py b/src/backend/app/db/postgis_utils.py index a059c60e98..29b9bc2b20 100644 --- a/src/backend/app/db/postgis_utils.py +++ b/src/backend/app/db/postgis_utils.py @@ -696,6 +696,11 @@ async def feature_geojson_to_entity_dict( feature_id = feature.get("id") geometry = feature.get("geometry", {}) + if not geometry: + msg = "'geometry' data field is mandatory" + log.debug(msg) + raise ValueError(msg) + javarosa_geom = await geojson_to_javarosa_geom(geometry) # NOTE all properties MUST be string values for Entities, convert @@ -708,7 +713,7 @@ async def feature_geojson_to_entity_dict( task_id = properties.get("task_id") entity_label = f"Task {task_id} Feature {feature_id}" - return {entity_label: {"geometry": javarosa_geom, **properties}} + return {"label": entity_label, "data":{"geometry": javarosa_geom, **properties}} async def task_geojson_dict_to_entity_values(task_geojson_dict): @@ -720,9 +725,7 @@ async def task_geojson_dict_to_entity_values(task_geojson_dict): [feature_geojson_to_entity_dict(feature) for feature in features if feature] ) - entity_values = await gather(*asyncio_tasks) - # Merge all dicts into a single dict - return {k: v for result in entity_values for k, v in result.items()} + return await gather(*asyncio_tasks) def multipolygon_to_polygon( diff --git a/src/backend/app/projects/project_crud.py b/src/backend/app/projects/project_crud.py index c4059c73c3..5f3d5065c7 100644 --- a/src/backend/app/projects/project_crud.py +++ b/src/backend/app/projects/project_crud.py @@ -828,27 +828,12 @@ async def generate_odk_central_project_content( xlsform: BytesIO, form_category: str, form_file_ext: str, - task_count: int, + task_extract_dict: dict, db: Session, ) -> str: """Populate the project in ODK Central with XForm, Appuser, Permissions.""" project_odk_id = project.odkid - # NOTE Entity Registration form: this may be removed with future Central - # API changes to allow Entity creation - with open(entities_registration, "rb") as f: - registration_xlsform = BytesIO(f.read()) - registration_xform = await central_crud.read_and_test_xform( - registration_xlsform, "xls", return_form_data=True - ) - # Upload entity registration XForm - log.info("Uploading Entity registration XForm to ODK Central") - central_crud.create_odk_xform( - project_odk_id, - registration_xform, - odk_credentials, - ) - # NOTE Survey form xform = await central_crud.read_and_test_xform( xlsform, form_file_ext, return_form_data=True @@ -857,7 +842,7 @@ async def generate_odk_central_project_content( updated_xform = await central_crud.modify_xform_xml( xform, form_category, - task_count, + len(task_extract_dict.keys()), ) # Upload survey XForm log.info("Uploading survey XForm to ODK Central") @@ -867,6 +852,26 @@ async def generate_odk_central_project_content( odk_credentials, ) + entities_list = await task_geojson_dict_to_entity_values(task_extract_dict) + fields_dict_list = project_schemas.fields_to_dict() + + async with central_deps.get_odk_entity(odk_credentials) as odk_central: + await odk_central.createDataset(project_odk_id, project.project_name_prefix) + await odk_central.createProperties( + project_odk_id, + "features", + fields_dict_list + ) + entities = await odk_central.createEntities( + project_odk_id, + "features", + entities_list, + ) + if entities["success"]==True: + log.debug(f"Wrote {len(entities_list)} entities for project ({project.id})") + else: + log.debug(f"No entities uploaded for project ({project.id})") + sql = text( """ INSERT INTO xforms ( @@ -906,7 +911,7 @@ async def generate_project_files( background_task_id (uuid): the task_id of the background task. """ try: - project = await get_project_by_id(db, project_id) + project = await project_deps.get_project_by_id(db, project_id) form_category = project.xform_category log.info(f"Starting generate_project_files for project {project_id}") odk_credentials = await project_deps.get_odk_credentials(db, project_id) @@ -948,7 +953,7 @@ async def generate_project_files( xlsform, form_category, form_file_ext, - len(task_extract_dict.keys()), + task_extract_dict, db, ) log.debug( @@ -969,22 +974,6 @@ async def generate_project_files( # Commit all updated database records db.commit() - # Map geojson to entities dict - entities_data_dict = await task_geojson_dict_to_entity_values(task_extract_dict) - # Create entities - # TODO after Entity creation is a single API call, - # TODO move to generate_odk_central_project_content - async with central_deps.get_odk_entity(odk_credentials) as odk_central: - entities = await odk_central.createEntities( - project_odk_id, - "features", - entities_data_dict, - ) - if entities: - log.debug(f"Wrote {len(entities)} entities for project ({project_id})") - else: - log.debug(f"No entities uploaded for project ({project_id})") - if background_task_id: # Update background task status to COMPLETED await update_background_task_status_in_database( diff --git a/src/backend/app/projects/project_schemas.py b/src/backend/app/projects/project_schemas.py index 670e5574fb..885bd7e37c 100644 --- a/src/backend/app/projects/project_schemas.py +++ b/src/backend/app/projects/project_schemas.py @@ -19,7 +19,8 @@ import uuid from datetime import datetime -from typing import Any, List, Optional, Union +from dataclasses import dataclass +from typing import Any, List, Optional, Union, Dict from dateutil import parser from geojson_pydantic import Feature, FeatureCollection, MultiPolygon, Polygon @@ -436,3 +437,37 @@ def get_last_active(self, value, values): return f'{days_difference} day{"s" if days_difference > 1 else ""} ago' else: return last_active.strftime("%d %b %Y") + +@dataclass +class Field: + """ + A data class representing a field with a name and type. + + Args: + name (str): The name of the field. + type (str): The type of the field. + + Returns: + None + """ + name: str + type: str + +def fields_to_dict() -> List[Dict[str, str]]: + """ + Converts a list of Field objects to a list of dictionaries. + + Returns: + List[Dict[str, str]]: A list of dictionaries representing the fields. + """ + fields: List[Field] = [ + Field(name="geometry", type="geopoint"), + Field(name="project_id", type="string"), + Field(name="task_id", type="string"), + Field(name="osm_id", type="string"), + Field(name="tags", type="string"), + Field(name="version", type="string"), + Field(name="changeset", type="string"), + Field(name="timestamp", type="datetime"), +] + return [field.__dict__ for field in fields] \ No newline at end of file From cc3ed8742ab2275215fd79c13e0470c29122628e Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Mon, 29 Jul 2024 11:12:08 +0000 Subject: [PATCH 2/7] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- src/backend/app/db/postgis_utils.py | 4 +-- src/backend/app/projects/project_crud.py | 10 ++----- src/backend/app/projects/project_schemas.py | 33 +++++++++++---------- 3 files changed, 22 insertions(+), 25 deletions(-) diff --git a/src/backend/app/db/postgis_utils.py b/src/backend/app/db/postgis_utils.py index 29b9bc2b20..74e3f2a9fd 100644 --- a/src/backend/app/db/postgis_utils.py +++ b/src/backend/app/db/postgis_utils.py @@ -700,7 +700,7 @@ async def feature_geojson_to_entity_dict( msg = "'geometry' data field is mandatory" log.debug(msg) raise ValueError(msg) - + javarosa_geom = await geojson_to_javarosa_geom(geometry) # NOTE all properties MUST be string values for Entities, convert @@ -713,7 +713,7 @@ async def feature_geojson_to_entity_dict( task_id = properties.get("task_id") entity_label = f"Task {task_id} Feature {feature_id}" - return {"label": entity_label, "data":{"geometry": javarosa_geom, **properties}} + return {"label": entity_label, "data": {"geometry": javarosa_geom, **properties}} async def task_geojson_dict_to_entity_values(task_geojson_dict): diff --git a/src/backend/app/projects/project_crud.py b/src/backend/app/projects/project_crud.py index 5f3d5065c7..2a57fac48d 100644 --- a/src/backend/app/projects/project_crud.py +++ b/src/backend/app/projects/project_crud.py @@ -35,7 +35,7 @@ from geojson.feature import Feature, FeatureCollection from loguru import logger as log from osm_fieldwork.basemapper import create_basemap_file -from osm_fieldwork.xlsforms import entities_registration, xlsforms_path +from osm_fieldwork.xlsforms import xlsforms_path from osm_rawdata.postgres import PostgresClient from shapely.geometry import shape from sqlalchemy import and_, column, func, select, table, text @@ -857,17 +857,13 @@ async def generate_odk_central_project_content( async with central_deps.get_odk_entity(odk_credentials) as odk_central: await odk_central.createDataset(project_odk_id, project.project_name_prefix) - await odk_central.createProperties( - project_odk_id, - "features", - fields_dict_list - ) + await odk_central.createProperties(project_odk_id, "features", fields_dict_list) entities = await odk_central.createEntities( project_odk_id, "features", entities_list, ) - if entities["success"]==True: + if entities["success"] == True: log.debug(f"Wrote {len(entities_list)} entities for project ({project.id})") else: log.debug(f"No entities uploaded for project ({project.id})") diff --git a/src/backend/app/projects/project_schemas.py b/src/backend/app/projects/project_schemas.py index 885bd7e37c..39d77dce38 100644 --- a/src/backend/app/projects/project_schemas.py +++ b/src/backend/app/projects/project_schemas.py @@ -18,9 +18,9 @@ """Pydantic schemas for Projects.""" import uuid -from datetime import datetime from dataclasses import dataclass -from typing import Any, List, Optional, Union, Dict +from datetime import datetime +from typing import Any, Dict, List, Optional, Union from dateutil import parser from geojson_pydantic import Feature, FeatureCollection, MultiPolygon, Polygon @@ -438,10 +438,10 @@ def get_last_active(self, value, values): else: return last_active.strftime("%d %b %Y") + @dataclass class Field: - """ - A data class representing a field with a name and type. + """A data class representing a field with a name and type. Args: name (str): The name of the field. @@ -450,24 +450,25 @@ class Field: Returns: None """ + name: str type: str + def fields_to_dict() -> List[Dict[str, str]]: - """ - Converts a list of Field objects to a list of dictionaries. + """Converts a list of Field objects to a list of dictionaries. Returns: List[Dict[str, str]]: A list of dictionaries representing the fields. """ fields: List[Field] = [ - Field(name="geometry", type="geopoint"), - Field(name="project_id", type="string"), - Field(name="task_id", type="string"), - Field(name="osm_id", type="string"), - Field(name="tags", type="string"), - Field(name="version", type="string"), - Field(name="changeset", type="string"), - Field(name="timestamp", type="datetime"), -] - return [field.__dict__ for field in fields] \ No newline at end of file + Field(name="geometry", type="geopoint"), + Field(name="project_id", type="string"), + Field(name="task_id", type="string"), + Field(name="osm_id", type="string"), + Field(name="tags", type="string"), + Field(name="version", type="string"), + Field(name="changeset", type="string"), + Field(name="timestamp", type="datetime"), + ] + return [field.__dict__ for field in fields] From 7cdf05a6533a6ee41d726d1ca53645f535d22222 Mon Sep 17 00:00:00 2001 From: sujan Date: Mon, 29 Jul 2024 17:00:10 +0545 Subject: [PATCH 3/7] fix: pre-commit issue --- src/backend/app/projects/project_crud.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/backend/app/projects/project_crud.py b/src/backend/app/projects/project_crud.py index 5f3d5065c7..2f621f33db 100644 --- a/src/backend/app/projects/project_crud.py +++ b/src/backend/app/projects/project_crud.py @@ -867,7 +867,7 @@ async def generate_odk_central_project_content( "features", entities_list, ) - if entities["success"]==True: + if entities["success"]: log.debug(f"Wrote {len(entities_list)} entities for project ({project.id})") else: log.debug(f"No entities uploaded for project ({project.id})") From cc9be244b784e1bb702791c51eb7e4ca8ef6d4d1 Mon Sep 17 00:00:00 2001 From: sujan Date: Mon, 29 Jul 2024 17:20:59 +0545 Subject: [PATCH 4/7] refactor: changed ODKEntity -> ODKDataset and get_odk_entity -> get_odk_dataset --- src/backend/app/central/central_crud.py | 8 ++++---- src/backend/app/central/central_deps.py | 8 ++++---- src/backend/app/helpers/helper_routes.py | 2 +- src/backend/app/projects/project_crud.py | 2 +- 4 files changed, 10 insertions(+), 10 deletions(-) diff --git a/src/backend/app/central/central_crud.py b/src/backend/app/central/central_crud.py index d7bff0b254..8d92491222 100644 --- a/src/backend/app/central/central_crud.py +++ b/src/backend/app/central/central_crud.py @@ -731,7 +731,7 @@ async def get_entities_geojson( Returns: dict: Entity data in OData JSON format. """ - async with central_deps.get_odk_entity(odk_creds) as odk_central: + async with central_deps.get_odk_dataset(odk_creds) as odk_central: entities = await odk_central.getEntityData( odk_id, dataset_name, @@ -781,7 +781,7 @@ async def get_entities_data( list: JSON list containing Entity info. If updated_at is included, the format is string 2022-01-31T23:59:59.999Z. """ - async with central_deps.get_odk_entity(odk_creds) as odk_central: + async with central_deps.get_odk_dataset(odk_creds) as odk_central: entities = await odk_central.getEntityData( odk_id, dataset_name, @@ -847,7 +847,7 @@ async def get_entity_mapping_status( dict: JSON containing Entity: id, status, updated_at. updated_at is in string format 2022-01-31T23:59:59.999Z. """ - async with central_deps.get_odk_entity(odk_creds) as odk_central: + async with central_deps.get_odk_dataset(odk_creds) as odk_central: entity = await odk_central.getEntity( odk_id, dataset_name, @@ -879,7 +879,7 @@ async def update_entity_mapping_status( Returns: dict: All Entity data in OData JSON format. """ - async with central_deps.get_odk_entity(odk_creds) as odk_central: + async with central_deps.get_odk_dataset(odk_creds) as odk_central: entity = await odk_central.updateEntity( odk_id, dataset_name, diff --git a/src/backend/app/central/central_deps.py b/src/backend/app/central/central_deps.py index 8a15f7fea3..f1b2905e0e 100644 --- a/src/backend/app/central/central_deps.py +++ b/src/backend/app/central/central_deps.py @@ -21,17 +21,17 @@ from contextlib import asynccontextmanager from fastapi.exceptions import HTTPException -from osm_fieldwork.OdkCentralAsync import OdkEntity +from osm_fieldwork.OdkCentralAsync import OdkDataset from app.models.enums import HTTPStatus from app.projects.project_schemas import ODKCentralDecrypted @asynccontextmanager -async def get_odk_entity(odk_creds: ODKCentralDecrypted): - """Wrap getting an OdkEntity object with ConnectionError handling.""" +async def get_odk_dataset(odk_creds: ODKCentralDecrypted): + """Wrap getting an OdkDataset object with ConnectionError handling.""" try: - async with OdkEntity( + async with OdkDataset( url=odk_creds.odk_central_url, user=odk_creds.odk_central_user, passwd=odk_creds.odk_central_password, diff --git a/src/backend/app/helpers/helper_routes.py b/src/backend/app/helpers/helper_routes.py index 421e4b42cf..5cc3f44f34 100644 --- a/src/backend/app/helpers/helper_routes.py +++ b/src/backend/app/helpers/helper_routes.py @@ -192,7 +192,7 @@ def parse_csv(csv_bytes): parsed_data = parse_csv(await csv_file.read()) entities_data_dict = {str(uuid4()): data for data in parsed_data} - async with central_deps.get_odk_entity(odk_creds) as odk_central: + async with central_deps.get_odk_dataset(odk_creds) as odk_central: entities = await odk_central.createEntities( odk_project_id, entity_name, diff --git a/src/backend/app/projects/project_crud.py b/src/backend/app/projects/project_crud.py index 3c43ed8178..d105dc2104 100644 --- a/src/backend/app/projects/project_crud.py +++ b/src/backend/app/projects/project_crud.py @@ -855,7 +855,7 @@ async def generate_odk_central_project_content( entities_list = await task_geojson_dict_to_entity_values(task_extract_dict) fields_dict_list = project_schemas.fields_to_dict() - async with central_deps.get_odk_entity(odk_credentials) as odk_central: + async with central_deps.get_odk_dataset(odk_credentials) as odk_central: await odk_central.createDataset(project_odk_id, project.project_name_prefix) await odk_central.createProperties(project_odk_id, "features", fields_dict_list) entities = await odk_central.createEntities( From 0ba5e226ae2812751ee7cb4b652f2c8bed11064e Mon Sep 17 00:00:00 2001 From: spwoodcock Date: Mon, 29 Jul 2024 23:20:16 +0100 Subject: [PATCH 5/7] build: update osm-fieldwork --> v0.14.0 --- src/backend/pdm.lock | 8 ++++---- src/backend/pyproject.toml | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/src/backend/pdm.lock b/src/backend/pdm.lock index b20520bf6f..cb0727a062 100644 --- a/src/backend/pdm.lock +++ b/src/backend/pdm.lock @@ -5,7 +5,7 @@ groups = ["default", "debug", "dev", "docs", "test", "monitoring"] strategy = ["cross_platform"] lock_version = "4.4.1" -content_hash = "sha256:eb7decd06266cdc5d8441f9e947f54d0e7f9b99affccb18234ffaf6c500de681" +content_hash = "sha256:318c50c56d3a2c1d9a7dfe3f527c2efd7c8c98d599fb674bb803349f6b4e9ef2" [[package]] name = "aiohttp" @@ -1579,7 +1579,7 @@ files = [ [[package]] name = "osm-fieldwork" -version = "0.13.0" +version = "0.14.0" requires_python = ">=3.10" summary = "Processing field data from ODK to OpenStreetMap format." dependencies = [ @@ -1603,8 +1603,8 @@ dependencies = [ "xmltodict>=0.13.0", ] files = [ - {file = "osm-fieldwork-0.13.0.tar.gz", hash = "sha256:69b07a47619394171277dba5b39f6044954310b07ea84f0a83a067242d2e5bbd"}, - {file = "osm_fieldwork-0.13.0-py3-none-any.whl", hash = "sha256:1217f940e2647410a544c5d032efed0598cb47006d8864305b21a5e00818fe90"}, + {file = "osm-fieldwork-0.14.0.tar.gz", hash = "sha256:503172335f11d3e8aaf31ef3dd95b702a91d7e367ea90b57012d46791c7a390e"}, + {file = "osm_fieldwork-0.14.0-py3-none-any.whl", hash = "sha256:98bf2001a651b3744a70f6c84326947df4b2c143d9311bd5bd7a69360d59315b"}, ] [[package]] diff --git a/src/backend/pyproject.toml b/src/backend/pyproject.toml index a3ed32589b..6bbd5215a2 100644 --- a/src/backend/pyproject.toml +++ b/src/backend/pyproject.toml @@ -47,7 +47,7 @@ dependencies = [ "pyjwt>=2.8.0", "async-lru>=2.0.4", "osm-login-python==1.0.3", - "osm-fieldwork==0.13.0", + "osm-fieldwork==0.14.0", "osm-rawdata==0.3.0", "fmtm-splitter==1.3.0", ] From 65ee323a5ddaa25597fd3c49f55ef61c398d8296 Mon Sep 17 00:00:00 2001 From: spwoodcock Date: Mon, 29 Jul 2024 23:21:23 +0100 Subject: [PATCH 6/7] refactor: dataset properties use field names instead of name:field dict --- src/backend/app/projects/project_schemas.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/src/backend/app/projects/project_schemas.py b/src/backend/app/projects/project_schemas.py index 39d77dce38..e69dc2b605 100644 --- a/src/backend/app/projects/project_schemas.py +++ b/src/backend/app/projects/project_schemas.py @@ -20,7 +20,7 @@ import uuid from dataclasses import dataclass from datetime import datetime -from typing import Any, Dict, List, Optional, Union +from typing import Any, List, Optional, Union from dateutil import parser from geojson_pydantic import Feature, FeatureCollection, MultiPolygon, Polygon @@ -455,11 +455,11 @@ class Field: type: str -def fields_to_dict() -> List[Dict[str, str]]: - """Converts a list of Field objects to a list of dictionaries. +def entity_fields_to_list() -> List[str]: + """Converts a list of Field objects to a list of field names. Returns: - List[Dict[str, str]]: A list of dictionaries representing the fields. + List[str]: A list of fields. """ fields: List[Field] = [ Field(name="geometry", type="geopoint"), @@ -470,5 +470,6 @@ def fields_to_dict() -> List[Dict[str, str]]: Field(name="version", type="string"), Field(name="changeset", type="string"), Field(name="timestamp", type="datetime"), + Field(name="status", type="string"), ] - return [field.__dict__ for field in fields] + return [field.name for field in fields] From fcc7cb29897008d4e192b82b349555058c69d8f3 Mon Sep 17 00:00:00 2001 From: spwoodcock Date: Mon, 29 Jul 2024 23:21:54 +0100 Subject: [PATCH 7/7] refactor: create entities prior to xlsform in project creation --- src/backend/app/projects/project_crud.py | 45 +++++++++++------------- 1 file changed, 21 insertions(+), 24 deletions(-) diff --git a/src/backend/app/projects/project_crud.py b/src/backend/app/projects/project_crud.py index d105dc2104..a7ece25f8b 100644 --- a/src/backend/app/projects/project_crud.py +++ b/src/backend/app/projects/project_crud.py @@ -834,7 +834,20 @@ async def generate_odk_central_project_content( """Populate the project in ODK Central with XForm, Appuser, Permissions.""" project_odk_id = project.odkid - # NOTE Survey form + # The ODK Dataset (Entity List) must exist prior to main XLSForm + entities_list = await task_geojson_dict_to_entity_values(task_extract_dict) + fields_list = project_schemas.entity_fields_to_list() + + async with central_deps.get_odk_dataset(odk_credentials) as odk_central: + await odk_central.createDataset( + project_odk_id, datasetName="features", properties=fields_list + ) + await odk_central.createEntities( + project_odk_id, + "features", + entities_list, + ) + xform = await central_crud.read_and_test_xform( xlsform, form_file_ext, return_form_data=True ) @@ -852,31 +865,15 @@ async def generate_odk_central_project_content( odk_credentials, ) - entities_list = await task_geojson_dict_to_entity_values(task_extract_dict) - fields_dict_list = project_schemas.fields_to_dict() - - async with central_deps.get_odk_dataset(odk_credentials) as odk_central: - await odk_central.createDataset(project_odk_id, project.project_name_prefix) - await odk_central.createProperties(project_odk_id, "features", fields_dict_list) - entities = await odk_central.createEntities( - project_odk_id, - "features", - entities_list, - ) - if entities["success"]: - log.debug(f"Wrote {len(entities_list)} entities for project ({project.id})") - else: - log.debug(f"No entities uploaded for project ({project.id})") - sql = text( """ - INSERT INTO xforms ( - project_id, odk_form_id, category - ) - VALUES ( - :project_id, :xform_id, :category - ) - """ + INSERT INTO xforms ( + project_id, odk_form_id, category + ) + VALUES ( + :project_id, :xform_id, :category + ) + """ ) db.execute( sql,