From 2ffba84dd31188017d0fe6be6f82784d9f71785f Mon Sep 17 00:00:00 2001 From: Emile Sonneveld Date: Wed, 22 Mar 2023 12:39:31 +0100 Subject: [PATCH 1/3] Find amount of input features from regions data instead of the highest feature ID found in the CSV --- openeo_driver/save_result.py | 22 +++++++++++++++++++--- 1 file changed, 19 insertions(+), 3 deletions(-) diff --git a/openeo_driver/save_result.py b/openeo_driver/save_result.py index 82823718..16060b7e 100644 --- a/openeo_driver/save_result.py +++ b/openeo_driver/save_result.py @@ -15,6 +15,7 @@ import typing from flask import send_from_directory, jsonify, Response from shapely.geometry import GeometryCollection, mapping +from shapely.geometry.base import BaseGeometry import xarray from openeo.metadata import CollectionMetadata @@ -489,7 +490,7 @@ class AggregatePolygonResultCSV(AggregatePolygonResult): # TODO #71 #114 EP-3981 port this to proper vector cube support # TODO: this is a openeo-geopyspark-driver related/specific implementation, move it over there? 
- def __init__(self, csv_dir, regions: GeometryCollection, metadata: CollectionMetadata = None): + def __init__(self, csv_dir, regions: Union[GeometryCollection, DriverVectorCube, DelayedVector, BaseGeometry], metadata: CollectionMetadata = None): super().__init__(timeseries=None, regions=regions, metadata=metadata) self._csv_dir = csv_dir self.raster_bands = None @@ -503,9 +504,24 @@ def get_data(self): message = f"aggregate_spatial did not generate any output, intermediate output path on the server: {self._csv_dir}") df = pd.concat(map(pd.read_csv, paths)) features = df.feature_index.unique() - features.sort() if str(features.dtype) == 'int64': - features = np.arange(0, features.max() + 1) + if isinstance(self._regions, DriverVectorCube): + amount_of_regions = len(self._regions.get_geometries()) + elif isinstance(self._regions, DelayedVector): + geometries = list(self._regions.geometries) + amount_of_regions = len(geometries) + elif isinstance(self._regions, GeometryCollection): + amount_of_regions = len(self._regions) + elif isinstance(self._regions, BaseGeometry): + amount_of_regions = 1 # layercatalog.py:1026 implies that this is a single polygon + else: + _log.warning("Using polygon with largest index to estimate how many input polygons there where.") + amount_of_regions = features.max() + 1 + features = np.arange(0, amount_of_regions) + else: + features.sort() + print("interesting str(features.dtype): " + str(features.dtype)) + logging.warning("interesting str(features.dtype): " + str(features.dtype)) def _flatten_df(df): df.index = df.feature_index From dfa4be8c0a538d90e887ecd1dce3fad9d471aaad Mon Sep 17 00:00:00 2001 From: Emile Sonneveld Date: Wed, 22 Mar 2023 14:58:58 +0100 Subject: [PATCH 2/3] Remove non-relevant case. If there are zero results, no list is returned. If there is 1 result, it is also the last result. 
--- openeo_driver/save_result.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/openeo_driver/save_result.py b/openeo_driver/save_result.py index 16060b7e..d392faee 100644 --- a/openeo_driver/save_result.py +++ b/openeo_driver/save_result.py @@ -505,6 +505,7 @@ def get_data(self): df = pd.concat(map(pd.read_csv, paths)) features = df.feature_index.unique() if str(features.dtype) == 'int64': + # TODO: This logic might get cleaned up when one kind ove vector cube is used everywhere if isinstance(self._regions, DriverVectorCube): amount_of_regions = len(self._regions.get_geometries()) elif isinstance(self._regions, DelayedVector): @@ -512,8 +513,6 @@ def get_data(self): amount_of_regions = len(geometries) elif isinstance(self._regions, GeometryCollection): amount_of_regions = len(self._regions) - elif isinstance(self._regions, BaseGeometry): - amount_of_regions = 1 # layercatalog.py:1026 implies that this is a single polygon else: _log.warning("Using polygon with largest index to estimate how many input polygons there where.") amount_of_regions = features.max() + 1 From 2e27d68855877fabaef83a3982214f1d38fff77d Mon Sep 17 00:00:00 2001 From: Emile Sonneveld Date: Wed, 22 Mar 2023 15:42:23 +0100 Subject: [PATCH 3/3] This else clause did not get triggered in auto tests. I could trigger it by manually crafting a CSV that had strings for feature IDs, and the code still worked. --- openeo_driver/save_result.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/openeo_driver/save_result.py b/openeo_driver/save_result.py index d392faee..35834393 100644 --- a/openeo_driver/save_result.py +++ b/openeo_driver/save_result.py @@ -519,8 +519,6 @@ def get_data(self): features = np.arange(0, amount_of_regions) else: features.sort() - print("interesting str(features.dtype): " + str(features.dtype)) - logging.warning("interesting str(features.dtype): " + str(features.dtype)) def _flatten_df(df): df.index = df.feature_index