Skip to content

Commit

Permalink
cli/run pipeline: add option to export the model results as well
Browse files Browse the repository at this point in the history
  • Loading branch information
iona5 committed Nov 4, 2024
1 parent 2274883 commit 69e7438
Show file tree
Hide file tree
Showing 4 changed files with 124 additions and 13 deletions.
70 changes: 59 additions & 11 deletions darts-export/src/darts_export/inference.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,52 +17,100 @@ def __init__(self, ds) -> None:
"""Initialize the dataset."""
self.ds: xarray.Dataset = ds

def export_geotiff(self, path: Path, filename: str, layername: str, tags={}):
"""Export a GeoTiff file from the inference result, specifying the layer to export.
Args:
path (Path): the folder path where to write the result as GeoTIFF
filename (str): The filename (basename) of the GeoTIFF to write
layername (str): the name of the layer to write
tags (dict, optional): optional GeoTIFF metadata to be written. Defaults to no additional metadata.
Returns:
_type_: _description_
"""
# write the probability layer from the raster to a GeoTiff
file_path = path / filename
self.ds[layername].rio.to_raster(file_path, driver="GTiff", tags=tags, compress="LZW")
return file_path

def export_probabilities(self, path: Path, filename="pred_probabilities.tif", tags={}):
"""Export the probabilities layer to a file.
If the inference result is an ensemble result and it contains also results of the models,
also the probabilities of the models will be written as individual files as well.
Args:
path (Path): The path where to export to.
filename (str, optional): the filename. Defaults to "pred_probabilities.tif".
tags (dict, optional): optional GeoTIFF metadate to be written. Defaults to no additional metadata.
tags (dict, optional): optional GeoTIFF metadata to be written. Defaults to no additional metadata.
Returns:
the Path of the written file
"""
# write the probability layer from the raster to a GeoTiff
file_path = path / filename
self.ds.probabilities.rio.to_raster(file_path, driver="GTiff", tags=tags, compress="LZW")
return file_path
# check if the ds as also the model outputs in it
for check_subset in ["tcvis", "notcvis"]:
check_layer_name = "probabilities-" + check_subset
if check_layer_name in self.ds:
fname_p = Path(filename)
fname = fname_p.stem + "-" + check_subset + ".tif"
self.export_geotiff(path, fname, check_layer_name, tags)

return self.export_geotiff(path, filename, "probabilities", tags)

def export_binarized(self, path: Path, filename="pred_binarized.tif", tags={}):
"""Export the binarized segmentation result of the inference Result.
"""Export the binarized segmentation result of the inference result.
If the inference result is an ensemble result and it contains also results of the models,
also the binarized probabilities of the models will be written as individual files as well.
Args:
path (Path): The path where to export to.
filename (str, optional): the filename. Defaults to "pred_binarized.tif".
tags (dict, optional): optional GeoTIFF metadate to be written. Defaults to no additional metadata.
tags (dict, optional): optional GeoTIFF metadata to be written. Defaults to no additional metadata.
Returns:
the Path of the written file
"""
file_path = path / filename
self.ds.binarized_segmentation.rio.to_raster(file_path, driver="GTiff", tags=tags, compress="LZW")
return file_path
# check if the ds as also the model outputs in it
for check_subset in ["tcvis", "notcvis"]:
check_layer_name = "binarized_segmentation-" + check_subset
if check_layer_name in self.ds:
fname_p = Path(filename)
fname = fname_p.stem + "-" + check_subset + ".tif"
self.export_geotiff(path, fname, check_layer_name, tags)

return self.export_geotiff(path, filename, "binarized_segmentation", tags)

def export_polygonized(self, path: Path, filename_prefix="pred_segments", minimum_mapping_unit=32):
"""Export the binarized probabilities as a vector dataset in GeoPackage and GeoParquet format.
If the inference result is an ensemble result and it contains also results of the models,
these datasets will also be polygonized. In that case a parquet file for each result (ensemble + models) as
well as a GeoPackage file containing all polygonization results as individual layers will be written.
Args:
path (Path): The path where to export the files
filename_prefix (str, optional): the file prefix of the exported files. Defaults to "pred_segments".
minimum_mapping_unit (int, optional): segments covering less pixel are removed. Defaults to 32.
"""
polygon_gdf = vectorization.vectorize(self.ds, minimum_mapping_unit=minimum_mapping_unit)
polygon_gdf = vectorization.vectorize(
self.ds, "binarized_segmentation", minimum_mapping_unit=minimum_mapping_unit
)

path_gpkg = path / f"{filename_prefix}.gpkg"
path_parquet = path / f"{filename_prefix}.parquet"

polygon_gdf.to_file(path_gpkg, layer=filename_prefix)
polygon_gdf.to_parquet(path_parquet)

for subset_name in ["tcvis", "notcvis"]:
layer_name = "binarized_segmentation-" + subset_name
if layer_name in self.ds:
polygon_gdf = vectorization.vectorize(self.ds, layer_name, minimum_mapping_unit=minimum_mapping_unit)
polygon_gdf.to_file(path_gpkg, layer=f"{filename_prefix} ({subset_name.upper()})")
polygon_gdf.to_parquet(path / f"{filename_prefix}-{subset_name}.parquet")
13 changes: 13 additions & 0 deletions darts-export/test/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,3 +57,16 @@ def probabilities_2():
nd_prob_cluster[12:15, 84:87] = 110

return _create_dataset(nd_prob_cluster, binarization_threshold=50)


@pytest.fixture
def ensemble_submodel_dataset(probabilities_2):
    """Build a dataset that mimics an ensemble result carrying per-model output layers."""
    ds = probabilities_2

    # duplicate each ensemble layer so both sub-models ("tcvis"/"notcvis") have their own copy
    for base_layer in ("probabilities", "binarized_segmentation"):
        for subset in ("tcvis", "notcvis"):
            ds[f"{base_layer}-{subset}"] = ds[base_layer].copy()

    return ds
34 changes: 34 additions & 0 deletions darts-export/test/test_inference.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,3 +78,37 @@ def test_writeVectorsComplex(probabilities_2: Dataset, tmp_path: Path):
else:
gdf = gpd.read_file(tmp_path / f"pred_segments.{suffix}")
assert gdf.shape == (4, len(POLYGONOUTPUT_EXPECTED_COLUMNS))


def test_writeProbabilitiesWithSubmodels(ensemble_submodel_dataset, tmp_path):
    """The probabilities export writes the ensemble file plus one file per sub-model."""
    writer = inference.InferenceResultWriter(ensemble_submodel_dataset)
    result_path = writer.export_probabilities(tmp_path)

    assert result_path.is_file()
    for subset in ("notcvis", "tcvis"):
        assert (tmp_path / f"{result_path.stem}-{subset}.tif").is_file()


def test_writeBinarizedWithSubmodels(ensemble_submodel_dataset, tmp_path):
    """The binarized export writes the ensemble file plus one file per sub-model."""
    writer = inference.InferenceResultWriter(ensemble_submodel_dataset)
    result_path = writer.export_binarized(tmp_path)

    assert result_path.is_file()
    for subset in ("notcvis", "tcvis"):
        assert (tmp_path / f"{result_path.stem}-{subset}.tif").is_file()


def test_writeVectorsWithSubmodels(ensemble_submodel_dataset, tmp_path):
    """The polygonized export writes one parquet per result and a multi-layer GeoPackage."""
    writer = inference.InferenceResultWriter(ensemble_submodel_dataset)
    writer.export_polygonized(tmp_path)

    # one parquet file per result (ensemble + both sub-models)
    for stem in ("pred_segments", "pred_segments-notcvis", "pred_segments-tcvis"):
        assert (tmp_path / f"{stem}.parquet").is_file()

    # a single GeoPackage holding one layer per result
    gpkg_path = tmp_path / "pred_segments.gpkg"
    assert gpkg_path.is_file()
    layers = gpd.list_layers(gpkg_path)
    assert set(layers.name) == {"pred_segments", "pred_segments (TCVIS)", "pred_segments (NOTCVIS)"}
20 changes: 18 additions & 2 deletions darts/src/darts/native.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@ def run_native_planet_pipeline(
overlap: int = 16,
batch_size: int = 8,
reflection: int = 0,
write_model_outputs: bool = False,
):
"""Search for all PlanetScope scenes in the given directory and runs the segmentation pipeline on them.
Expand All @@ -63,6 +64,8 @@ def run_native_planet_pipeline(
overlap (int, optional): The overlap to use for inference. Defaults to 16.
batch_size (int, optional): The batch size to use for inference. Defaults to 8.
reflection (int, optional): The reflection padding to use for inference. Defaults to 0.
write_model_outputs (bool, optional): Also save the model outputs, not only the ensemble result.
Defaults to False.
Todo:
Document the structure of the input data dir.
Expand All @@ -84,7 +87,12 @@ def run_native_planet_pipeline(

ensemble = EnsembleV1(model_dir / tcvis_model_name, model_dir / notcvis_model_name)
tile = ensemble.segment_tile(
tile, patch_size=patch_size, overlap=overlap, batch_size=batch_size, reflection=reflection
tile,
patch_size=patch_size,
overlap=overlap,
batch_size=batch_size,
reflection=reflection,
keep_inputs=write_model_outputs,
)
tile = prepare_export(tile)

Expand All @@ -109,6 +117,7 @@ def run_native_sentinel2_pipeline(
overlap: int = 16,
batch_size: int = 8,
reflection: int = 0,
write_model_outputs: bool = False,
):
"""Search for all Sentinel-2 scenes in the given directory and runs the segmentation pipeline on them.
Expand All @@ -127,6 +136,8 @@ def run_native_sentinel2_pipeline(
overlap (int, optional): The overlap to use for inference. Defaults to 16.
batch_size (int, optional): The batch size to use for inference. Defaults to 8.
reflection (int, optional): The reflection padding to use for inference. Defaults to 0.
write_model_outputs (bool, optional): Also save the model outputs, not only the ensemble result.
Defaults to False.
Todo:
Document the structure of the input data dir.
Expand All @@ -150,7 +161,12 @@ def run_native_sentinel2_pipeline(

ensemble = EnsembleV1(model_dir / tcvis_model_name, model_dir / notcvis_model_name)
tile = ensemble.segment_tile(
tile, patch_size=patch_size, overlap=overlap, batch_size=batch_size, reflection=reflection
tile,
patch_size=patch_size,
overlap=overlap,
batch_size=batch_size,
reflection=reflection,
keep_inputs=write_model_outputs,
)
tile = prepare_export(tile)

Expand Down

0 comments on commit 69e7438

Please sign in to comment.