Skip to content

Commit

Permalink
cli/run pipeline: add option to export the model results as well
Browse files Browse the repository at this point in the history
  • Loading branch information
iona5 committed Nov 4, 2024
1 parent 2274883 commit 69e7438
Show file tree
Hide file tree
Showing 4 changed files with 124 additions and 13 deletions.
70 changes: 59 additions & 11 deletions darts-export/src/darts_export/inference.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,52 +17,100 @@ def __init__(self, ds) -> None:
"""Initialize the dataset."""
self.ds: xarray.Dataset = ds

def export_geotiff(self, path: Path, filename: str, layername: str, tags={}):
"""Export a GeoTiff file from the inference result, specifying the layer to export.
Args:
path (Path): the folder path where to write the result as GeoTIFF
filename (str): The filename (basename) of the GeoTIFF to write
layername (str): the name of the layer to write
tags (dict, optional): optional GeoTIFF metadata to be written. Defaults to no additional metadata.
Returns:
_type_: _description_
"""
# write the probability layer from the raster to a GeoTiff
file_path = path / filename
self.ds[layername].rio.to_raster(file_path, driver="GTiff", tags=tags, compress="LZW")
return file_path

def export_probabilities(self, path: Path, filename="pred_probabilities.tif", tags={}):
"""Export the probabilities layer to a file.
If the inference result is an ensemble result and it contains also results of the models,
also the probabilities of the models will be written as individual files as well.
Args:
path (Path): The path where to export to.
filename (str, optional): the filename. Defaults to "pred_probabilities.tif".
tags (dict, optional): optional GeoTIFF metadate to be written. Defaults to no additional metadata.
tags (dict, optional): optional GeoTIFF metadata to be written. Defaults to no additional metadata.
Returns:
the Path of the written file
"""
# write the probability layer from the raster to a GeoTiff
file_path = path / filename
self.ds.probabilities.rio.to_raster(file_path, driver="GTiff", tags=tags, compress="LZW")
return file_path
# check if the ds as also the model outputs in it
for check_subset in ["tcvis", "notcvis"]:
check_layer_name = "probabilities-" + check_subset
if check_layer_name in self.ds:
fname_p = Path(filename)
fname = fname_p.stem + "-" + check_subset + ".tif"
self.export_geotiff(path, fname, check_layer_name, tags)

return self.export_geotiff(path, filename, "probabilities", tags)

def export_binarized(self, path: Path, filename="pred_binarized.tif", tags={}):
"""Export the binarized segmentation result of the inference Result.
"""Export the binarized segmentation result of the inference result.
If the inference result is an ensemble result and it contains also results of the models,
also the binarized probabilities of the models will be written as individual files as well.
Args:
path (Path): The path where to export to.
filename (str, optional): the filename. Defaults to "pred_binarized.tif".
tags (dict, optional): optional GeoTIFF metadate to be written. Defaults to no additional metadata.
tags (dict, optional): optional GeoTIFF metadata to be written. Defaults to no additional metadata.
Returns:
the Path of the written file
"""
file_path = path / filename
self.ds.binarized_segmentation.rio.to_raster(file_path, driver="GTiff", tags=tags, compress="LZW")
return file_path
# check if the ds as also the model outputs in it
for check_subset in ["tcvis", "notcvis"]:
check_layer_name = "binarized_segmentation-" + check_subset
if check_layer_name in self.ds:
fname_p = Path(filename)
fname = fname_p.stem + "-" + check_subset + ".tif"
self.export_geotiff(path, fname, check_layer_name, tags)

return self.export_geotiff(path, filename, "binarized_segmentation", tags)

def export_polygonized(self, path: Path, filename_prefix="pred_segments", minimum_mapping_unit=32):
"""Export the binarized probabilities as a vector dataset in GeoPackage and GeoParquet format.
If the inference result is an ensemble result and it contains also results of the models,
these datasets will also be polygonized. In that case a parquet file for each result (ensemble + models) as
well as a GeoPackage file containing all polygonization results as individual layers will be written.
Args:
path (Path): The path where to export the files
filename_prefix (str, optional): the file prefix of the exported files. Defaults to "pred_segments".
minimum_mapping_unit (int, optional): segments covering less pixel are removed. Defaults to 32.
"""
polygon_gdf = vectorization.vectorize(self.ds, minimum_mapping_unit=minimum_mapping_unit)
polygon_gdf = vectorization.vectorize(
self.ds, "binarized_segmentation", minimum_mapping_unit=minimum_mapping_unit
)

path_gpkg = path / f"{filename_prefix}.gpkg"
path_parquet = path / f"{filename_prefix}.parquet"

polygon_gdf.to_file(path_gpkg, layer=filename_prefix)
polygon_gdf.to_parquet(path_parquet)

for subset_name in ["tcvis", "notcvis"]:
layer_name = "binarized_segmentation-" + subset_name
if layer_name in self.ds:
polygon_gdf = vectorization.vectorize(self.ds, layer_name, minimum_mapping_unit=minimum_mapping_unit)
polygon_gdf.to_file(path_gpkg, layer=f"{filename_prefix} ({subset_name.upper()})")
polygon_gdf.to_parquet(path / f"{filename_prefix}-{subset_name}.parquet")
13 changes: 13 additions & 0 deletions darts-export/test/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,3 +57,16 @@ def probabilities_2():
nd_prob_cluster[12:15, 84:87] = 110

return _create_dataset(nd_prob_cluster, binarization_threshold=50)


@pytest.fixture
def ensemble_submodel_dataset(probabilities_2):
    """Build a dataset that mimics an ensemble result carrying per-model output layers."""
    ds = probabilities_2

    # duplicate each ensemble layer so both sub-models ("tcvis"/"notcvis") have their own copy
    for base_layer in ("probabilities", "binarized_segmentation"):
        for subset in ("tcvis", "notcvis"):
            ds[f"{base_layer}-{subset}"] = ds[base_layer].copy()

    return ds
34 changes: 34 additions & 0 deletions darts-export/test/test_inference.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,3 +78,37 @@ def test_writeVectorsComplex(probabilities_2: Dataset, tmp_path: Path):
else:
gdf = gpd.read_file(tmp_path / f"pred_segments.{suffix}")
assert gdf.shape == (4, len(POLYGONOUTPUT_EXPECTED_COLUMNS))


def test_writeProbabilitiesWithSubmodels(ensemble_submodel_dataset, tmp_path):
    """The probabilities export writes the ensemble file plus one file per sub-model."""
    writer = inference.InferenceResultWriter(ensemble_submodel_dataset)
    result_path = writer.export_probabilities(tmp_path)

    assert result_path.is_file()
    for subset in ("notcvis", "tcvis"):
        assert (tmp_path / f"{result_path.stem}-{subset}.tif").is_file()


def test_writeBinarizedWithSubmodels(ensemble_submodel_dataset, tmp_path):
    """The binarized export writes the ensemble file plus one file per sub-model."""
    writer = inference.InferenceResultWriter(ensemble_submodel_dataset)
    result_path = writer.export_binarized(tmp_path)

    assert result_path.is_file()
    for subset in ("notcvis", "tcvis"):
        assert (tmp_path / f"{result_path.stem}-{subset}.tif").is_file()


def test_writeVectorsWithSubmodels(ensemble_submodel_dataset, tmp_path):
    """The polygonized export writes one parquet per result and a multi-layer GeoPackage."""
    writer = inference.InferenceResultWriter(ensemble_submodel_dataset)
    writer.export_polygonized(tmp_path)

    # one parquet file per result (ensemble + both sub-models)
    for stem in ("pred_segments", "pred_segments-notcvis", "pred_segments-tcvis"):
        assert (tmp_path / f"{stem}.parquet").is_file()

    # a single GeoPackage holding one layer per result
    gpkg_path = tmp_path / "pred_segments.gpkg"
    assert gpkg_path.is_file()
    layers = gpd.list_layers(gpkg_path)
    assert set(layers.name) == {"pred_segments", "pred_segments (TCVIS)", "pred_segments (NOTCVIS)"}
20 changes: 18 additions & 2 deletions darts/src/darts/native.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@ def run_native_planet_pipeline(
overlap: int = 16,
batch_size: int = 8,
reflection: int = 0,
write_model_outputs: bool = False,
):
"""Search for all PlanetScope scenes in the given directory and runs the segmentation pipeline on them.
Expand All @@ -63,6 +64,8 @@ def run_native_planet_pipeline(
overlap (int, optional): The overlap to use for inference. Defaults to 16.
batch_size (int, optional): The batch size to use for inference. Defaults to 8.
reflection (int, optional): The reflection padding to use for inference. Defaults to 0.
write_model_outputs (bool, optional): Also save the model outputs, not only the ensemble result.
Defaults to False.
Todo:
Document the structure of the input data dir.
Expand All @@ -84,7 +87,12 @@ def run_native_planet_pipeline(

ensemble = EnsembleV1(model_dir / tcvis_model_name, model_dir / notcvis_model_name)
tile = ensemble.segment_tile(
tile, patch_size=patch_size, overlap=overlap, batch_size=batch_size, reflection=reflection
tile,
patch_size=patch_size,
overlap=overlap,
batch_size=batch_size,
reflection=reflection,
keep_inputs=write_model_outputs,
)
tile = prepare_export(tile)

Expand All @@ -109,6 +117,7 @@ def run_native_sentinel2_pipeline(
overlap: int = 16,
batch_size: int = 8,
reflection: int = 0,
write_model_outputs: bool = False,
):
"""Search for all Sentinel-2 scenes in the given directory and runs the segmentation pipeline on them.
Expand All @@ -127,6 +136,8 @@ def run_native_sentinel2_pipeline(
overlap (int, optional): The overlap to use for inference. Defaults to 16.
batch_size (int, optional): The batch size to use for inference. Defaults to 8.
reflection (int, optional): The reflection padding to use for inference. Defaults to 0.
write_model_outputs (bool, optional): Also save the model outputs, not only the ensemble result.
Defaults to False.
Todo:
Document the structure of the input data dir.
Expand All @@ -150,7 +161,12 @@ def run_native_sentinel2_pipeline(

ensemble = EnsembleV1(model_dir / tcvis_model_name, model_dir / notcvis_model_name)
tile = ensemble.segment_tile(
tile, patch_size=patch_size, overlap=overlap, batch_size=batch_size, reflection=reflection
tile,
patch_size=patch_size,
overlap=overlap,
batch_size=batch_size,
reflection=reflection,
keep_inputs=write_model_outputs,
)
tile = prepare_export(tile)

Expand Down

0 comments on commit 69e7438

Please sign in to comment.