diff --git a/darts/src/darts/legacy_pipeline/__init__.py b/darts/src/darts/legacy_pipeline/__init__.py
index 5f3417f..8a003b8 100644
--- a/darts/src/darts/legacy_pipeline/__init__.py
+++ b/darts/src/darts/legacy_pipeline/__init__.py
@@ -1,6 +1,6 @@
 """Legacy pipeline module."""

-from darts.legacy_pipeline.legacy import run_native_planet_pipeline as run_native_planet_pipeline
-from darts.legacy_pipeline.legacy import run_native_sentinel2_pipeline as run_native_sentinel2_pipeline
-from darts.legacy_pipeline.legacy_fast import run_native_planet_pipeline_fast as run_native_planet_pipeline_fast
-from darts.legacy_pipeline.legacy_fast import run_native_sentinel2_pipeline_fast as run_native_sentinel2_pipeline_fast
+from darts.legacy_pipeline.planet import run_native_planet_pipeline as run_native_planet_pipeline
+from darts.legacy_pipeline.planet_fast import run_native_planet_pipeline_fast as run_native_planet_pipeline_fast
+from darts.legacy_pipeline.s2 import run_native_sentinel2_pipeline as run_native_sentinel2_pipeline
+from darts.legacy_pipeline.s2_fast import run_native_sentinel2_pipeline_fast as run_native_sentinel2_pipeline_fast
diff --git a/darts/src/darts/legacy_pipeline/_base.py b/darts/src/darts/legacy_pipeline/_base.py
new file mode 100644
index 0000000..3079dcb
--- /dev/null
+++ b/darts/src/darts/legacy_pipeline/_base.py
@@ -0,0 +1,188 @@
+import logging
+import multiprocessing as mp
+from collections import namedtuple
+from dataclasses import dataclass
+from pathlib import Path
+from typing import Literal
+
+logger = logging.getLogger(__name__)
+
+AquisitionData = namedtuple("AquisitionData", ["optical", "arcticdem", "tcvis", "data_masks"])
+
+
+@dataclass
+class _BasePipeline:
+    """Base class for all pipelines.
+
+    This class provides the run method, which is the main entry point for all pipelines.
+
+    This class is meant to be subclassed by the specific pipelines.
+    These specific pipelines must implement the following methods:
+
+    - "_path_generator" which generates the paths to the data (e.g. through a Source Mixin)
+    - "_get_data" which loads the data for a given path
+    - "_preprocess" which preprocesses the data (e.g. through a Processing Mixin)
+
+    It is possible to implement these methods by subclassing the provided mixins, e.g. _S2Mixin.
+
+    The concrete pipeline class must also be a dataclass, so that it fully inherits all parameters of this class (and the mixins).
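+
+    Example:
+        A minimal sketch of a concrete pipeline (the class name is a placeholder; the concrete
+        pipelines in this module follow exactly this pattern):
+
+        ```python
+        @dataclass
+        class _MyPipeline(_BasePipeline, _S2Mixin, _VRTMixin):
+            def _get_data(self, fpath: Path) -> AquisitionData:
+                # Load optical, arcticdem, tcvis and data_masks for the given path
+                ...
+        ```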
+ """ + + output_data_dir: Path + tcvis_dir: Path + model_dir: Path + tcvis_model_name: str + notcvis_model_name: str + device: Literal["cuda", "cpu", "auto"] | int | None + ee_project: str | None + ee_use_highvolume: bool + patch_size: int + overlap: int + batch_size: int + reflection: int + binarization_threshold: float + mask_erosion_size: int + min_object_size: int + use_quality_mask: bool + write_model_outputs: bool + + # These would be the type hints for the methods that need to be implemented + # Leaving them uncommented would result in a NotImplementedError if Mixins are used + # def _path_generator(self) -> Generator[tuple[Path, Path]]: + # raise NotImplementedError + + # def _get_data(self, fpath: Path) -> AquisitionData: + # raise NotImplementedError + + # def _preprocess(self, aqdata: AquisitionData) -> xr.Dataset: + # raise NotImplementedError + + def run(self): + import torch + from darts_ensemble.ensemble_v1 import EnsembleV1 + from darts_export.inference import InferenceResultWriter + from darts_postprocessing import prepare_export + from dask.distributed import Client, LocalCluster + from odc.stac import configure_rio + + from darts.utils.cuda import debug_info, decide_device + from darts.utils.earthengine import init_ee + + debug_info() + self.device = decide_device(self.device) + init_ee(self.ee_project, self.ee_use_highvolume) + + ensemble = EnsembleV1( + self.model_dir / self.tcvis_model_name, + self.model_dir / self.notcvis_model_name, + device=torch.device(self.device), + ) + + # Init Dask stuff with a context manager + with LocalCluster(n_workers=mp.cpu_count() - 1) as cluster, Client(cluster) as client: + logger.info(f"Using Dask client: {client}") + configure_rio(cloud_defaults=True, aws={"aws_unsigned": True}, client=client) + logger.info("Configured Rasterio with Dask") + + # Iterate over all the data (_path_generator) + for fpath, outpath in self._path_generator(): + try: + aqdata = self._get_data(fpath) + tile = self._preprocess(aqdata) + + tile = ensemble.segment_tile( + tile, + patch_size=self.patch_size, + overlap=self.overlap, + batch_size=self.batch_size, + reflection=self.reflection, + keep_inputs=self.write_model_outputs, + ) + tile = prepare_export( + tile, + self.binarization_threshold, + self.mask_erosion_size, + self.min_object_size, + self.use_quality_mask, + self.device, + ) + + outpath.mkdir(parents=True, exist_ok=True) + writer = InferenceResultWriter(tile) + writer.export_probabilities(outpath) + writer.export_binarized(outpath) + writer.export_polygonized(outpath) + except KeyboardInterrupt: + logger.warning("Keyboard interrupt detected.\nExiting...") + break + except Exception as e: + logger.warning(f"Could not process folder '{fpath.resolve()}'.\nSkipping...") + logger.exception(e) + + +# ============================================================================= +# Processing mixins (they provide _preprocess method) +# ============================================================================= +@dataclass +class _VRTMixin: + arcticdem_slope_vrt: Path + arcticdem_elevation_vrt: Path + + def _preprocess(self, aqdata: AquisitionData): + from darts_preprocessing import preprocess_legacy + + return preprocess_legacy(aqdata.optical, aqdata.arcticdem, aqdata.tcvis, aqdata.data_masks) + + +@dataclass +class _FastMixin: + arcticdem_dir: Path + tpi_outer_radius: int + tpi_inner_radius: int + + def _preprocess(self, aqdata: AquisitionData): + from darts_preprocessing import preprocess_legacy_fast + + return preprocess_legacy_fast( + 
aqdata.optical, + aqdata.arcticdem, + aqdata.tcvis, + aqdata.data_masks, + self.tpi_outer_radius, + self.tpi_inner_radius, + self.device, + ) + + +# ============================================================================= +# Source mixins (they provide _path_generator method) +# ============================================================================= +@dataclass +class _PlanetMixin: + orthotiles_dir: Path + scenes_dir: Path + + def _path_generator(self): + # Find all PlanetScope orthotiles + for fpath in self.orthotiles_dir.glob("*/*/"): + tile_id = fpath.parent.name + scene_id = fpath.name + outpath = self.output_data_dir / tile_id / scene_id + yield fpath, outpath + + # Find all PlanetScope scenes + for fpath in self.scenes_dir.glob("*/"): + scene_id = fpath.name + outpath = self.output_data_dir / scene_id + yield fpath, outpath + + +@dataclass +class _S2Mixin: + sentinel2_dir: Path + + def _path_generator(self): + for fpath in self.sentinel2_dir.glob("*/"): + scene_id = fpath.name + outpath = self.output_data_dir / scene_id + yield fpath, outpath diff --git a/darts/src/darts/legacy_pipeline/legacy.py b/darts/src/darts/legacy_pipeline/legacy.py deleted file mode 100644 index 374cc58..0000000 --- a/darts/src/darts/legacy_pipeline/legacy.py +++ /dev/null @@ -1,333 +0,0 @@ -"""Legacy Pipeline without any other framework.""" - -import logging -from collections.abc import Generator -from pathlib import Path -from typing import Literal - -from darts.legacy_pipeline.shared import AquisitionData, _load_planet, _load_s2, _segment_and_export - -logger = logging.getLogger(__name__) - - -def _process( - data_generator: Generator[tuple[Path, Path, AquisitionData], None, None], - model_dir: Path, - tcvis_model_name: str, - notcvis_model_name: str, - device: Literal["cuda", "cpu", "auto"] | int | None, - ee_project: str | None, - ee_use_highvolume: bool, - patch_size: int, - overlap: int, - batch_size: int, - reflection: int, - binarization_threshold: float, - mask_erosion_size: int, - min_object_size: int, - use_quality_mask: bool, - write_model_outputs: bool, -): - # Import here to avoid long loading times when running other commands - import torch - from darts_ensemble.ensemble_v1 import EnsembleV1 - from darts_preprocessing import preprocess_legacy - - from darts.utils.cuda import debug_info, decide_device - from darts.utils.earthengine import init_ee - - debug_info() - device = decide_device(device) - init_ee(ee_project, ee_use_highvolume) - - ensemble = EnsembleV1( - model_dir / tcvis_model_name, - model_dir / notcvis_model_name, - device=torch.device(device), - ) - - for fpath, outpath, aqdata in data_generator: - try: - tile = preprocess_legacy(aqdata.optical, aqdata.arcticdem, aqdata.tcvis, aqdata.data_masks) - - _segment_and_export( - tile, - ensemble, - outpath, - device, - patch_size, - overlap, - batch_size, - reflection, - binarization_threshold, - mask_erosion_size, - min_object_size, - use_quality_mask, - write_model_outputs, - ) - except Exception as e: - logger.warning(f"Could not process folder '{fpath.resolve()}'.\nSkipping...") - logger.exception(e) - - -def run_native_planet_pipeline( - *, - orthotiles_dir: Path, - scenes_dir: Path, - output_data_dir: Path, - arcticdem_slope_vrt: Path, - arcticdem_elevation_vrt: Path, - tcvis_dir: Path, - model_dir: Path, - tcvis_model_name: str = "RTS_v6_tcvis.pt", - notcvis_model_name: str = "RTS_v6_notcvis.pt", - device: Literal["cuda", "cpu", "auto"] | int | None = None, - ee_project: str | None = None, - ee_use_highvolume: bool = 
True, - patch_size: int = 1024, - overlap: int = 16, - batch_size: int = 8, - reflection: int = 0, - binarization_threshold: float = 0.5, - mask_erosion_size: int = 10, - min_object_size: int = 32, - use_quality_mask: bool = False, - write_model_outputs: bool = False, -): - """Search for all PlanetScope scenes in the given directory and runs the segmentation pipeline on them. - - Args: - orthotiles_dir (Path): The directory containing the PlanetScope orthotiles. - scenes_dir (Path): The directory containing the PlanetScope scenes. - output_data_dir (Path): The "output" directory. - arcticdem_slope_vrt (Path): The path to the ArcticDEM slope VRT file. - arcticdem_elevation_vrt (Path): The path to the ArcticDEM elevation VRT file. - tcvis_dir (Path): The directory containing the TCVis data. - model_dir (Path): The path to the models to use for segmentation. - tcvis_model_name (str, optional): The name of the model to use for TCVis. Defaults to "RTS_v6_tcvis.pt". - notcvis_model_name (str, optional): The name of the model to use for not TCVis. Defaults to "RTS_v6_notcvis.pt". - device (Literal["cuda", "cpu"] | int, optional): The device to run the model on. - If "cuda" take the first device (0), if int take the specified device. - If "auto" try to automatically select a free GPU (<50% memory usage). - Defaults to "cuda" if available, else "cpu". - ee_project (str, optional): The Earth Engine project ID or number to use. May be omitted if - project is defined within persistent API credentials obtained via `earthengine authenticate`. - ee_use_highvolume (bool, optional): Whether to use the high volume server (https://earthengine-highvolume.googleapis.com). - patch_size (int, optional): The patch size to use for inference. Defaults to 1024. - overlap (int, optional): The overlap to use for inference. Defaults to 16. - batch_size (int, optional): The batch size to use for inference. Defaults to 8. - reflection (int, optional): The reflection padding to use for inference. Defaults to 0. - binarization_threshold (float, optional): The threshold to binarize the probabilities. Defaults to 0.5. - mask_erosion_size (int, optional): The size of the disk to use for mask erosion and the edge-cropping. - Defaults to 10. - min_object_size (int, optional): The minimum object size to keep in pixel. Defaults to 32. - use_quality_mask (bool, optional): Whether to use the "quality" mask instead of the "valid" mask - to mask the output. - write_model_outputs (bool, optional): Also save the model outputs, not only the ensemble result. - Defaults to False. - - Examples: - ### PS Orthotile - - Data directory structure: - - ```sh - data/input - ├── ArcticDEM - │ ├── elevation.vrt - │ ├── slope.vrt - │ ├── relative_elevation - │ │ └── 4372514_relative_elevation_100.tif - │ └── slope - │ └── 4372514_slope.tif - └── planet - └── PSOrthoTile - └── 4372514/5790392_4372514_2022-07-16_2459 - ├── 5790392_4372514_2022-07-16_2459_BGRN_Analytic_metadata.xml - ├── 5790392_4372514_2022-07-16_2459_BGRN_DN_udm.tif - ├── 5790392_4372514_2022-07-16_2459_BGRN_SR.tif - ├── 5790392_4372514_2022-07-16_2459_metadata.json - └── 5790392_4372514_2022-07-16_2459_udm2.tif - ``` - - then the config should be - - ``` - ... 
- orthotiles_dir: data/input/planet/PSOrthoTile - arcticdem_slope_vrt: data/input/ArcticDEM/slope.vrt - arcticdem_elevation_vrt: data/input/ArcticDEM/elevation.vrt - ``` - - ### PS Scene - - Data directory structure: - - ```sh - data/input - ├── ArcticDEM - │ ├── elevation.vrt - │ ├── slope.vrt - │ ├── relative_elevation - │ │ └── 4372514_relative_elevation_100.tif - │ └── slope - │ └── 4372514_slope.tif - └── planet - └── PSScene - └── 20230703_194241_43_2427 - ├── 20230703_194241_43_2427_3B_AnalyticMS_metadata.xml - ├── 20230703_194241_43_2427_3B_AnalyticMS_SR.tif - ├── 20230703_194241_43_2427_3B_udm2.tif - ├── 20230703_194241_43_2427_metadata.json - └── 20230703_194241_43_2427.json - ``` - - then the config should be - - ``` - ... - scenes_dir: data/input/planet/PSScene - arcticdem_slope_vrt: data/input/ArcticDEM/slope.vrt - arcticdem_elevation_vrt: data/input/ArcticDEM/elevation.vrt - ``` - - - """ - data_generator = _load_planet( - orthotiles_dir, - scenes_dir, - output_data_dir, - arcticdem_slope_vrt, - arcticdem_elevation_vrt, - tcvis_dir, - tpi_outer_radius=10, - ) - _process( - data_generator, - model_dir, - tcvis_model_name, - notcvis_model_name, - device, - ee_project, - ee_use_highvolume, - patch_size, - overlap, - batch_size, - reflection, - binarization_threshold, - mask_erosion_size, - min_object_size, - use_quality_mask, - write_model_outputs, - ) - - -def run_native_sentinel2_pipeline( - *, - sentinel2_dir: Path, - output_data_dir: Path, - arcticdem_slope_vrt: Path, - arcticdem_elevation_vrt: Path, - tcvis_dir: Path, - model_dir: Path, - tcvis_model_name: str = "RTS_v6_tcvis_s2native.pt", - notcvis_model_name: str = "RTS_v6_notcvis_s2native.pt", - device: Literal["cuda", "cpu", "auto"] | int | None = None, - ee_project: str | None = None, - ee_use_highvolume: bool = True, - patch_size: int = 1024, - overlap: int = 16, - batch_size: int = 8, - reflection: int = 0, - binarization_threshold: float = 0.5, - mask_erosion_size: int = 10, - min_object_size: int = 32, - use_quality_mask: bool = False, - write_model_outputs: bool = False, -): - """Search for all Sentinel scenes in the given directory and runs the segmentation pipeline on them. - - Args: - sentinel2_dir (Path): The directory containing the Sentinel 2 scenes. - output_data_dir (Path): The "output" directory. - arcticdem_slope_vrt (Path): The path to the ArcticDEM slope VRT file. - arcticdem_elevation_vrt (Path): The path to the ArcticDEM elevation VRT file. - tcvis_dir (Path): The directory containing the TCVis data. - model_dir (Path): The path to the models to use for segmentation. - tcvis_model_name (str, optional): The name of the model to use for TCVis. Defaults to "RTS_v6_tcvis.pt". - notcvis_model_name (str, optional): The name of the model to use for not TCVis. Defaults to "RTS_v6_notcvis.pt". - device (Literal["cuda", "cpu"] | int, optional): The device to run the model on. - If "cuda" take the first device (0), if int take the specified device. - If "auto" try to automatically select a free GPU (<50% memory usage). - Defaults to "cuda" if available, else "cpu". - ee_project (str, optional): The Earth Engine project ID or number to use. May be omitted if - project is defined within persistent API credentials obtained via `earthengine authenticate`. - ee_use_highvolume (bool, optional): Whether to use the high volume server (https://earthengine-highvolume.googleapis.com). - patch_size (int, optional): The patch size to use for inference. Defaults to 1024. 
- overlap (int, optional): The overlap to use for inference. Defaults to 16. - batch_size (int, optional): The batch size to use for inference. Defaults to 8. - reflection (int, optional): The reflection padding to use for inference. Defaults to 0. - binarization_threshold (float, optional): The threshold to binarize the probabilities. Defaults to 0.5. - mask_erosion_size (int, optional): The size of the disk to use for mask erosion and the edge-cropping. - Defaults to 10. - min_object_size (int, optional): The minimum object size to keep in pixel. Defaults to 32. - use_quality_mask (bool, optional): Whether to use the "quality" mask instead of the "valid" mask - to mask the output. - write_model_outputs (bool, optional): Also save the model outputs, not only the ensemble result. - Defaults to False. - - Examples: - Data directory structure: - - ```sh - data/input - ├── ArcticDEM - │ ├── elevation.vrt - │ ├── slope.vrt - │ ├── relative_elevation - │ │ └── 4372514_relative_elevation_100.tif - │ └── slope - │ └── 4372514_slope.tif - └── sentinel2 - └── 20220826T200911_20220826T200905_T17XMJ/ - ├── 20220826T200911_20220826T200905_T17XMJ_SCL_clip.tif - └── 20220826T200911_20220826T200905_T17XMJ_SR_clip.tif - ``` - - then the config should be - - ``` - ... - sentinel2_dir: data/input/sentinel2 - arcticdem_slope_vrt: data/input/ArcticDEM/slope.vrt - arcticdem_elevation_vrt: data/input/ArcticDEM/elevation.vrt - ``` - - - """ - data_generator = _load_s2( - sentinel2_dir, - output_data_dir, - arcticdem_slope_vrt, - arcticdem_elevation_vrt, - tcvis_dir, - tpi_outer_radius=10, - ) - _process( - data_generator, - model_dir, - tcvis_model_name, - notcvis_model_name, - device, - ee_project, - ee_use_highvolume, - patch_size, - overlap, - batch_size, - reflection, - binarization_threshold, - mask_erosion_size, - min_object_size, - use_quality_mask, - write_model_outputs, - ) diff --git a/darts/src/darts/legacy_pipeline/legacy_fast.py b/darts/src/darts/legacy_pipeline/legacy_fast.py deleted file mode 100644 index 98513e9..0000000 --- a/darts/src/darts/legacy_pipeline/legacy_fast.py +++ /dev/null @@ -1,273 +0,0 @@ -"""Legacy Pipeline without any other framework, but a faster and improved version.""" - -import logging -from collections.abc import Generator -from pathlib import Path -from typing import Literal - -from darts.legacy_pipeline.shared import AquisitionData, _load_planet, _load_s2, _segment_and_export - -logger = logging.getLogger(__name__) - - -def _process_fast( - data_generator: Generator[tuple[Path, Path, AquisitionData], None, None], - model_dir: Path, - tcvis_model_name: str, - notcvis_model_name: str, - device: Literal["cuda", "cpu", "auto"] | int | None, - ee_project: str | None, - ee_use_highvolume: bool, - tpi_outer_radius: int, - tpi_inner_radius: int, - patch_size: int, - overlap: int, - batch_size: int, - reflection: int, - binarization_threshold: float, - mask_erosion_size: int, - min_object_size: int, - use_quality_mask: bool, - write_model_outputs: bool, -): - # Import here to avoid long loading times when running other commands - import torch - from darts_ensemble.ensemble_v1 import EnsembleV1 - from darts_preprocessing import preprocess_legacy_fast - from dask.distributed import Client - from odc.stac import configure_rio - - from darts.utils.cuda import debug_info, decide_device - from darts.utils.earthengine import init_ee - - debug_info() - device = decide_device(device) - init_ee(ee_project, ee_use_highvolume) - - client = Client() - logger.info(f"Using Dask client: 
{client}") - configure_rio(cloud_defaults=True, aws={"aws_unsigned": True}, client=client) - logger.info("Configured Rasterio with Dask") - - ensemble = EnsembleV1( - model_dir / tcvis_model_name, - model_dir / notcvis_model_name, - device=torch.device(device), - ) - - for fpath, outpath, aqdata in data_generator: - try: - tile = preprocess_legacy_fast( - aqdata.optical, - aqdata.arcticdem, - aqdata.tcvis, - aqdata.data_masks, - tpi_outer_radius, - tpi_inner_radius, - device, - ) - - _segment_and_export( - tile, - ensemble, - outpath, - device, - patch_size, - overlap, - batch_size, - reflection, - binarization_threshold, - mask_erosion_size, - min_object_size, - use_quality_mask, - write_model_outputs, - ) - except Exception as e: - logger.warning(f"Could not process folder '{fpath.resolve()}'.\nSkipping...") - logger.exception(e) - - -def run_native_planet_pipeline_fast( - *, - orthotiles_dir: Path, - scenes_dir: Path, - output_data_dir: Path, - arcticdem_dir: Path, - tcvis_dir: Path, - model_dir: Path, - tcvis_model_name: str = "RTS_v6_tcvis.pt", - notcvis_model_name: str = "RTS_v6_notcvis.pt", - device: Literal["cuda", "cpu", "auto"] | int | None = None, - ee_project: str | None = None, - ee_use_highvolume: bool = True, - tpi_outer_radius: int = 100, - tpi_inner_radius: int = 0, - patch_size: int = 1024, - overlap: int = 16, - batch_size: int = 8, - reflection: int = 0, - binarization_threshold: float = 0.5, - mask_erosion_size: int = 10, - min_object_size: int = 32, - use_quality_mask: bool = False, - write_model_outputs: bool = False, -): - """Search for all PlanetScope scenes in the given directory and runs the segmentation pipeline on them. - - Loads the ArcticDEM from a datacube instead of VRT which is a lot faster and does not need manual preprocessing. - - Args: - orthotiles_dir (Path): The directory containing the PlanetScope orthotiles. - scenes_dir (Path): The directory containing the PlanetScope scenes. - output_data_dir (Path): The "output" directory. - arcticdem_dir (Path): The directory containing the ArcticDEM data (the datacube and the extent files). - Will be created and downloaded if it does not exist. - tcvis_dir (Path): The directory containing the TCVis data. - model_dir (Path): The path to the models to use for segmentation. - tcvis_model_name (str, optional): The name of the model to use for TCVis. Defaults to "RTS_v6_tcvis.pt". - notcvis_model_name (str, optional): The name of the model to use for not TCVis. Defaults to "RTS_v6_notcvis.pt". - device (Literal["cuda", "cpu"] | int, optional): The device to run the model on. - If "cuda" take the first device (0), if int take the specified device. - If "auto" try to automatically select a free GPU (<50% memory usage). - Defaults to "cuda" if available, else "cpu". - ee_project (str, optional): The Earth Engine project ID or number to use. May be omitted if - project is defined within persistent API credentials obtained via `earthengine authenticate`. - ee_use_highvolume (bool, optional): Whether to use the high volume server (https://earthengine-highvolume.googleapis.com). - tpi_outer_radius (int, optional): The outer radius of the annulus kernel for the tpi calculation - in m. Defaults 100m. - tpi_inner_radius (int, optional): The inner radius of the annulus kernel for the tpi calculation - in m. Defaults to 0. - patch_size (int, optional): The patch size to use for inference. Defaults to 1024. - overlap (int, optional): The overlap to use for inference. Defaults to 16. 
- batch_size (int, optional): The batch size to use for inference. Defaults to 8. - reflection (int, optional): The reflection padding to use for inference. Defaults to 0. - binarization_threshold (float, optional): The threshold to binarize the probabilities. Defaults to 0.5. - mask_erosion_size (int, optional): The size of the disk to use for mask erosion and the edge-cropping. - Defaults to 10. - min_object_size (int, optional): The minimum object size to keep in pixel. Defaults to 32. - use_quality_mask (bool, optional): Whether to use the "quality" mask instead of the "valid" mask - to mask the output. - write_model_outputs (bool, optional): Also save the model outputs, not only the ensemble result. - Defaults to False. - - """ - data_generator = _load_planet( - orthotiles_dir, - scenes_dir, - output_data_dir, - arcticdem_dir, - tcvis_dir, - tpi_outer_radius, - ) - _process_fast( - data_generator, - model_dir, - tcvis_model_name, - notcvis_model_name, - device, - ee_project, - ee_use_highvolume, - tpi_outer_radius, - tpi_inner_radius, - patch_size, - overlap, - batch_size, - reflection, - binarization_threshold, - mask_erosion_size, - min_object_size, - use_quality_mask, - write_model_outputs, - ) - - -def run_native_sentinel2_pipeline_fast( - *, - sentinel2_dir: Path, - output_data_dir: Path, - arcticdem_dir: Path, - tcvis_dir: Path, - model_dir: Path, - tcvis_model_name: str = "RTS_v6_tcvis_s2native.pt", - notcvis_model_name: str = "RTS_v6_notcvis_s2native.pt", - device: Literal["cuda", "cpu", "auto"] | int | None = None, - ee_project: str | None = None, - ee_use_highvolume: bool = True, - tpi_outer_radius: int = 100, - tpi_inner_radius: int = 0, - patch_size: int = 1024, - overlap: int = 16, - batch_size: int = 8, - reflection: int = 0, - binarization_threshold: float = 0.5, - mask_erosion_size: int = 10, - min_object_size: int = 32, - use_quality_mask: bool = False, - write_model_outputs: bool = False, -): - """Search for all Sentinel 2 scenes in the given directory and runs the segmentation pipeline on them. - - Loads the ArcticDEM from a datacube instead of VRT which is a lot faster and does not need manual preprocessing. - - Args: - sentinel2_dir (Path): The directory containing the Sentinel 2 scenes. - scenes_dir (Path): The directory containing the PlanetScope scenes. - output_data_dir (Path): The "output" directory. - arcticdem_dir (Path): The directory containing the ArcticDEM data (the datacube and the extent files). - Will be created and downloaded if it does not exist. - tcvis_dir (Path): The directory containing the TCVis data. - model_dir (Path): The path to the models to use for segmentation. - tcvis_model_name (str, optional): The name of the model to use for TCVis. Defaults to "RTS_v6_tcvis.pt". - notcvis_model_name (str, optional): The name of the model to use for not TCVis. Defaults to "RTS_v6_notcvis.pt". - device (Literal["cuda", "cpu"] | int, optional): The device to run the model on. - If "cuda" take the first device (0), if int take the specified device. - If "auto" try to automatically select a free GPU (<50% memory usage). - Defaults to "cuda" if available, else "cpu". - ee_project (str, optional): The Earth Engine project ID or number to use. May be omitted if - project is defined within persistent API credentials obtained via `earthengine authenticate`. - ee_use_highvolume (bool, optional): Whether to use the high volume server (https://earthengine-highvolume.googleapis.com). 
- tpi_outer_radius (int, optional): The outer radius of the annulus kernel for the tpi calculation - in m. Defaults to 100m. - tpi_inner_radius (int, optional): The inner radius of the annulus kernel for the tpi calculation - in m. Defaults to 0. - patch_size (int, optional): The patch size to use for inference. Defaults to 1024. - overlap (int, optional): The overlap to use for inference. Defaults to 16. - batch_size (int, optional): The batch size to use for inference. Defaults to 8. - reflection (int, optional): The reflection padding to use for inference. Defaults to 0. - binarization_threshold (float, optional): The threshold to binarize the probabilities. Defaults to 0.5. - mask_erosion_size (int, optional): The size of the disk to use for mask erosion and the edge-cropping. - Defaults to 10. - min_object_size (int, optional): The minimum object size to keep in pixel. Defaults to 32. - use_quality_mask (bool, optional): Whether to use the "quality" mask instead of the "valid" mask - to mask the output. - write_model_outputs (bool, optional): Also save the model outputs, not only the ensemble result. - Defaults to False. - - """ - data_generator = _load_s2( - sentinel2_dir, - output_data_dir, - arcticdem_dir, - tcvis_dir, - tpi_outer_radius, - ) - _process_fast( - data_generator, - model_dir, - tcvis_model_name, - notcvis_model_name, - device, - ee_project, - ee_use_highvolume, - tpi_outer_radius, - tpi_inner_radius, - patch_size, - overlap, - batch_size, - reflection, - binarization_threshold, - mask_erosion_size, - min_object_size, - use_quality_mask, - write_model_outputs, - ) diff --git a/darts/src/darts/legacy_pipeline/planet.py b/darts/src/darts/legacy_pipeline/planet.py new file mode 100644 index 0000000..95e2756 --- /dev/null +++ b/darts/src/darts/legacy_pipeline/planet.py @@ -0,0 +1,170 @@ +"""Legacy pipeline for Planet data.""" + +from dataclasses import dataclass +from pathlib import Path +from typing import Literal + +from darts.legacy_pipeline._base import AquisitionData, _BasePipeline, _PlanetMixin, _VRTMixin + + +@dataclass +class _LegacyNativePlanetPipeline(_BasePipeline, _PlanetMixin, _VRTMixin): + def _get_data(self, fpath: Path): + from darts_acquisition.arcticdem import load_arcticdem_from_vrt + from darts_acquisition.planet import load_planet_masks, load_planet_scene + from darts_acquisition.tcvis import load_tcvis + + optical = load_planet_scene(fpath) + arcticdem = load_arcticdem_from_vrt(self.arcticdem_slope_vrt, self.arcticdem_elevation_vrt, optical) + tcvis = load_tcvis(optical.odc.geobox, self.tcvis_dir) + data_masks = load_planet_masks(fpath) + aqdata = AquisitionData(optical, arcticdem, tcvis, data_masks) + return aqdata + + +def run_native_planet_pipeline( + *, + orthotiles_dir: Path, + scenes_dir: Path, + output_data_dir: Path, + arcticdem_slope_vrt: Path, + arcticdem_elevation_vrt: Path, + tcvis_dir: Path, + model_dir: Path, + tcvis_model_name: str = "RTS_v6_tcvis.pt", + notcvis_model_name: str = "RTS_v6_notcvis.pt", + device: Literal["cuda", "cpu", "auto"] | int | None = None, + ee_project: str | None = None, + ee_use_highvolume: bool = True, + patch_size: int = 1024, + overlap: int = 16, + batch_size: int = 8, + reflection: int = 0, + binarization_threshold: float = 0.5, + mask_erosion_size: int = 10, + min_object_size: int = 32, + use_quality_mask: bool = False, + write_model_outputs: bool = False, +): + """Search for all PlanetScope scenes in the given directory and runs the segmentation pipeline on them. 
+ + Args: + orthotiles_dir (Path): The directory containing the PlanetScope orthotiles. + scenes_dir (Path): The directory containing the PlanetScope scenes. + output_data_dir (Path): The "output" directory. + arcticdem_slope_vrt (Path): The path to the ArcticDEM slope VRT file. + arcticdem_elevation_vrt (Path): The path to the ArcticDEM elevation VRT file. + tcvis_dir (Path): The directory containing the TCVis data. + model_dir (Path): The path to the models to use for segmentation. + tcvis_model_name (str, optional): The name of the model to use for TCVis. Defaults to "RTS_v6_tcvis.pt". + notcvis_model_name (str, optional): The name of the model to use for not TCVis. Defaults to "RTS_v6_notcvis.pt". + device (Literal["cuda", "cpu"] | int, optional): The device to run the model on. + If "cuda" take the first device (0), if int take the specified device. + If "auto" try to automatically select a free GPU (<50% memory usage). + Defaults to "cuda" if available, else "cpu". + ee_project (str, optional): The Earth Engine project ID or number to use. May be omitted if + project is defined within persistent API credentials obtained via `earthengine authenticate`. + ee_use_highvolume (bool, optional): Whether to use the high volume server (https://earthengine-highvolume.googleapis.com). + patch_size (int, optional): The patch size to use for inference. Defaults to 1024. + overlap (int, optional): The overlap to use for inference. Defaults to 16. + batch_size (int, optional): The batch size to use for inference. Defaults to 8. + reflection (int, optional): The reflection padding to use for inference. Defaults to 0. + binarization_threshold (float, optional): The threshold to binarize the probabilities. Defaults to 0.5. + mask_erosion_size (int, optional): The size of the disk to use for mask erosion and the edge-cropping. + Defaults to 10. + min_object_size (int, optional): The minimum object size to keep in pixel. Defaults to 32. + use_quality_mask (bool, optional): Whether to use the "quality" mask instead of the "valid" mask + to mask the output. + write_model_outputs (bool, optional): Also save the model outputs, not only the ensemble result. + Defaults to False. + + Examples: + ### PS Orthotile + + Data directory structure: + + ```sh + data/input + ├── ArcticDEM + │ ├── elevation.vrt + │ ├── slope.vrt + │ ├── relative_elevation + │ │ └── 4372514_relative_elevation_100.tif + │ └── slope + │ └── 4372514_slope.tif + └── planet + └── PSOrthoTile + └── 4372514/5790392_4372514_2022-07-16_2459 + ├── 5790392_4372514_2022-07-16_2459_BGRN_Analytic_metadata.xml + ├── 5790392_4372514_2022-07-16_2459_BGRN_DN_udm.tif + ├── 5790392_4372514_2022-07-16_2459_BGRN_SR.tif + ├── 5790392_4372514_2022-07-16_2459_metadata.json + └── 5790392_4372514_2022-07-16_2459_udm2.tif + ``` + + then the config should be + + ``` + ... 
+ orthotiles_dir: data/input/planet/PSOrthoTile + arcticdem_slope_vrt: data/input/ArcticDEM/slope.vrt + arcticdem_elevation_vrt: data/input/ArcticDEM/elevation.vrt + ``` + + ### PS Scene + + Data directory structure: + + ```sh + data/input + ├── ArcticDEM + │ ├── elevation.vrt + │ ├── slope.vrt + │ ├── relative_elevation + │ │ └── 4372514_relative_elevation_100.tif + │ └── slope + │ └── 4372514_slope.tif + └── planet + └── PSScene + └── 20230703_194241_43_2427 + ├── 20230703_194241_43_2427_3B_AnalyticMS_metadata.xml + ├── 20230703_194241_43_2427_3B_AnalyticMS_SR.tif + ├── 20230703_194241_43_2427_3B_udm2.tif + ├── 20230703_194241_43_2427_metadata.json + └── 20230703_194241_43_2427.json + ``` + + then the config should be + + ``` + ... + scenes_dir: data/input/planet/PSScene + arcticdem_slope_vrt: data/input/ArcticDEM/slope.vrt + arcticdem_elevation_vrt: data/input/ArcticDEM/elevation.vrt + ``` + + + """ + _LegacyNativePlanetPipeline( + orthotiles_dir=orthotiles_dir, + scenes_dir=scenes_dir, + output_data_dir=output_data_dir, + arcticdem_elevation_vrt=arcticdem_elevation_vrt, + arcticdem_slope_vrt=arcticdem_slope_vrt, + tcvis_dir=tcvis_dir, + model_dir=model_dir, + tcvis_model_name=tcvis_model_name, + notcvis_model_name=notcvis_model_name, + device=device, + ee_project=ee_project, + ee_use_highvolume=ee_use_highvolume, + patch_size=patch_size, + overlap=overlap, + batch_size=batch_size, + reflection=reflection, + binarization_threshold=binarization_threshold, + mask_erosion_size=mask_erosion_size, + min_object_size=min_object_size, + use_quality_mask=use_quality_mask, + write_model_outputs=write_model_outputs, + ).run() diff --git a/darts/src/darts/legacy_pipeline/planet_fast.py b/darts/src/darts/legacy_pipeline/planet_fast.py new file mode 100644 index 0000000..9911e5d --- /dev/null +++ b/darts/src/darts/legacy_pipeline/planet_fast.py @@ -0,0 +1,115 @@ +"""Legacy pipeline for Planet data with optimized preprocessing.""" + +from dataclasses import dataclass +from math import ceil, sqrt +from pathlib import Path +from typing import Literal + +from darts.legacy_pipeline._base import AquisitionData, _BasePipeline, _FastMixin, _PlanetMixin + + +@dataclass +class _LegacyNativePlanetPipelineFast(_BasePipeline, _PlanetMixin, _FastMixin): + def _get_data(self, fpath: Path): + from darts_acquisition.arcticdem import load_arcticdem_tile + from darts_acquisition.planet import load_planet_masks, load_planet_scene + from darts_acquisition.tcvis import load_tcvis + + optical = load_planet_scene(fpath) + arcticdem = load_arcticdem_tile( + optical.odc.geobox, self.arcticdem_dir, resolution=10, buffer=ceil(self.tpi_outer_radius / 10 * sqrt(2)) + ) + tcvis = load_tcvis(optical.odc.geobox, self.tcvis_dir) + data_masks = load_planet_masks(fpath) + aqdata = AquisitionData(optical, arcticdem, tcvis, data_masks) + return aqdata + + +def run_native_planet_pipeline_fast( + *, + orthotiles_dir: Path, + scenes_dir: Path, + output_data_dir: Path, + arcticdem_dir: Path, + tcvis_dir: Path, + model_dir: Path, + tcvis_model_name: str = "RTS_v6_tcvis.pt", + notcvis_model_name: str = "RTS_v6_notcvis.pt", + device: Literal["cuda", "cpu", "auto"] | int | None = None, + ee_project: str | None = None, + ee_use_highvolume: bool = True, + tpi_outer_radius: int = 100, + tpi_inner_radius: int = 0, + patch_size: int = 1024, + overlap: int = 16, + batch_size: int = 8, + reflection: int = 0, + binarization_threshold: float = 0.5, + mask_erosion_size: int = 10, + min_object_size: int = 32, + use_quality_mask: bool = False, + 
write_model_outputs: bool = False,
+):
+    """Searches for all PlanetScope scenes in the given directory and runs the segmentation pipeline on them.
+
+    Loads the ArcticDEM from a datacube instead of a VRT, which is a lot faster and does not need manual preprocessing.
+
+    Args:
+        orthotiles_dir (Path): The directory containing the PlanetScope orthotiles.
+        scenes_dir (Path): The directory containing the PlanetScope scenes.
+        output_data_dir (Path): The "output" directory.
+        arcticdem_dir (Path): The directory containing the ArcticDEM data (the datacube and the extent files).
+            Will be created and downloaded if it does not exist.
+        tcvis_dir (Path): The directory containing the TCVis data.
+        model_dir (Path): The path to the models to use for segmentation.
+        tcvis_model_name (str, optional): The name of the model to use for TCVis. Defaults to "RTS_v6_tcvis.pt".
+        notcvis_model_name (str, optional): The name of the model to use for not TCVis. Defaults to "RTS_v6_notcvis.pt".
+        device (Literal["cuda", "cpu"] | int, optional): The device to run the model on.
+            If "cuda" take the first device (0), if int take the specified device.
+            If "auto" try to automatically select a free GPU (<50% memory usage).
+            Defaults to "cuda" if available, else "cpu".
+        ee_project (str, optional): The Earth Engine project ID or number to use. May be omitted if
+            project is defined within persistent API credentials obtained via `earthengine authenticate`.
+        ee_use_highvolume (bool, optional): Whether to use the high volume server (https://earthengine-highvolume.googleapis.com).
+        tpi_outer_radius (int, optional): The outer radius of the annulus kernel for the tpi calculation
+            in m. Defaults to 100m.
+        tpi_inner_radius (int, optional): The inner radius of the annulus kernel for the tpi calculation
+            in m. Defaults to 0.
+        patch_size (int, optional): The patch size to use for inference. Defaults to 1024.
+        overlap (int, optional): The overlap to use for inference. Defaults to 16.
+        batch_size (int, optional): The batch size to use for inference. Defaults to 8.
+        reflection (int, optional): The reflection padding to use for inference. Defaults to 0.
+        binarization_threshold (float, optional): The threshold to binarize the probabilities. Defaults to 0.5.
+        mask_erosion_size (int, optional): The size of the disk to use for mask erosion and the edge-cropping.
+            Defaults to 10.
+        min_object_size (int, optional): The minimum object size to keep in pixel. Defaults to 32.
+        use_quality_mask (bool, optional): Whether to use the "quality" mask instead of the "valid" mask
+            to mask the output.
+        write_model_outputs (bool, optional): Also save the model outputs, not only the ensemble result.
+            Defaults to False.
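+
+    Examples:
+        An illustrative config sketch (the paths are placeholders; the planet data layout is the same
+        as documented for `run_native_planet_pipeline`, with `arcticdem_dir` pointing to the datacube
+        directory instead of the VRT files):
+
+        ```
+        ...
+        orthotiles_dir: data/input/planet/PSOrthoTile
+        scenes_dir: data/input/planet/PSScene
+        arcticdem_dir: data/input/ArcticDEM
+        ```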
+ + """ + _LegacyNativePlanetPipelineFast( + orthotiles_dir=orthotiles_dir, + scenes_dir=scenes_dir, + output_data_dir=output_data_dir, + arcticdem_dir=arcticdem_dir, + tcvis_dir=tcvis_dir, + model_dir=model_dir, + tcvis_model_name=tcvis_model_name, + notcvis_model_name=notcvis_model_name, + device=device, + ee_project=ee_project, + ee_use_highvolume=ee_use_highvolume, + tpi_outer_radius=tpi_outer_radius, + tpi_inner_radius=tpi_inner_radius, + patch_size=patch_size, + overlap=overlap, + batch_size=batch_size, + reflection=reflection, + binarization_threshold=binarization_threshold, + mask_erosion_size=mask_erosion_size, + min_object_size=min_object_size, + use_quality_mask=use_quality_mask, + write_model_outputs=write_model_outputs, + ).run() diff --git a/darts/src/darts/legacy_pipeline/s2.py b/darts/src/darts/legacy_pipeline/s2.py new file mode 100644 index 0000000..fe598e1 --- /dev/null +++ b/darts/src/darts/legacy_pipeline/s2.py @@ -0,0 +1,129 @@ +"""Legacy pipeline for Sentinel 2 data.""" + +from dataclasses import dataclass +from pathlib import Path +from typing import Literal + +from darts.legacy_pipeline._base import AquisitionData, _BasePipeline, _S2Mixin, _VRTMixin + + +@dataclass +class _LegacyNativeSentinel2Pipeline(_BasePipeline, _S2Mixin, _VRTMixin): + def _get_data(self, fpath: Path): + from darts_acquisition.arcticdem import load_arcticdem_from_vrt + from darts_acquisition.s2 import load_s2_masks, load_s2_scene + from darts_acquisition.tcvis import load_tcvis + + optical = load_s2_scene(fpath) + arcticdem = load_arcticdem_from_vrt(self.arcticdem_slope_vrt, self.arcticdem_elevation_vrt, optical) + tcvis = load_tcvis(optical.odc.geobox, self.tcvis_dir) + data_masks = load_s2_masks(fpath, optical.odc.geobox) + aqdata = AquisitionData(optical, arcticdem, tcvis, data_masks) + return aqdata + + +def run_native_sentinel2_pipeline( + *, + sentinel2_dir: Path, + output_data_dir: Path, + arcticdem_slope_vrt: Path, + arcticdem_elevation_vrt: Path, + tcvis_dir: Path, + model_dir: Path, + tcvis_model_name: str = "RTS_v6_tcvis_s2native.pt", + notcvis_model_name: str = "RTS_v6_notcvis_s2native.pt", + device: Literal["cuda", "cpu", "auto"] | int | None = None, + ee_project: str | None = None, + ee_use_highvolume: bool = True, + patch_size: int = 1024, + overlap: int = 16, + batch_size: int = 8, + reflection: int = 0, + binarization_threshold: float = 0.5, + mask_erosion_size: int = 10, + min_object_size: int = 32, + use_quality_mask: bool = False, + write_model_outputs: bool = False, +): + """Search for all Sentinel scenes in the given directory and runs the segmentation pipeline on them. + + Args: + sentinel2_dir (Path): The directory containing the Sentinel 2 scenes. + output_data_dir (Path): The "output" directory. + arcticdem_slope_vrt (Path): The path to the ArcticDEM slope VRT file. + arcticdem_elevation_vrt (Path): The path to the ArcticDEM elevation VRT file. + tcvis_dir (Path): The directory containing the TCVis data. + model_dir (Path): The path to the models to use for segmentation. + tcvis_model_name (str, optional): The name of the model to use for TCVis. Defaults to "RTS_v6_tcvis.pt". + notcvis_model_name (str, optional): The name of the model to use for not TCVis. Defaults to "RTS_v6_notcvis.pt". + device (Literal["cuda", "cpu"] | int, optional): The device to run the model on. + If "cuda" take the first device (0), if int take the specified device. + If "auto" try to automatically select a free GPU (<50% memory usage). + Defaults to "cuda" if available, else "cpu". 
+ ee_project (str, optional): The Earth Engine project ID or number to use. May be omitted if + project is defined within persistent API credentials obtained via `earthengine authenticate`. + ee_use_highvolume (bool, optional): Whether to use the high volume server (https://earthengine-highvolume.googleapis.com). + patch_size (int, optional): The patch size to use for inference. Defaults to 1024. + overlap (int, optional): The overlap to use for inference. Defaults to 16. + batch_size (int, optional): The batch size to use for inference. Defaults to 8. + reflection (int, optional): The reflection padding to use for inference. Defaults to 0. + binarization_threshold (float, optional): The threshold to binarize the probabilities. Defaults to 0.5. + mask_erosion_size (int, optional): The size of the disk to use for mask erosion and the edge-cropping. + Defaults to 10. + min_object_size (int, optional): The minimum object size to keep in pixel. Defaults to 32. + use_quality_mask (bool, optional): Whether to use the "quality" mask instead of the "valid" mask + to mask the output. + write_model_outputs (bool, optional): Also save the model outputs, not only the ensemble result. + Defaults to False. + + Examples: + Data directory structure: + + ```sh + data/input + ├── ArcticDEM + │ ├── elevation.vrt + │ ├── slope.vrt + │ ├── relative_elevation + │ │ └── 4372514_relative_elevation_100.tif + │ └── slope + │ └── 4372514_slope.tif + └── sentinel2 + └── 20220826T200911_20220826T200905_T17XMJ/ + ├── 20220826T200911_20220826T200905_T17XMJ_SCL_clip.tif + └── 20220826T200911_20220826T200905_T17XMJ_SR_clip.tif + ``` + + then the config should be + + ``` + ... + sentinel2_dir: data/input/sentinel2 + arcticdem_slope_vrt: data/input/ArcticDEM/slope.vrt + arcticdem_elevation_vrt: data/input/ArcticDEM/elevation.vrt + ``` + + + """ + _LegacyNativeSentinel2Pipeline( + sentinel2_dir=sentinel2_dir, + output_data_dir=output_data_dir, + arcticdem_elevation_vrt=arcticdem_elevation_vrt, + arcticdem_slope_vrt=arcticdem_slope_vrt, + tcvis_dir=tcvis_dir, + model_dir=model_dir, + tcvis_model_name=tcvis_model_name, + notcvis_model_name=notcvis_model_name, + device=device, + ee_project=ee_project, + ee_use_highvolume=ee_use_highvolume, + patch_size=patch_size, + overlap=overlap, + batch_size=batch_size, + reflection=reflection, + binarization_threshold=binarization_threshold, + mask_erosion_size=mask_erosion_size, + min_object_size=min_object_size, + use_quality_mask=use_quality_mask, + write_model_outputs=write_model_outputs, + ).run() diff --git a/darts/src/darts/legacy_pipeline/s2_fast.py b/darts/src/darts/legacy_pipeline/s2_fast.py new file mode 100644 index 0000000..b45e197 --- /dev/null +++ b/darts/src/darts/legacy_pipeline/s2_fast.py @@ -0,0 +1,113 @@ +"""Legacy pipeline for Sentinel 2 data with optimized preprocessing.""" + +from dataclasses import dataclass +from math import ceil, sqrt +from pathlib import Path +from typing import Literal + +from darts.legacy_pipeline._base import AquisitionData, _BasePipeline, _FastMixin, _S2Mixin + + +@dataclass +class _LegacyNativeSentinel2PipelineFast(_BasePipeline, _S2Mixin, _FastMixin): + def _get_data(self, fpath: Path): + from darts_acquisition.arcticdem import load_arcticdem_tile + from darts_acquisition.s2 import load_s2_masks, load_s2_scene + from darts_acquisition.tcvis import load_tcvis + + optical = load_s2_scene(fpath) + arcticdem = load_arcticdem_tile( + optical.odc.geobox, self.arcticdem_dir, resolution=10, buffer=ceil(self.tpi_outer_radius / 10 * sqrt(2)) + ) + 
tcvis = load_tcvis(optical.odc.geobox, self.tcvis_dir)
+        data_masks = load_s2_masks(fpath, optical.odc.geobox)
+        aqdata = AquisitionData(optical, arcticdem, tcvis, data_masks)
+        return aqdata
+
+
+def run_native_sentinel2_pipeline_fast(
+    *,
+    sentinel2_dir: Path,
+    output_data_dir: Path,
+    arcticdem_dir: Path,
+    tcvis_dir: Path,
+    model_dir: Path,
+    tcvis_model_name: str = "RTS_v6_tcvis_s2native.pt",
+    notcvis_model_name: str = "RTS_v6_notcvis_s2native.pt",
+    device: Literal["cuda", "cpu", "auto"] | int | None = None,
+    ee_project: str | None = None,
+    ee_use_highvolume: bool = True,
+    tpi_outer_radius: int = 100,
+    tpi_inner_radius: int = 0,
+    patch_size: int = 1024,
+    overlap: int = 16,
+    batch_size: int = 8,
+    reflection: int = 0,
+    binarization_threshold: float = 0.5,
+    mask_erosion_size: int = 10,
+    min_object_size: int = 32,
+    use_quality_mask: bool = False,
+    write_model_outputs: bool = False,
+):
+    """Searches for all Sentinel 2 scenes in the given directory and runs the segmentation pipeline on them.
+
+    Loads the ArcticDEM from a datacube instead of a VRT, which is a lot faster and does not need manual preprocessing.
+
+    Args:
+        sentinel2_dir (Path): The directory containing the Sentinel 2 scenes.
+        output_data_dir (Path): The "output" directory.
+        arcticdem_dir (Path): The directory containing the ArcticDEM data (the datacube and the extent files).
+            Will be created and downloaded if it does not exist.
+        tcvis_dir (Path): The directory containing the TCVis data.
+        model_dir (Path): The path to the models to use for segmentation.
+        tcvis_model_name (str, optional): The name of the model to use for TCVis. Defaults to "RTS_v6_tcvis_s2native.pt".
+        notcvis_model_name (str, optional): The name of the model to use for not TCVis. Defaults to "RTS_v6_notcvis_s2native.pt".
+        device (Literal["cuda", "cpu"] | int, optional): The device to run the model on.
+            If "cuda" take the first device (0), if int take the specified device.
+            If "auto" try to automatically select a free GPU (<50% memory usage).
+            Defaults to "cuda" if available, else "cpu".
+        ee_project (str, optional): The Earth Engine project ID or number to use. May be omitted if
+            project is defined within persistent API credentials obtained via `earthengine authenticate`.
+        ee_use_highvolume (bool, optional): Whether to use the high volume server (https://earthengine-highvolume.googleapis.com).
+        tpi_outer_radius (int, optional): The outer radius of the annulus kernel for the tpi calculation
+            in m. Defaults to 100m.
+        tpi_inner_radius (int, optional): The inner radius of the annulus kernel for the tpi calculation
+            in m. Defaults to 0.
+        patch_size (int, optional): The patch size to use for inference. Defaults to 1024.
+        overlap (int, optional): The overlap to use for inference. Defaults to 16.
+        batch_size (int, optional): The batch size to use for inference. Defaults to 8.
+        reflection (int, optional): The reflection padding to use for inference. Defaults to 0.
+        binarization_threshold (float, optional): The threshold to binarize the probabilities. Defaults to 0.5.
+        mask_erosion_size (int, optional): The size of the disk to use for mask erosion and the edge-cropping.
+            Defaults to 10.
+        min_object_size (int, optional): The minimum object size to keep in pixel. Defaults to 32.
+        use_quality_mask (bool, optional): Whether to use the "quality" mask instead of the "valid" mask
+            to mask the output.
+ write_model_outputs (bool, optional): Also save the model outputs, not only the ensemble result. + Defaults to False. + + """ + _LegacyNativeSentinel2PipelineFast( + sentinel2_dir=sentinel2_dir, + output_data_dir=output_data_dir, + arcticdem_dir=arcticdem_dir, + tcvis_dir=tcvis_dir, + model_dir=model_dir, + tcvis_model_name=tcvis_model_name, + notcvis_model_name=notcvis_model_name, + device=device, + ee_project=ee_project, + ee_use_highvolume=ee_use_highvolume, + tpi_outer_radius=tpi_outer_radius, + tpi_inner_radius=tpi_inner_radius, + patch_size=patch_size, + overlap=overlap, + batch_size=batch_size, + reflection=reflection, + binarization_threshold=binarization_threshold, + mask_erosion_size=mask_erosion_size, + min_object_size=min_object_size, + use_quality_mask=use_quality_mask, + write_model_outputs=write_model_outputs, + ).run() diff --git a/darts/src/darts/legacy_pipeline/shared.py b/darts/src/darts/legacy_pipeline/shared.py deleted file mode 100644 index c823fd8..0000000 --- a/darts/src/darts/legacy_pipeline/shared.py +++ /dev/null @@ -1,113 +0,0 @@ -"""Data loading for legacy Pipeline.""" - -import logging -from collections import namedtuple -from math import ceil, sqrt -from pathlib import Path -from typing import Literal - -logger = logging.getLogger(__name__) - -AquisitionData = namedtuple("AquisitionData", ["optical", "arcticdem", "tcvis", "data_masks"]) - - -def _planet_file_generator(orthotiles_dir: Path, scenes_dir: Path, output_data_dir: Path): - # Find all PlanetScope orthotiles - for fpath in orthotiles_dir.glob("*/*/"): - tile_id = fpath.parent.name - scene_id = fpath.name - outpath = output_data_dir / tile_id / scene_id - yield fpath, outpath - - # Find all PlanetScope scenes - for fpath in scenes_dir.glob("*/"): - scene_id = fpath.name - outpath = output_data_dir / scene_id - yield fpath, outpath - - -def _load_s2(sentinel2_dir: Path, output_data_dir: Path, arcticdem_dir: Path, tcvis_dir: Path, tpi_outer_radius: int): - from darts_acquisition.arcticdem import load_arcticdem_tile - from darts_acquisition.s2 import load_s2_masks, load_s2_scene - from darts_acquisition.tcvis import load_tcvis - - for fpath in sentinel2_dir.glob("*/"): - scene_id = fpath.name - outpath = output_data_dir / scene_id - try: - optical = load_s2_scene(fpath) - arcticdem = load_arcticdem_tile( - optical.odc.geobox, arcticdem_dir, resolution=10, buffer=ceil(tpi_outer_radius / 10 * sqrt(2)) - ) - tcvis = load_tcvis(optical.odc.geobox, tcvis_dir) - data_masks = load_s2_masks(fpath, optical.odc.geobox) - aqdata = AquisitionData(optical, arcticdem, tcvis, data_masks) - yield fpath, outpath, aqdata - except Exception as e: - logger.warning(f"Could not process folder '{fpath.resolve()}'.\nSkipping...") - logger.exception(e) - continue - - -def _load_planet( - orthotiles_dir: Path, - scenes_dir: Path, - output_data_dir: Path, - arcticdem_dir: Path, - tcvis_dir: Path, - tpi_outer_radius: int, -): - from darts_acquisition.arcticdem import load_arcticdem_tile - from darts_acquisition.planet import load_planet_masks, load_planet_scene - from darts_acquisition.tcvis import load_tcvis - - # Find all PlanetScope orthotiles - for fpath, outpath in _planet_file_generator(orthotiles_dir, scenes_dir, output_data_dir): - try: - optical = load_planet_scene(fpath) - arcticdem = load_arcticdem_tile( - optical.odc.geobox, arcticdem_dir, resolution=2, buffer=ceil(tpi_outer_radius / 2 * sqrt(2)) - ) - tcvis = load_tcvis(optical.odc.geobox, tcvis_dir) - data_masks = load_planet_masks(fpath) - aqdata = 
AquisitionData(optical, arcticdem, tcvis, data_masks) - yield fpath, outpath, aqdata - except Exception as e: - logger.warning(f"Could not process folder '{fpath.resolve()}'.\nSkipping...") - logger.exception(e) - continue - - -def _segment_and_export( - tile, - ensemble, - outpath: Path, - device: Literal["cuda", "cpu", "auto"] | int | None, - patch_size: int, - overlap: int, - batch_size: int, - reflection: int, - binarization_threshold: float, - mask_erosion_size: int, - min_object_size: int, - use_quality_mask: bool, - write_model_outputs: bool, -): - from darts_export.inference import InferenceResultWriter - from darts_postprocessing import prepare_export - - tile = ensemble.segment_tile( - tile, - patch_size=patch_size, - overlap=overlap, - batch_size=batch_size, - reflection=reflection, - keep_inputs=write_model_outputs, - ) - tile = prepare_export(tile, binarization_threshold, mask_erosion_size, min_object_size, use_quality_mask, device) - - outpath.mkdir(parents=True, exist_ok=True) - writer = InferenceResultWriter(tile) - writer.export_probabilities(outpath) - writer.export_binarized(outpath) - writer.export_polygonized(outpath)
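The public entry points re-exported from `darts.legacy_pipeline` keep the same names and signatures as before the refactor, so existing callers should not need changes. A minimal invocation sketch (all paths below are placeholders):

```python
from pathlib import Path

from darts.legacy_pipeline import run_native_sentinel2_pipeline_fast

# Placeholder paths; adjust them to the local data layout described in the docstrings above.
run_native_sentinel2_pipeline_fast(
    sentinel2_dir=Path("data/input/sentinel2"),
    output_data_dir=Path("data/output"),
    arcticdem_dir=Path("data/download/arcticdem"),
    tcvis_dir=Path("data/download/tcvis"),
    model_dir=Path("models"),
)
```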