Skip to content

Commit

Permalink
Hardcode chunk-size of datacubes and write docs
Browse files Browse the repository at this point in the history
  • Loading branch information
relativityhd committed Nov 22, 2024
1 parent 8f4a464 commit 730c57d
Show file tree
Hide file tree
Showing 2 changed files with 22 additions and 8 deletions.
7 changes: 2 additions & 5 deletions darts-acquisition/src/darts_acquisition/arcticdem/datacube.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@


RESOLUTIONS = Literal[2, 10, 32]
CHUNK_SIZE = 3600
# https://www.pgc.umn.edu/guides/stereo-derived-elevation-models/pgc-dem-products-arcticdem-rema-and-earthdem
DATA_EXTENT = {
2: GeoBox.from_bbox((-3314693.24, -3314693.24, 3314693.24, 3314693.24), "epsg:3413", resolution=2),
Expand Down Expand Up @@ -250,7 +251,6 @@ def load_arcticdem_tile(
geobox: GeoBox,
data_dir: Path,
resolution: RESOLUTIONS,
chunk_size: int = 6000,
buffer: int = 0,
persist: bool = True,
) -> xr.Dataset:
Expand All @@ -260,8 +260,6 @@ def load_arcticdem_tile(
geobox (GeoBox): The geobox for which the tile should be loaded.
data_dir (Path): The directory where the ArcticDEM data is stored.
resolution (Literal[2, 10, 32]): The resolution of the ArcticDEM data in m.
chunk_size (int, optional): The chunk size for the datacube. Only relevant for the initial creation.
Has no effect otherwise. Defaults to 6000.
buffer (int, optional): The buffer around the geobox in pixels. Defaults to 0.
persist (bool, optional): If the data should be persisted in memory.
If not, this will return a Dask backed Dataset. Defaults to True.
Expand All @@ -276,7 +274,6 @@ def load_arcticdem_tile(
2. Geobox must be in a meter based CRS.
"""
# TODO: What is a good chunk size?
# TODO: Thread-safety concerns:
# - How can we ensure that the same arcticdem tile is not downloaded twice at the same time?
# - How can we ensure that the extent is not downloaded twice at the same time?
Expand All @@ -297,7 +294,7 @@ def load_arcticdem_tile(
"ArcticDEM Data Cube",
storage,
DATA_EXTENT[resolution],
chunk_size,
CHUNK_SIZE,
DATA_VARS,
DATA_VARS_META,
DATA_VARS_ENCODING,
Expand Down
23 changes: 20 additions & 3 deletions darts-acquisition/src/darts_acquisition/tcvis.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@

EE_WARN_MSG = "Unable to retrieve 'system:time_start' values from an ImageCollection due to: No 'system:time_start' values found in the 'ImageCollection'." # noqa: E501

CHUNK_SIZE = 3600
DATA_EXTENT = GeoBox.from_bbox((-180, 60, 180, 90), "epsg:4326", resolution=0.00026949)
DATA_VARS = ["tc_brightness", "tc_greenness", "tc_wetness"]
DATA_VARS_META = {
Expand All @@ -44,6 +45,19 @@


def procedural_download_datacube(storage: zarr.storage.Store, geobox: GeoBox):
"""Download the TCVIS data procedurally and add it to the datacube.
Args:
storage (zarr.storage.Store): The zarr storage object where the datacube will be saved.
geobox (GeoBox): The geobox to download the data for.
References:
- https://earthmover.io/blog/serverless-datacube-pipeline
Warning:
This function is not thread-safe. Thread-safety might be added in the future.
"""
tick_fstart = time.perf_counter()

# Check if data already exists
Expand Down Expand Up @@ -145,14 +159,17 @@ def procedural_download_datacube(storage: zarr.storage.Store, geobox: GeoBox):
def load_tcvis(
geobox: GeoBox,
data_dir: Path,
chunk_size: int = 6000,
buffer: int = 0,
persist: bool = True,
) -> xr.Dataset:
"""Load the Landsat Trends (TCVIS) from Google Earth Engine.
Args:
geobox (GeoBox): The geobox to load the data for.
data_dir (Path): The directory to store the downloaded data for faster access for consecutive calls.
buffer (int, optional): The buffer around the geobox in pixels. Defaults to 0.
persist (bool, optional): If the data should be persisted in memory.
If not, this will return a Dask backed Dataset. Defaults to True.
Returns:
xr.Dataset: The TCVIS dataset.
Expand All @@ -170,14 +187,14 @@ def load_tcvis(
title="Landsat Trends TCVIS 2000-2019",
storage=storage,
geobox=DATA_EXTENT,
chunk_size=chunk_size,
chunk_size=CHUNK_SIZE,
data_vars=DATA_VARS,
meta=DATA_VARS_META,
var_encoding=DATA_VARS_ENCODING,
)

# Download the adjacent tiles (if necessary)
reference_geobox = geobox.to_crs("epsg:4326").pad(buffer)
reference_geobox = geobox.to_crs("epsg:4326", resolution=DATA_EXTENT.resolution.x).pad(buffer)
procedural_download_datacube(storage, reference_geobox)

# Load the datacube and set the spatial_ref since it is set as a coordinate within the zarr format
Expand Down

0 comments on commit 730c57d

Please sign in to comment.