diff --git a/darts-acquisition/src/darts_acquisition/arcticdem/datacube.py b/darts-acquisition/src/darts_acquisition/arcticdem/datacube.py index 10a60d8..aa2b1b1 100644 --- a/darts-acquisition/src/darts_acquisition/arcticdem/datacube.py +++ b/darts-acquisition/src/darts_acquisition/arcticdem/datacube.py @@ -10,7 +10,6 @@ import geopandas as gpd import odc.geo.xr import pystac -import rasterio import requests import shapely import xarray as xr @@ -25,25 +24,81 @@ RESOLUTIONS = Literal[2, 10, 32] -DATA_VARS = ["dem", "count", "mad", "maxdate", "mindate", "datamask"] - - -def download_arcticdem_extend(dem_data_dir: Path): - """Download the gdal ArcticDEM extend data from the provided URL and extracts it to the specified directory. +# https://www.pgc.umn.edu/guides/stereo-derived-elevation-models/pgc-dem-products-arcticdem-rema-and-earthdem +DATA_VARS = ["dem", "datamask"] # ["dem", "count", "mad", "maxdate", "mindate", "datamask"] +DATA_VARS_META = { + "dem": { + "long_name": "Digital Elevation Model", + "source": "ArcticDEM", + "units": "m", + "description": "Digital Elevation Model, elevation resolution is cropped to ~1cm", + }, + # "count": {"long_name": "Count", "source": "ArcticDEM", "description": "Number of contributing DEMs"}, + # "mad": { + # "long_name": "Median Absolute Deviation", + # "source": "ArcticDEM", + # "units": "m", + # "description": "Median absolute deviation of contributing DEMs", + # }, + # "maxdate": { + # "long_name": "Max Date", + # "source": "ArcticDEM", + # "description": "The date of the most recent image in days since 01. Jan 2000", + # }, + # "mindate": { + # "long_name": "Min Date", + # "source": "ArcticDEM", + # "description": "The date of the oldest image in days since 01. Jan 2000", + # }, + "datamask": {"long_name": "Data Mask", "source": "ArcticDEM"}, +} +DATA_VARS_ENCODING = { + "dem": {"dtype": "float32"}, + # "dem": {"dtype": "float32", "_FillValue": float("nan")}, + # "count": {"dtype": "int16", "_FillValue": 0}, + # "mad": {"dtype": "float32", "_FillValue": float("nan")}, + # "maxdate": {"dtype": "int16", "_FillValue": -1}, # Storing the date as int16 is enough until 18. Sep 2089 + # "mindate": {"dtype": "int16", "_FillValue": -1}, # Storing the date as int16 is enough until 18. Sep 2089 + "datamask": {"dtype": "bool"}, + # "datamask": {"dtype": "bool", "_FillValue": False}, +} + + +def download_arcticdem_extent(dem_data_dir: Path): + """Download the ArcticDEM mosaic extent info from the provided URL and extracts it to the specified directory. Args: dem_data_dir (Path): The directory where the extracted data will be saved. 
+    Example:
+        ```python
+        from darts_acquisition.arcticdem.datacube import download_arcticdem_extent
+
+        dem_data_dir = Path("data/arcticdem")
+        download_arcticdem_extent(dem_data_dir)
+        ```
+
+    Resulting in the following directory structure:
+
+        ```sh
+        $ tree data/arcticdem
+        data/arcticdem
+        ├── ArcticDEM_Mosaic_Index_v4_1_2m.parquet
+        ├── ArcticDEM_Mosaic_Index_v4_1_10m.parquet
+        └── ArcticDEM_Mosaic_Index_v4_1_32m.parquet
+        ```
+
     """
-    start = time.time()
+    tick_fstart = time.perf_counter()

     url = "https://data.pgc.umn.edu/elev/dem/setsm/ArcticDEM/indexes/ArcticDEM_Mosaic_Index_latest_gpqt.zip"
-    logger.info(f"Downloading the gdal arcticdem extend from {url} to {dem_data_dir.resolve()}")
+    logger.debug(f"Downloading the arcticdem mosaic extent from {url} to {dem_data_dir.resolve()}")
     response = requests.get(url)

     # Get the downloaded data as a byte string
     data = response.content
-    logger.debug(f"Downloaded {len(data)} bytes")
+    tick_download = time.perf_counter()
+    logger.debug(f"Downloaded {len(data)} bytes in {tick_download - tick_fstart:.2f} seconds")

     # Create a bytesIO object
     with io.BytesIO(data) as buffer:
@@ -64,7 +119,12 @@ def download_arcticdem_extend(dem_data_dir: Path):
         # Remove the empty directory
         extracted_dir.rmdir()

-    logger.info(f"Download completed in {time.time() - start:.2f} seconds")
+    tick_extract = time.perf_counter()
+    logger.debug(f"Extraction completed in {tick_extract - tick_download:.2f} seconds")
+    logger.info(
+        f"Download and extraction of the arcticdem mosaic extent from {url} to {dem_data_dir.resolve()} "
+        f"completed in {tick_extract - tick_fstart:.2f} seconds"
+    )


 def download_arcticdem_stac(stac_url: str) -> xr.Dataset:
@@ -80,9 +140,11 @@
         - mindate
         - datamask

+    Read more here: https://www.pgc.umn.edu/guides/stereo-derived-elevation-models/pgc-dem-products-arcticdem-rema-and-earthdem
+
     Args:
         stac_url (str): The URL of the ArcticDEM STAC. Must be one of:
-            - A stac-browser url, like it is provided in the mosaic-extend dataframe, e.g. https://polargeospatialcenter.github.io/stac-browser/#/external/pgc-opendata-dems.s3.us-west-2.amazonaws.com/arcticdem/mosaics/v4.1/32m/36_24/36_24_32m_v4.1.json
+            - A stac-browser url, as provided in the mosaic-extent dataframe, e.g. https://polargeospatialcenter.github.io/stac-browser/#/external/pgc-opendata-dems.s3.us-west-2.amazonaws.com/arcticdem/mosaics/v4.1/32m/36_24/36_24_32m_v4.1.json
             - A direct link to the STAC file, e.g. https://pgc-opendata-dems.s3.us-west-2.amazonaws.com/arcticdem/mosaics/v4.1/32m/36_24/36_24_32m_v4.1.json

     Returns:
@@ -90,26 +152,26 @@
             Data downloaded on demand, NOT from within this function.
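+    Example:
+        A minimal usage sketch; the tile URL is the direct-link example from above, and the pixel
+        data is only fetched once the variables are actually accessed:
+
+        ```python
+        from darts_acquisition.arcticdem.datacube import download_arcticdem_stac
+
+        stac_url = "https://pgc-opendata-dems.s3.us-west-2.amazonaws.com/arcticdem/mosaics/v4.1/32m/36_24/36_24_32m_v4.1.json"
+        scene = download_arcticdem_stac(stac_url)  # lazy xr.Dataset in EPSG:3413
+        dem = scene["dem"].values  # accessing the values triggers the actual download
+        ```
+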
""" - start = time.time() - logger.info(f"Loading ArcticDEM data from {stac_url}") + tick_fstart = time.perf_counter() # Check weather the browser url is provided -> if so parse the right url from the string if "#" in stac_url: stac_url = "https://" + "/".join(stac_url.split("#")[1].split("/")[2:]) resolution = int(stac_url.split("/")[-3].replace("m", "")) + logger.debug(f"Downloading ArcticDEM data from {stac_url} with a resolution of {resolution}m") item = pystac.Item.from_file(stac_url) - ds = xr.open_dataset(item, engine="stac", resolution=resolution, crs="3413").isel(time=0).drop_vars("time") - logger.info(f"Metadata download completed in {time.time() - start:.2f} seconds") + tick_fend = time.perf_counter() + logger.debug(f"Scene metadata download completed in {tick_fend - tick_fstart:.2f} seconds") return ds def create_empty_datacube(storage: zarr.storage.Store, resolution: int, chunk_size: int): - """Create an empty datacube from a GeoBox covering the complete extend of the EPSG:3413. + """Create an empty datacube from a GeoBox covering the complete extent of the EPSG:3413. Args: storage (zarr.storage.Store): The zarr storage object where the datacube will be saved. @@ -117,41 +179,44 @@ def create_empty_datacube(storage: zarr.storage.Store, resolution: int, chunk_si chunk_size (int): The size of a single chunk in pixels. """ - data_vars = ["dem", "count", "mad", "maxdate", "mindate", "datamask"] - + tick_fstart = time.perf_counter() + logger.info( + f"Creating an empty zarr datacube with the variables" + f"{DATA_VARS} at a {resolution=}m and {chunk_size=} to {storage=}" + ) geobox = GeoBox.from_bbox((-3314693.24, -3314693.24, 3314693.24, 3314693.24), "epsg:3413", resolution=resolution) ds = xr.Dataset( - {name: odc.geo.xr.xr_zeros(geobox, chunks=-1, dtype="float32") for name in data_vars}, + {name: odc.geo.xr.xr_zeros(geobox, chunks=-1, dtype="float32") for name in DATA_VARS}, attrs={"title": "ArcticDEM Data Cube", "loaded_scenes": []}, ) - lon_encoding = optimize_coord_encoding(ds.x.values, resolution) - lat_encoding = optimize_coord_encoding(ds.y.values, -resolution) + # Add metadata + for name, meta in DATA_VARS_META.items(): + ds[name].attrs.update(meta) + + coords_encoding = { + "x": {"chunks": ds.x.shape, **optimize_coord_encoding(ds.x.values, resolution)}, + "y": {"chunks": ds.y.shape, **optimize_coord_encoding(ds.y.values, -resolution)}, + } var_encoding = { - name: { - "chunks": (chunk_size, chunk_size), - "compressor": zarr.Blosc(cname="zstd"), - # workaround to create a fill value for the underlying zarr array - # since Xarray doesn't let us specify one explicitly - "_FillValue": float("nan"), - # "scale_factor": 0.1, - # "add_offset": 0, - "dtype": "float32", - } - for name in data_vars + name: {"chunks": (chunk_size, chunk_size), "compressor": zarr.Blosc(cname="zstd"), **DATA_VARS_ENCODING[name]} + for name in DATA_VARS } encoding = { - "x": {"chunks": ds.x.shape, **lon_encoding}, - "y": {"chunks": ds.y.shape, **lat_encoding}, "spatial_ref": {"chunks": None, "dtype": "int32"}, + **coords_encoding, **var_encoding, } + logger.debug(f"Datacube {encoding=}") + ds.to_zarr( storage, encoding=encoding, compute=False, ) + tick_fend = time.perf_counter() + logger.info(f"Empty datacube created in {tick_fend - tick_fstart:.2f} seconds") def procedural_download_datacube(storage: zarr.storage.Store, scenes: gpd.GeoDataFrame): @@ -159,14 +224,19 @@ def procedural_download_datacube(storage: zarr.storage.Store, scenes: gpd.GeoDat Args: storage (zarr.storage.Store): The zarr storage 
object where the datacube will be saved. - scenes (gpd.GeoDataFrame): The GeoDataFrame containing the scene information from the mosaic extend dataframe. + scenes (gpd.GeoDataFrame): The GeoDataFrame containing the scene information from the mosaic extent dataframe. References: - https://earthmover.io/blog/serverless-datacube-pipeline + Warning: + This function is not thread-safe. Thread-safety might be added in the future. + """ + tick_fstart = time.perf_counter() + # Check if zarr data already contains the data via the attrs - arcticdem_datacube = xr.open_zarr(storage) + arcticdem_datacube = xr.open_zarr(storage, mask_and_scale=False) loaded_scenes = arcticdem_datacube.attrs.get("loaded_scenes", []).copy() # TODO: Add another attribute called "loading_scenes" to keep track of the scenes that are currently being loaded @@ -174,42 +244,70 @@ def procedural_download_datacube(storage: zarr.storage.Store, scenes: gpd.GeoDat # Maybe it would be better to turn this into a class which is meant to be used as singleton and can store # the loaded scenes as state + new_scenes_loaded = False for sceneinfo in scenes.itertuples(): # Skip if the scene is already in the datacube if sceneinfo.dem_id in loaded_scenes: + logger.debug(f"Scene {sceneinfo.dem_id} already loaded, skipping") continue + logger.debug(f"Downloading scene {sceneinfo.dem_id} from {sceneinfo.s3url}") scene = download_arcticdem_stac(sceneinfo.s3url) - x_start_idx = int((scene.x[0] - arcticdem_datacube.x[0]) // scene.x.attrs["resolution"]) - y_start_idx = int((scene.y[0] - arcticdem_datacube.y[0]) // scene.y.attrs["resolution"]) - target_slice = { - "x": slice(x_start_idx, x_start_idx + scene.sizes["x"]), - "y": slice(y_start_idx, y_start_idx + scene.sizes["y"]), - } - logger.debug(f"Target slice: {target_slice}") - for var in scene.data_vars: if var not in arcticdem_datacube.data_vars: - logger.warning(f"Variable '{var}' not in the datacube, skipping") + # logger.debug(f"Variable '{var}' not in the datacube, skipping") continue - raw_data = scene[var].values # noqa: PD011 - logger.debug(f"Adding {var} to the datacube: {lovely(raw_data)}") - arcticdem_datacube[var][target_slice] = raw_data + tick_sdownload = time.perf_counter() + # This should actually download the data into memory + # I have no clue why I need to call load twice, but without the data download would start when dropping nan + # which results in multiple download within the dropna functions and longer loading times... 
+ inmem_var = scene[var].load().load() + tick_edownload = time.perf_counter() + logger.debug( + f"Downloaded '{sceneinfo.dem_id}:{var}' in {tick_edownload - tick_sdownload:.2f}s: " + f"{lovely(inmem_var.values)}" + ) + # The edges can have all-nan values which would overwrite existing valid data, hence we drop it + inmem_var = inmem_var.dropna("x", how="all").dropna("y", how="all") + tick_edrop = time.perf_counter() + logger.debug(f"Dropped NaNs in {tick_edrop - tick_edownload:.2f}s.") + + # Get the slice of the datacube where the scene will be written + x_start_idx = int((inmem_var.x[0] - arcticdem_datacube.x[0]) // inmem_var.x.attrs["resolution"]) + y_start_idx = int((inmem_var.y[0] - arcticdem_datacube.y[0]) // inmem_var.y.attrs["resolution"]) + target_slice = { + "x": slice(x_start_idx, x_start_idx + inmem_var.sizes["x"]), + "y": slice(y_start_idx, y_start_idx + inmem_var.sizes["y"]), + } + logger.debug(f"Target slice of '{sceneinfo.dem_id}:{var}': {target_slice}") + + arcticdem_datacube[var][target_slice] = inmem_var.values # noqa: PD011 + # We need to manually delete the attributes (only in memory), since xarray has weird behaviour + # when _FillValue is set and mask_and_scale=False when opening the zarr + arcticdem_datacube[var].attrs = {} arcticdem_datacube[var][target_slice].to_zarr(storage, region="auto", safe_chunks=False) + tick_ewrite = time.perf_counter() + logger.debug(f"Written '{sceneinfo.dem_id}:{var}' to datacube in {tick_ewrite - tick_edownload:.2f}s") loaded_scenes.append(sceneinfo.dem_id) + new_scenes_loaded = True + + if new_scenes_loaded: + logger.debug(f"Updating the loaded scenes metavar in the datacube. ({len(loaded_scenes)} scenes)") + # Update storage (with native zarr, since xarray does not support this yet) + za = zarr.open(storage) + za.attrs["loaded_scenes"] = loaded_scenes + # Xarray default behaviour is to read the consolidated metadata, hence, we must update it + zarr.consolidate_metadata(storage) - # Update storage (with native zarr) - za = zarr.open(storage) - za.attrs["loaded_scenes"] = loaded_scenes - # Xarray default behaviour is to read the consolidated metadata, hence, we must update it - zarr.consolidate_metadata(storage) + tick_fend = time.perf_counter() + logger.info(f"Procedural download of {len(scenes)} scenes completed in {tick_fend - tick_fstart:.2f} seconds") def get_arcticdem_tile( reference_dataset: xr.Dataset, data_dir: Path, - resolution: RESOLUTIONS | None = None, + resolution: RESOLUTIONS, chunk_size: int = 4000, buffer: int = 256, ) -> xr.Dataset: @@ -218,34 +316,35 @@ def get_arcticdem_tile( Args: reference_dataset (xr.Dataset): The reference dataset. data_dir (Path): The directory where the ArcticDEM data is stored. - resolution (Literal[2, 10, 32] | None, optional): The resolution of the ArcticDEM data in m. - If None tries to automatically detect the lowest resolution possible for the reference. Defaults to None. + resolution (Literal[2, 10, 32]): The resolution of the ArcticDEM data in m. chunk_size (int, optional): The chunk size for the datacube. Only relevant for the initial creation. Has no effect otherwise. Defaults to 4000. buffer (int, optional): The buffer around the reference dataset in pixels. Defaults to 256. Returns: - xr.Dataset: The ArcticDEM tile. + xr.Dataset: The ArcticDEM tile, with a buffer applied. + Note: The buffer is applied in the arcticdem dataset's CRS, hence the orientation might be different. + Final dataset is NOT matched to the reference CRS and resolution. + + Warning: + 1. 
This function is not thread-safe. Thread-safety might be added in the future.
+        2. The reference dataset must be in a meter-based CRS.

     """
     # TODO: What is a good chunk size?
-    # TODO: what happens if two processes try to download the same file at the same time?
-    start = time.time()
-    logger.info(f"Getting ArcticDEM tile from {data_dir.resolve()}")
+    # TODO: Thread-safety concerns:
+    # - How can we ensure that the same arcticdem scene is not downloaded twice at the same time?
+    # - How can we ensure that the extent is not downloaded twice at the same time?

-    reference_resolution = abs(int(reference_dataset.x[1] - reference_dataset.x[0]))
-    # Select the resolution based on the reference dataset
-    if resolution is None:
-        # TODO: Discuss about the resolution thresholds
-        if reference_resolution < 8:
-            resolution = 2
-        elif reference_resolution < 25:
-            resolution = 10
-        else:
-            resolution = 32
+    tick_fstart = time.perf_counter()

     datacube_fpath = data_dir / f"datacube_{resolution}m_v4.1.zarr"
     storage = zarr.storage.FSStore(datacube_fpath)
+    logger.debug(f"Getting ArcticDEM tile from {datacube_fpath.resolve()}")
+
+    # ! The reference dataset must be in a meter-based CRS
+    reference_resolution = abs(int(reference_dataset.x[1] - reference_dataset.x[0]))
+    logger.debug(f"Found a reference resolution of {reference_resolution}m")

     # Check if the zarr data already exists
     if not datacube_fpath.exists():
@@ -253,30 +352,29 @@
         create_empty_datacube(storage, resolution, chunk_size)

     # Get the adjacent arcticdem scenes
-    # * Note: We could also use pystac here, but this would result in a slight performance decrease
-    # * because of the network overhead
-
-    # Load the extend, download if the file does not exist
-    extend_fpath = data_dir / f"ArcticDEM_Mosaic_Index_v4_1_{resolution}m.parquet"
-    if not extend_fpath.exists():
-        download_arcticdem_extend(data_dir)
-    extend = gpd.read_parquet(extend_fpath)
+    # Note: We could also use pystac here, but this would result in a slight performance decrease
+    #       because of the network overhead, hence we use the extent file
+
+    # Load the extent, downloading it first if the file does not exist
+    extent_fpath = data_dir / f"ArcticDEM_Mosaic_Index_v4_1_{resolution}m.parquet"
+    if not extent_fpath.exists():
+        download_arcticdem_extent(data_dir)
+    extent = gpd.read_parquet(extent_fpath)

     # Add a buffer around the reference dataset to get the adjacent scenes
-    buffer_m = buffer * reference_resolution  # 256 pixels * the resolution of the reference dataset
+    buffer_m = buffer * reference_resolution  # buffer (in pixels) * the resolution of the reference dataset
     reference_bbox = shapely.geometry.box(*reference_dataset.rio.transform_bounds("epsg:3413")).buffer(
         buffer_m, join_style="mitre"
     )
-    adjacent_scenes = extend[extend.intersects(reference_bbox)]
+    adjacent_scenes = extent[extent.intersects(reference_bbox)]

     # Download the adjacent scenes (if necessary)
     procedural_download_datacube(storage, adjacent_scenes)
-    logger.debug(f"Procedural download completed in {time.time() - start:.2f} seconds")

     # Load the datacube and set the spatial_ref since it is set as a coordinate within the zarr format
-    arcticdem_datacube = xr.open_zarr(storage).set_coords("spatial_ref")
+    arcticdem_datacube = xr.open_zarr(storage, mask_and_scale=False).set_coords("spatial_ref")

     # Get an AOI slice of the datacube, since rio.reproject_match would load the whole datacube
+    # Note: The bounds are not in the same orientation as the bbox, because of the CRS change
     xmin, ymin, xmax, ymax = reference_bbox.bounds
     aoi_slice = {
         "x": slice(xmin, xmax),
@@ -284,10 +382,13 @@
     }
     logger.debug(f"AOI slice: {aoi_slice}")
     arcticdem_aoi = arcticdem_datacube.sel(aoi_slice)
-    logger.debug(f"ArcticDEM AOI: {arcticdem_aoi}")
-    # TODO: I think the buffer gets lost here, because the reproject_match function crops to the reference dataset
-    ds = arcticdem_aoi.rio.reproject_match(reference_dataset, resampling=rasterio.enums.Resampling.cubic)
+    tick_sload = time.perf_counter()
+    # arcticdem_aoi = arcticdem_aoi.compute()
+    tick_eload = time.perf_counter()
+    logger.debug(f"ArcticDEM AOI loaded from disk in {tick_eload - tick_sload:.2f} seconds")

-    logger.info(f"ArcticDEM tile loaded in {time.time() - start:.2f} seconds")
-    return ds
+    # TODO: Add Metadata etc. according to our standards
+
+    logger.info(f"ArcticDEM tile loaded in {time.perf_counter() - tick_fstart:.2f} seconds")
+    return arcticdem_aoi
diff --git a/docs/dev/logging.md b/docs/dev/logging.md
index 33031b1..db25bd1 100644
--- a/docs/dev/logging.md
+++ b/docs/dev/logging.md
@@ -70,3 +70,26 @@ logger.warning(la)
 logger.warning(da)
 logger.warning(t)
 ```
+
+### When to use which level
+
+> The following is only a recommendation and should help write helpful, uncluttered logs.
+
+1. `Debug` should be used to tell what will happen next, `Info` for conclusive statements.
+
+    Example:
+
+    ```py
+    import logging
+    import time
+
+    logger = logging.getLogger(__name__.replace("darts_", "darts."))  # don't replace __name__
+
+    def my_func(param):
+        tick_fstart = time.perf_counter()  # a pattern used a lot in the code: fstart = function start
+        logger.debug(f"Doing x with {param=}")
+        ...  # Doing x
+        logger.info(f"Done x in {time.perf_counter() - tick_fstart:.2f}s")
+    ```
+
+2. Unimportant or very frequently called functions should only log on the `debug` level, independent of the statement types above.
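+
+    Example (with a hypothetical `load_scene` helper that is called once per scene inside a loop):
+
+    ```py
+    import logging
+
+    logger = logging.getLogger(__name__.replace("darts_", "darts."))
+
+    def load_scene(scene_id):
+        # Called very often -> logging on info level would clutter the output
+        logger.debug(f"Loading scene {scene_id=}")
+        ...  # Doing the actual work
+        logger.debug(f"Done loading scene {scene_id=}")
+    ```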
diff --git a/notebooks/viz-arcticdem.ipynb b/notebooks/viz-arcticdem.ipynb new file mode 100644 index 0000000..b6ffad1 --- /dev/null +++ b/notebooks/viz-arcticdem.ipynb @@ -0,0 +1,179 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import logging\n", + "from pathlib import Path\n", + "\n", + "# Create a cartopy plot with CRS is EPSG:3413 (Focus on north pole)\n", + "import cartopy.crs as ccrs\n", + "import cartopy.feature as cfeature\n", + "import geopandas as gpd\n", + "import matplotlib.path as mpath\n", + "import matplotlib.pyplot as plt\n", + "import numpy as np\n", + "import rioxarray # noqa: F401\n", + "import xarray as xr\n", + "import zarr\n", + "from darts_acquisition.arcticdem.datacube import download_arcticdem_extent\n", + "from rich import traceback\n", + "from rich.logging import RichHandler\n", + "\n", + "from darts.utils.logging import setup_logging\n", + "\n", + "setup_logging()\n", + "logging.basicConfig(\n", + " level=logging.INFO,\n", + " format=\"%(message)s\",\n", + " datefmt=\"[%X]\",\n", + " handlers=[RichHandler(rich_tracebacks=True)],\n", + ")\n", + "traceback.install(show_locals=False)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "DATA_ROOT = Path(\"../data\")\n", + "\n", + "fpath = DATA_ROOT / \"input/planet/PSOrthoTile/4974017/5854937_4974017_2022-08-14_2475\"\n", + "arcticdem_dir = DATA_ROOT / \"download/arcticdem\"\n", + "resolution = 32\n", + "coarsen = 10" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "extent_fpath = arcticdem_dir / f\"ArcticDEM_Mosaic_Index_v4_1_{resolution}m.parquet\"\n", + "if not extent_fpath.exists():\n", + " download_arcticdem_extent(arcticdem_dir)\n", + "extent = gpd.read_parquet(extent_fpath)\n", + "extent.head()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "datacube_fpath = arcticdem_dir / f\"datacube_{resolution}m_v4.1.zarr\"\n", + "storage = zarr.storage.FSStore(datacube_fpath)\n", + "\n", + "# Check if zarr data already contains the data via the attrs\n", + "arcticdem_datacube = xr.open_zarr(storage, mask_and_scale=True)\n", + "loaded_scenes = arcticdem_datacube.attrs.get(\"loaded_scenes\", []).copy()\n", + "\n", + "lowres_data = []\n", + "for sceneinfo in extent.itertuples():\n", + " if sceneinfo.dem_id not in loaded_scenes:\n", + " continue\n", + "\n", + " geom = sceneinfo.SHAPE\n", + " scene_data = arcticdem_datacube.dem.sel(\n", + " x=slice(geom.bounds[0], geom.bounds[2]), y=slice(geom.bounds[3], geom.bounds[1])\n", + " )\n", + " scene_data_lowres = scene_data.coarsen(x=coarsen, y=coarsen, boundary=\"trim\").mean()\n", + " lowres_data.append(scene_data_lowres)\n", + "len(lowres_data)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "lowres_data_da = xr.combine_by_coords(lowres_data).dem\n", + "lowres_data_da.rio.write_crs(\"EPSG:3413\", inplace=True)\n", + "lowres_data_da" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "lowres_data_da.plot()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Define the projection\n", + "projection = ccrs.Stereographic(central_latitude=90, central_longitude=-45, true_scale_latitude=70)\n", + "\n", + "# Create a 
figure\n", + "fig, ax = plt.subplots(figsize=(10, 10), subplot_kw={\"projection\": projection})\n", + "\n", + "# Set the extent to focus on the North Pole\n", + "ax.set_extent([-180, 180, 60, 90], crs=ccrs.PlateCarree())\n", + "\n", + "# Add features\n", + "ax.add_feature(cfeature.LAND, zorder=0, edgecolor=\"black\", facecolor=\"white\")\n", + "ax.add_feature(cfeature.OCEAN, zorder=0, facecolor=\"lightgrey\")\n", + "ax.add_feature(cfeature.COASTLINE)\n", + "ax.add_feature(cfeature.BORDERS, linestyle=\":\")\n", + "ax.add_feature(cfeature.LAKES, alpha=0.5)\n", + "ax.add_feature(cfeature.RIVERS)\n", + "\n", + "# Add gridlines\n", + "gl = ax.gridlines(draw_labels=True)\n", + "gl.top_labels = False\n", + "gl.right_labels = False\n", + "\n", + "# Compute a circle in axes coordinates, which we can use as a boundary\n", + "# for the map. We can pan/zoom as much as we like - the boundary will be\n", + "# permanently circular.\n", + "theta = np.linspace(0, 2 * np.pi, 100)\n", + "center, radius = [0.5, 0.5], 0.5\n", + "verts = np.vstack([np.sin(theta), np.cos(theta)]).T\n", + "circle = mpath.Path(verts * radius + center)\n", + "\n", + "ax.set_boundary(circle, transform=ax.transAxes)\n", + "\n", + "# Add the xarray data\n", + "lowres_data_da.rio.reproject(\"epsg:4326\").plot(ax=ax, transform=ccrs.PlateCarree(), cmap=\"viridis\", add_colorbar=False)\n", + "\n", + "# Show the plot\n", + "plt.show()" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": ".venv", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.9" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/pyproject.toml b/pyproject.toml index 0497ebe..d9fdbd4 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -25,6 +25,8 @@ dependencies = [ "boto3>=1.35.54", "odc-geo>=0.4.8", "zarr[jupyter]>=2.18.3", + "cartopy>=0.24.1", + "odc-stac>=0.3.10", # Training and Inference "segmentation-models-pytorch>=0.3.4", # Processing