Add tcvis to preprocessing with ee
relativityhd committed Oct 26, 2024
1 parent 246c33c commit 17a9dbd
Showing 5 changed files with 92 additions and 26 deletions.
1 change: 1 addition & 0 deletions darts-preprocessing/src/darts_preprocessing/data_sources/planet.py
@@ -50,6 +50,7 @@ def load_planet_scene(fpath: str | Path) -> xr.Dataset:

# Merge all datasets into one
ds_planet = xr.merge(datasets)
ds_planet.attrs["scene_id"] = fpath.stem
logger.debug(f"Loaded Planet scene in {time.time() - start_time} seconds.")
return ds_planet

75 changes: 53 additions & 22 deletions darts-preprocessing/src/darts_preprocessing/data_sources/tcvis.py
@@ -2,26 +2,19 @@

import logging
import time
import warnings
from pathlib import Path

import ee
import numpy as np
import pyproj
import rioxarray # noqa: F401
import rasterio
import xarray as xr
import xee # noqa: F401

logger = logging.getLogger(__name__)


def init_ee():
"""Initialize Earth Engine. Authenticate if necessary."""
logger.debug("Initializing Earth Engine")
try:
ee.Initialize()
except Exception:
logger.debug("Initializing Earth Engine failed, trying to authenticate before")
ee.Authenticate()
ee.Initialize()
logger.debug("Earth Engine initialized")
EE_WARN_MSG = "Unable to retrieve 'system:time_start' values from an ImageCollection due to: No 'system:time_start' values found in the 'ImageCollection'." # noqa: E501


def ee_geom_from_image_bounds(reference_dataset: xr.Dataset, buffer=1000) -> ee.Geometry.Rectangle:
@@ -36,36 +29,74 @@ def ee_geom_from_image_bounds(reference_dataset: xr.Dataset, buffer=1000) -> ee.Geometry.Rectangle:
"""
# get bounds
xmin, ymin, xmax, ymax = reference_dataset.rio.bounds()
xmin, ymin, xmax, ymax = reference_dataset.rio.bounds() # left, bottom, right, top
region_rect = [[xmin - buffer, ymin - buffer], [xmax + buffer, ymax + buffer]]

# make polygon
transformer = pyproj.Transformer.from_crs(reference_dataset.rio.crs(), "epsg:4326")
transformer = pyproj.Transformer.from_crs(reference_dataset.rio.crs, "epsg:4326")
region_transformed = [transformer.transform(*vertex)[::-1] for vertex in region_rect]
return ee.Geometry.Rectangle(coords=region_transformed)


def load_tcvis_xee(cache_dir: Path, scene_id: str, reference_dataset: xr.Dataset) -> xr.Dataset:
def load_tcvis(reference_dataset: xr.Dataset, cache_dir: Path | None = None) -> xr.Dataset:
"""Load the Landsat Trends (TCVIS) from Google Earth Engine.
Args:
cache_dir (Path): Path for the cache file. The file get's created if not exists.
reference_dataset (xr.Dataset): The reference dataset.
cache_dir (Path | None): The cache directory. If None, no caching will be used. Defaults to None.
Returns:
xr.Dataset: The TCVIS dataset.
"""
start_time = time.time()

cache_file = cache_dir / f"tcvis_{scene_id}.nc"
if cache_file.exists():
return xr.open_dataset(cache_file)
# Try to load from cache - else from Google Earth Engine
cache_fname = f"tcvis_{reference_dataset.attrs['scene_id']}.nc"
if cache_dir is not None and (cache_dir / cache_fname).exists():
logger.debug(f"Loading cached TCVis from {cache_dir / cache_fname}")
return xr.open_dataset(cache_dir / cache_fname, engine="h5netcdf")

ee_image_tcvis = ee.ImageCollection("users/ingmarnitze/TCTrend_SR_2000-2019_TCVIS").mosaic()
logger.debug("Loading TCVis from Google Earth Engine, since no cache was found")
geom = ee_geom_from_image_bounds(reference_dataset)
ds = xr.open_dataset(ee_image_tcvis, engine="ee", projection=ee_image_tcvis.projection(), geometry=geom)
ds = ds.rio.reproject_match(reference_dataset)
ee_image_tcvis = ee.ImageCollection("users/ingmarnitze/TCTrend_SR_2000-2019_TCVIS").mosaic().clip(geom)
# Wrap into image collection again to be able to use the engine
ee_image_tcvis = ee.ImageCollection(ee_image_tcvis)
with warnings.catch_warnings():
warnings.filterwarnings("ignore", category=UserWarning, message=EE_WARN_MSG)
ds = xr.open_dataset(ee_image_tcvis, engine="ee", geometry=geom, crs=str(reference_dataset.rio.crs), scale=30)
# Update dataset properties to fit our pipeline-api
ds = ds.isel(time=0).rename({"X": "x", "Y": "y"}).transpose("y", "x")
ds = ds.rename_vars(
{
"TCB_slope": "tc_brightness",
"TCG_slope": "tc_greenness",
"TCW_slope": "tc_wetness",
}
)
for band in ds.data_vars:
ds[band].attrs = {
"data_source": "landsat-trends",
"long_name": f"TC {band.split('_')[1].capitalize()}",
}

ds.rio.write_crs(ds.attrs["crs"], inplace=True)
ds.rio.set_spatial_dims(x_dim="x", y_dim="y", inplace=True)
search_time = time.time()
logger.debug(f"Found a dataset with shape {ds.sizes} in {search_time - start_time} seconds")

logger.debug(f"Reproject dataset to match reference dataset {reference_dataset.sizes}")
ds = ds.rio.reproject_match(reference_dataset, resampling=rasterio.enums.Resampling.cubic)
logger.debug(f"Reshaped dataset in {time.time() - search_time} seconds")

for band in ds.data_vars:
ds[band] = ds[band].astype(np.uint8)

# Save to cache
if cache_dir is not None:
logger.debug(f"Saving TCVis to cache to {cache_dir / cache_fname}")
cache_dir.mkdir(parents=True, exist_ok=True)
ds.to_netcdf(cache_dir / cache_fname, engine="h5netcdf")

logger.debug(f"Loading TCVis took {time.time() - start_time} seconds")
return ds
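
A minimal usage sketch of the new `load_tcvis` (not part of the commit): the paths are placeholders, and it assumes Earth Engine has already been initialized (see `init_ee` below). `load_planet_scene` sets the `scene_id` attribute that `load_tcvis` uses to name its cache file.

```python
from pathlib import Path

from darts_preprocessing.data_sources.planet import load_planet_scene
from darts_preprocessing.data_sources.tcvis import load_tcvis

# Placeholder paths; adjust to the local data layout.
scene_path = Path("path/to/planet_scene")
cache_dir = Path("path/to/tcvis_cache")

ds_planet = load_planet_scene(scene_path)  # provides CRS, bounds and the "scene_id" attribute

# Returns tc_brightness / tc_greenness / tc_wetness, reprojected to match ds_planet;
# the result is written to cache_dir as NetCDF and reused on subsequent calls.
ds_tcvis = load_tcvis(ds_planet, cache_dir)
```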
11 changes: 7 additions & 4 deletions darts-preprocessing/src/darts_preprocessing/preprocess.py
@@ -6,15 +6,19 @@

from darts_preprocessing.data_sources.arcticdem import load_arcticdem
from darts_preprocessing.data_sources.planet import load_planet_masks, load_planet_scene
from darts_preprocessing.data_sources.tcvis import load_tcvis
from darts_preprocessing.engineering.indices import calculate_ndvi


def load_and_preprocess_planet_scene(planet_scene_path: Path, arcticdem_dir: Path) -> xr.Dataset:
def load_and_preprocess_planet_scene(
planet_scene_path: Path, arcticdem_dir: Path, cache_dir: Path | None = None
) -> xr.Dataset:
"""Load and preprocess a Planet Scene (PSOrthoTile or PSScene) into an xr.Dataset.
Args:
planet_scene_path (Path): path to the Planet Scene
arcticdem_dir (Path): path to the ArcticDEM directory
cache_dir (Path | None): The cache directory. If None, no caching will be used. Defaults to None.
Returns:
xr.Dataset: preprocessed Planet Scene
@@ -60,13 +64,12 @@ def load_and_preprocess_planet_scene(planet_scene_path: Path, arcticdem_dir: Path) -> xr.Dataset:

ds_articdem = load_arcticdem(arcticdem_dir, ds_planet)

# # get xr.dataset for tcvis
# ds_tcvis = load_auxiliary(planet_scene_path, tcvis_path)
ds_tcvis = load_tcvis(ds_planet, cache_dir)

# load udm2
ds_data_masks = load_planet_masks(planet_scene_path)

# merge to final dataset
ds_merged = xr.merge([ds_planet, ds_ndvi, ds_articdem, ds_data_masks])
ds_merged = xr.merge([ds_planet, ds_ndvi, ds_articdem, ds_tcvis, ds_data_masks])

return ds_merged
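
For context, a sketch of how the updated entry point could be called with the new optional `cache_dir` (placeholder paths; assumes the module is importable as `darts_preprocessing.preprocess`):

```python
from pathlib import Path

from darts_preprocessing.preprocess import load_and_preprocess_planet_scene

# cache_dir is optional; when given, downloaded TCVis data is cached as NetCDF and reused.
ds = load_and_preprocess_planet_scene(
    planet_scene_path=Path("path/to/planet_scene"),
    arcticdem_dir=Path("path/to/arcticdem"),
    cache_dir=Path("path/to/cache"),
)
```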
29 changes: 29 additions & 0 deletions darts/src/darts/utils/earthengine.py
@@ -0,0 +1,29 @@
"""Earth Engine utilities."""

import logging

import ee

# geemap is not used yet
# import geemap

logger = logging.getLogger(__name__)


def init_ee(project: str):
"""Initialize Earth Engine. Authenticate if necessary.
Args:
project (str): The project name.
"""
logger.debug("Initializing Earth Engine")
try:
ee.Initialize(project=project)
# geemap.ee_initialize(project=project, opt_url="https://earthengine-highvolume.googleapis.com")
except Exception:
logger.debug("Initializing Earth Engine failed, trying to authenticate before")
ee.Authenticate()
ee.Initialize(project=project)
# geemap.ee_initialize(project=project, opt_url="https://earthengine-highvolume.googleapis.com")
logger.debug("Earth Engine initialized")
2 changes: 2 additions & 0 deletions darts/src/darts/utils/logging.py
@@ -32,6 +32,8 @@ def setup_logging():
logging.getLogger("darts_superresolution").setLevel(DARTS_LEVEL)
logging.getLogger("darts").setLevel(DARTS_LEVEL)

logging.captureWarnings(True)


def add_logging_handlers(command: str, console: Console, log_dir: Path):
"""Add logging handlers (rich-console and file) to the application.