diff --git a/doc/sphinx/source/input.rst b/doc/sphinx/source/input.rst index a0c0db504b..c2febd1688 100644 --- a/doc/sphinx/source/input.rst +++ b/doc/sphinx/source/input.rst @@ -301,6 +301,8 @@ A list of the datasets for which a CMORizers is available is provided in the fol +------------------------------+------------------------------------------------------------------------------------------------------+------+-----------------+ | ESACCI-WATERVAPOUR | prw (Amon) | 3 | Python | +------------------------------+------------------------------------------------------------------------------------------------------+------+-----------------+ +| ESDC | tas, tasmax, tasmin (Amon) | 2 | Python | ++------------------------------+------------------------------------------------------------------------------------------------------+------+-----------------+ | ESRL | co2s (Amon) | 2 | NCL | +------------------------------+------------------------------------------------------------------------------------------------------+------+-----------------+ | FLUXCOM | gpp (Lmon) | 3 | Python | diff --git a/environment.yml b/environment.yml index 066a641793..7f97fae73f 100644 --- a/environment.yml +++ b/environment.yml @@ -11,6 +11,7 @@ channels: dependencies: - pip!=21.3 - python>=3.8 + - aiohttp - cartopy - cdo>=1.9.7 - cdsapi @@ -62,6 +63,7 @@ dependencies: - xesmf==0.3.0 - xgboost>1.6.1 # github.com/ESMValGroup/ESMValTool/issues/2779 - xlsxwriter + - zarr # Python packages needed for testing - flake8 - pytest >=3.9,!=6.0.0rc1,!=6.0.0 diff --git a/environment_osx.yml b/environment_osx.yml index a9cc65c3a8..b58b8a6050 100644 --- a/environment_osx.yml +++ b/environment_osx.yml @@ -11,6 +11,7 @@ channels: dependencies: - pip!=21.3 - python>=3.8 + - aiohttp - cartopy - cdo>=1.9.7 - cdsapi @@ -62,6 +63,7 @@ dependencies: - xesmf==0.3.0 - xgboost>1.6.1 # github.com/ESMValGroup/ESMValTool/issues/2779 - xlsxwriter + - zarr # Python packages needed for testing - flake8 - pytest 
>=3.9,!=6.0.0rc1,!=6.0.0 diff --git a/esmvaltool/cmorizers/data/cmor_config/ESDC.yml b/esmvaltool/cmorizers/data/cmor_config/ESDC.yml new file mode 100644 index 0000000000..7fc4a77ff7 --- /dev/null +++ b/esmvaltool/cmorizers/data/cmor_config/ESDC.yml @@ -0,0 +1,26 @@ +--- +filename: 'esdc-8d-{grid}-{chunking}-{version}.zarr' + +attributes: + project_id: OBS6 + dataset_id: ESDC + version: 3.0.1 + tier: 2 + grid: 0.25deg + chunking: 1x720x1440 + # chunking: 256x128x128 + modeling_realm: reanaly + source: http://data.rsc4earth.de/EarthSystemDataCube/ + reference: 'esdc' + comment: '' + +variables: + tas: + mip: Amon + raw: air_temperature_2m + tasmax: + mip: Amon + raw: max_air_temperature_2m + tasmin: + mip: Amon + raw: min_air_temperature_2m diff --git a/esmvaltool/cmorizers/data/datasets.yml b/esmvaltool/cmorizers/data/datasets.yml index ca0abfa5dc..096fd7b693 100644 --- a/esmvaltool/cmorizers/data/datasets.yml +++ b/esmvaltool/cmorizers/data/datasets.yml @@ -524,6 +524,18 @@ datasets: data/tcwv/dataset3_1/CDR-*/... All files need to be in one directory, not in yearly subdirectories. + ESDC: + tier: 2 + source: http://data.rsc4earth.de/EarthSystemDataCube/ + last_access: 2023-01-26 + info: | + It is not necessary to download the data, as the cmorizer script can access + it directly from the cloud if it is not available locally. + + To download a dataset, the dataset folder can be explored on the source + website, and downloaded using wget: + ```wget -m -nH -np -R "index.html*" http://data.rsc4earth.de/EarthSystemDataCube/v3.0.1/``` + ESRL: tier: 2 source: http://www.esrl.noaa.gov/gmd/dv/data/index.php diff --git a/esmvaltool/cmorizers/data/formatters/datasets/esdc.py b/esmvaltool/cmorizers/data/formatters/datasets/esdc.py new file mode 100644 index 0000000000..bf473f53be --- /dev/null +++ b/esmvaltool/cmorizers/data/formatters/datasets/esdc.py @@ -0,0 +1,149 @@ +"""ESMValTool CMORizer for Earth System Data Cube data. 
"""ESMValTool CMORizer for Earth System Data Cube data.

Tier
    Tier 2: other freely-available dataset.

Source
    http://data.rsc4earth.de/EarthSystemDataCube/

Last access
    20230126

Download and processing instructions
    It is not necessary to download the data, as the cmorizer script can access
    it directly from the cloud if it is not available locally.

    To download a dataset, the dataset folder can be explored on the source
    website, and downloaded using wget:
        ```wget -m -nH -np -R "index.html*" http://data.rsc4earth.de/EarthSystemDataCube/v3.0.1/```
"""  # noqa: E501
import logging
from copy import deepcopy
from pathlib import Path

import cf_units
import iris.std_names
import xarray as xr
from esmvalcore.preprocessor import monthly_statistics

from esmvaltool.cmorizers.data import utilities as utils

logger = logging.getLogger(__name__)


def _fix_cube(var, cube, cfg):
    """Apply the fixes shared by all variables and return the fixed cube.

    Parameters
    ----------
    var : dict
        Variable entry of the CMOR configuration; ``var['mip']`` and
        ``var['short_name']`` are read here.
    cube : iris.cube.Cube
        Raw cube; its names, time units and units are modified in place.
    cfg : dict
        CMORizer configuration; ``cfg['cmor_table']`` and
        ``cfg['attributes']`` are read here.

    Returns
    -------
    iris.cube.Cube
        The cube produced by ``monthly_statistics`` (mean operator) with
        variable metadata and global attributes set.
    """
    cmor_info = cfg['cmor_table'].get_variable(var['mip'], var['short_name'])

    # Set correct names
    cube.var_name = cmor_info.short_name
    if cmor_info.standard_name:
        cube.standard_name = cmor_info.standard_name
    cube.long_name = cmor_info.long_name

    # Set calendar to gregorian instead of proleptic gregorian
    time_coord = cube.coord('time')
    old_unit = time_coord.units
    if old_unit.calendar == 'proleptic_gregorian':
        logger.info("Converting time units to gregorian")
        time_coord.units = cf_units.Unit(old_unit.origin,
                                         calendar='gregorian')
    utils.fix_coords(cube)
    cube.convert_units(cmor_info.units)
    if 'height2m' in cmor_info.dimensions:
        utils.add_height2m(cube)
    # Conversion from 8-d to monthly frequency
    cube = monthly_statistics(cube, operator="mean")

    # Fix metadata. Build a new dict instead of writing 'mip' into
    # cfg['attributes'], so the shared configuration is left untouched.
    attrs = {**cfg['attributes'], 'mip': var['mip']}
    utils.fix_var_metadata(cube, cmor_info)
    utils.set_global_atts(cube, attrs)

    return cube


def _open_zarr(path):
    """Open the zarr store at ``path`` as an xarray dataset.

    Parameters
    ----------
    path : str or pathlib.Path
        Local directory or remote URL of the zarr store.

    Returns
    -------
    xarray.Dataset
        The opened dataset.

    Raises
    ------
    KeyError
        Re-raised after logging; happens when the zarr folder is missing
        metadata, e.g. when it is a zarr array instead of a zarr dataset.
    """
    logger.info('Opening zarr in "%s"', path)
    try:
        return xr.open_dataset(path, engine='zarr')
    except KeyError as exception:
        # Happens when the zarr folder is missing metadata, e.g. when
        # it is a zarr array instead of a zarr dataset.
        logger.error('Could not open zarr dataset "%s": "KeyError: %s"', path,
                     exception)
        raise


def _extract_variable(zarr_path, var, cfg, out_dir):
    """Open the zarr store, CMORize one variable and save it.

    Parameters
    ----------
    zarr_path : str or pathlib.Path
        Location of the zarr store passed on to ``_open_zarr``.
    var : dict
        Variable entry; ``var['raw']`` is the name in the zarr dataset and
        ``var['short_name']`` the name used for the output.
    cfg : dict
        CMORizer configuration.
    out_dir : str
        Output directory handed to ``utils.save_variable``.
    """
    # Merge the variable entry (which carries the mip) into a private copy
    # of the global attributes.
    all_attributes = {**deepcopy(cfg['attributes']), **var}

    zarr_dataset = _open_zarr(zarr_path)
    cube_xr = zarr_dataset[var['raw']]

    # Invalid standard names must be removed before converting to iris
    standard_name = cube_xr.attrs.get('standard_name', None)
    if (standard_name is not None
            and standard_name not in iris.std_names.STD_NAMES):
        del cube_xr.attrs['standard_name']
        logger.info('Removed invalid standard name "%s".', standard_name)

    cube = _fix_cube(var, cube_xr.to_iris(), cfg)

    utils.save_variable(cube=cube,
                        var=var['short_name'],
                        outdir=out_dir,
                        attrs=all_attributes,
                        unlimited_dimensions=['time'])


def _find_zarr_path(in_dir, filename_pattern, attributes):
    """Return the local zarr path if one matches, else the remote URL.

    Parameters
    ----------
    in_dir : str
        Directory searched for ``filename_pattern``.
    filename_pattern : str
        Glob pattern of the zarr store name.
    attributes : dict
        Global attributes; ``source`` and ``version`` are used to build the
        remote URL when nothing is found locally.

    Returns
    -------
    pathlib.Path or str
        First local match (in sorted order), or the remote URL.
    """
    local_path = Path(in_dir)
    # Sorted so that "the first one" is deterministic across file systems.
    in_files = sorted(local_path.glob(filename_pattern))
    logger.debug('Pattern %s matched: %s', Path(local_path, filename_pattern),
                 in_files)

    if in_files:
        if len(in_files) > 1:
            logger.warning(
                'Pattern has matched "%i" files, '
                'but only the first one will be used.', len(in_files))
            logger.warning('The following files will be ignored.: "%s"',
                           in_files[1:])
        # Covers the single-match case as well, which previously left the
        # path unassigned.
        return in_files[0]

    logger.info(
        'No local matches for pattern "%s", '
        'attempting connection to the cloud.',
        Path(local_path, filename_pattern))
    if '*' in filename_pattern:
        logger.warning(
            'Detected a wildcard character in path (*), '
            'online connection to \"%s\" may not work', filename_pattern)
    # The configured source may end with '/', avoid a double slash in the URL.
    source = attributes['source'].rstrip('/')
    return f'{source}/v{attributes["version"]}/{filename_pattern}'


def cmorization(in_dir, out_dir, cfg, cfg_user, start_date, end_date):
    """Cmorize the dataset.

    Parameters
    ----------
    in_dir : str
        Directory searched for a local copy of the zarr store.
    out_dir : str
        Directory where the CMORized files are written.
    cfg : dict
        CMORizer configuration (``filename``, ``attributes``, ``variables``).
    cfg_user : object
        Not used by this CMORizer.
    start_date, end_date : object
        Ignored; a warning is logged when either is set.
    """
    if start_date:
        logger.warning('start_date set to "%s", but will be ignored',
                       start_date)
    if end_date:
        logger.warning('end_date set to "%s", but will be ignored', end_date)

    attributes = cfg['attributes']
    filename_pattern = cfg['filename'].format(grid=attributes['grid'],
                                              chunking=attributes['chunking'],
                                              version=attributes['version'])

    zarr_path = _find_zarr_path(in_dir, filename_pattern, attributes)

    for short_name, var in cfg['variables'].items():
        var.setdefault('short_name', short_name)
        _extract_variable(zarr_path, var, cfg, out_dir)
and Reichstein, M.}, + title = {Earth system data cubes unravel global multivariate dynamics}, + journal = {Earth System Dynamics} +} diff --git a/setup.py b/setup.py index bf0c37d176..4d59862c54 100755 --- a/setup.py +++ b/setup.py @@ -20,6 +20,7 @@ # Installation dependencies # Use with pip install . to install from source 'install': [ + 'aiohttp', 'cartopy', 'cdo', 'cdsapi', @@ -65,6 +66,7 @@ 'xesmf==0.3.0', 'xgboost>1.6.1', # github.com/ESMValGroup/ESMValTool/issues/2779 'xlsxwriter', + 'zarr', ], # Test dependencies # Execute `pip install .[test]` once and the use `pytest` to run tests