diff --git a/CITATION.cff b/CITATION.cff index 48cd294253..aa57af62e3 100644 --- a/CITATION.cff +++ b/CITATION.cff @@ -98,5 +98,5 @@ license: "Apache-2.0" message: "If you use this software, please cite it using these metadata." repository-code: "https://github.com/ESMValGroup/ESMValCore/" title: ESMValCore -version: "v2.0.0b8" +version: "v2.0.0b9" ... diff --git a/doc/changelog.rst b/doc/changelog.rst index 8f05f42b60..89dd0f77b3 100644 --- a/doc/changelog.rst +++ b/doc/changelog.rst @@ -1,5 +1,29 @@ Changelog ========= +v2.0.0b9 +-------- -For older releases, see the release notes on https://github.com/ESMValGroup/ESMValCore/releases. \ No newline at end of file +This release includes + +Bug fixes +~~~~~~~~~ + +- Cast dtype float32 to output from zonal and meridional area preprocessors (`#581 <https://github.com/ESMValGroup/ESMValCore/pull/581>`__) `Valeriu Predoi <https://github.com/valeriupredoi>`__ + +Improvements +~~~~~~~~~~~~ + +- Unpin on Python<3.8 for conda package (run) (`#570 <https://github.com/ESMValGroup/ESMValCore/pull/570>`__) `Valeriu Predoi <https://github.com/valeriupredoi>`__ +- Update pytest installation marker (`#572 <https://github.com/ESMValGroup/ESMValCore/pull/572>`__) `Bouwe Andela <https://github.com/bouweandela>`__ +- Remove vmrh2o (`#573 <https://github.com/ESMValGroup/ESMValCore/pull/573>`__) `Mattia Righi <https://github.com/mattiarighi>`__ +- Restructure documentation (`#575 <https://github.com/ESMValGroup/ESMValCore/pull/575>`__) `Bouwe Andela <https://github.com/bouweandela>`__ +- Fix mask in land variables for CCSM4 (`#579 <https://github.com/ESMValGroup/ESMValCore/pull/579>`__) `Klaus Zimmermann <https://github.com/zklaus>`__ +- Fix derive scripts wrt required method (`#585 <https://github.com/ESMValGroup/ESMValCore/pull/585>`__) `Klaus Zimmermann <https://github.com/zklaus>`__ +- Check coordinates do not have repeated standard names (`#558 <https://github.com/ESMValGroup/ESMValCore/pull/558>`__) `Javier Vegas-Regidor <https://github.com/jvegasbsc>`__ +- Added derivation script for co2s (`#587 <https://github.com/ESMValGroup/ESMValCore/pull/587>`__) `Manuel Schlund <https://github.com/schlunma>`__ +- Adapted custom co2s table to match CMIP6 version (`#588 <https://github.com/ESMValGroup/ESMValCore/pull/588>`__) `Manuel Schlund <https://github.com/schlunma>`__ +- Increase version to v2.0.0b9 (`#593 <https://github.com/ESMValGroup/ESMValCore/pull/593>`__) `Bouwe Andela <https://github.com/bouweandela>`__ +- Add a method to save citation information (`#402 <https://github.com/ESMValGroup/ESMValCore/pull/402>`__) `SarahAlidoost <https://github.com/SarahAlidoost>`__ + +For older releases, see the release notes on https://github.com/ESMValGroup/ESMValCore/releases. 
diff --git a/esmvalcore/_citation.py b/esmvalcore/_citation.py new file mode 100644 index 0000000000..b3ead8a600 --- /dev/null +++ b/esmvalcore/_citation.py @@ -0,0 +1,247 @@ +"""Citation module.""" +import logging +import os +import re +import textwrap +from functools import lru_cache + +import requests + +from ._config import DIAGNOSTICS_PATH + +logger = logging.getLogger(__name__) + +REFERENCES_PATH = DIAGNOSTICS_PATH / 'references' + +CMIP6_URL_STEM = 'https://cera-www.dkrz.de/WDCC/ui/cerasearch' + +# The technical overview paper should always be cited +ESMVALTOOL_PAPER = ( + "@article{righi20gmd,\n" + "\tdoi = {10.5194/gmd-13-1179-2020},\n" + "\turl = {https://doi.org/10.5194/gmd-13-1179-2020},\n" + "\tyear = {2020},\n" + "\tmonth = mar,\n" + "\tpublisher = {Copernicus {GmbH}},\n" + "\tvolume = {13},\n" + "\tnumber = {3},\n" + "\tpages = {1179--1199},\n" + "\tauthor = {Mattia Righi and Bouwe Andela and Veronika Eyring " + "and Axel Lauer and Valeriu Predoi and Manuel Schlund " + "and Javier Vegas-Regidor and Lisa Bock and Bj\"{o}rn Br\"{o}tz " + "and Lee de Mora and Faruk Diblen and Laura Dreyer " + "and Niels Drost and Paul Earnshaw and Birgit Hassler " + "and Nikolay Koldunov and Bill Little and Saskia Loosveldt Tomas " + "and Klaus Zimmermann},\n" + "\ttitle = {Earth System Model Evaluation Tool (ESMValTool) v2.0 " + "-- technical overview},\n" + "\tjournal = {Geoscientific Model Development}\n" + "}\n") + + +def _write_citation_files(filename, provenance): + """ + Write citation information provided by the recorded provenance. + + Recipe and cmip6 data references are saved into one bibtex file. + cmip6 data references are provided by CMIP6 data citation service. + Each cmip6 data reference has a json link. In the case of internet + connection, cmip6 data references are saved into a bibtex file. + Also, cmip6 data reference links are saved into a text file. 
+ """ + product_name = os.path.splitext(filename)[0] + + tags = set() + cmip6_json_urls = set() + cmip6_info_urls = set() + other_info = set() + + for item in provenance.records: + # get cmip6 data citation info + cmip6_data = 'CMIP6' in item.get_attribute('attribute:mip_era') + if cmip6_data: + url_prefix = _make_url_prefix(item.attributes) + cmip6_info_urls.add(_make_info_url(url_prefix)) + cmip6_json_urls.add(_make_json_url(url_prefix)) + + # get other citation info + references = item.get_attribute('attribute:references') + if not references: + # ESMValTool CMORization scripts use 'reference' (without final s) + references = item.get_attribute('attribute:reference') + if references: + if item.identifier.namespace.prefix == 'recipe': + # get recipe citation tags + tags.update(references) + elif item.get_attribute('attribute:script_file'): + # get diagnostics citation tags + tags.update(references) + elif not cmip6_data: + # get any other data citation tags, e.g. CMIP5 + other_info.update(references) + + _save_citation_bibtex(product_name, tags, cmip6_json_urls) + _save_citation_info_txt(product_name, cmip6_info_urls, other_info) + + +def _save_citation_bibtex(product_name, tags, json_urls): + """Save the bibtex entries in a bibtex file.""" + citation_entries = [ESMVALTOOL_PAPER] + + # convert tags to bibtex entries + if tags: + entries = set() + for tag in _extract_tags(tags): + entries.add(_collect_bibtex_citation(tag)) + citation_entries.extend(sorted(entries)) + + # convert json_urls to bibtex entries + entries = set() + for json_url in json_urls: + cmip_citation = _collect_cmip_citation(json_url) + if cmip_citation: + entries.add(cmip_citation) + citation_entries.extend(sorted(entries)) + + with open(f'{product_name}_citation.bibtex', 'w') as file: + file.write('\n'.join(citation_entries)) + + +def _save_citation_info_txt(product_name, info_urls, other_info): + """Save all data citation information in one text file.""" + lines = [] + # Save CMIP6 url_info + 
if info_urls: + lines.append( + "Follow the links below to find more information about CMIP6 data:" + ) + lines.extend(f'- {url}' for url in sorted(info_urls)) + + # Save any references from the 'references' and 'reference' NetCDF global + # attributes. + if other_info: + if lines: + lines.append('') + lines.append("Additional data citation information was found, for " + "which no entry is available in the bibtex file:") + lines.extend('- ' + str(t).replace('\n', ' ') + for t in sorted(other_info)) + + if lines: + with open(f'{product_name}_data_citation_info.txt', 'w') as file: + file.write('\n'.join(lines) + '\n') + + +def _extract_tags(tags): + """Extract tags. + + Tags are recorded as a list of strings converted to a string in provenance. + For example, a single entry in the list `tags` could be the string + "['acknow_project', 'acknow_author']". + """ + pattern = re.compile(r'\w+') + return set(pattern.findall(str(tags))) + + +def _get_response(url): + """Return information from CMIP6 Data Citation service in json format.""" + json_data = None + if url.lower().startswith('https'): + try: + response = requests.get(url) + if response.status_code == 200: + json_data = response.json() + else: + logger.warning('Error in the CMIP6 citation link: %s', url) + except IOError: + logger.info('No network connection, ' + 'unable to retrieve CMIP6 citation information') + return json_data + + +def _json_to_bibtex(data): + """Make a bibtex entry from CMIP6 Data Citation json data.""" + url = 'url not found' + title = data.get('titles', ['title not found'])[0] + publisher = data.get('publisher', 'publisher not found') + year = data.get('publicationYear', 'publicationYear not found') + authors = 'creators not found' + doi = 'doi not found' + + if 'creators' in data: + author_list = [ + item.get('creatorName', '') for item in data['creators'] + ] + authors = ' and '.join(author_list) + if not authors: + authors = 'creators not found' + + if 'identifier' in data: + doi = 
data['identifier'].get('id', 'doi not found') + url = f'https://doi.org/{doi}' + + bibtex_entry = textwrap.dedent(f""" + @misc{{{url}, + \turl = {{{url}}}, + \ttitle = {{{title}}}, + \tpublisher = {{{publisher}}}, + \tyear = {year}, + \tauthor = {{{authors}}}, + \tdoi = {{{doi}}}, + }} + """).lstrip() + return bibtex_entry + + +@lru_cache(maxsize=1024) +def _collect_bibtex_citation(tag): + """Collect information from bibtex files.""" + bibtex_file = REFERENCES_PATH / f'{tag}.bibtex' + if bibtex_file.is_file(): + entry = bibtex_file.read_text() + else: + entry = '' + logger.warning( + "The reference file %s does not exist, citation information " + "incomplete.", bibtex_file) + return entry + + +@lru_cache(maxsize=1024) +def _collect_cmip_citation(json_url): + """Collect information from CMIP6 Data Citation Service.""" + json_data = _get_response(json_url) + if json_data: + bibtex_entry = _json_to_bibtex(json_data) + else: + bibtex_entry = '' + return bibtex_entry + + +def _make_url_prefix(attribute): + """Make url prefix based on CMIP6 Data Citation Service.""" + # the order of keys is important + localpart = { + 'mip_era': '', + 'activity_id': '', + 'institution_id': '', + 'source_id': '', + 'experiment_id': '', + } + for key, value in attribute: + if key.localpart in localpart: + localpart[key.localpart] = value + url_prefix = '.'.join(localpart.values()) + return url_prefix + + +def _make_json_url(url_prefix): + """Make json url based on CMIP6 Data Citation Service.""" + json_url = f'{CMIP6_URL_STEM}/cerarest/exportcmip6?input={url_prefix}' + return json_url + + +def _make_info_url(url_prefix): + """Make info url based on CMIP6 Data Citation Service.""" + info_url = f'{CMIP6_URL_STEM}/cmip6?input={url_prefix}' + return info_url diff --git a/esmvalcore/_provenance.py b/esmvalcore/_provenance.py index d0c5352e2b..bf675fae0b 100644 --- a/esmvalcore/_provenance.py +++ b/esmvalcore/_provenance.py @@ -79,7 +79,7 @@ def get_recipe_provenance(documentation, filename): 
entity = provenance.entity( 'recipe:{}'.format(filename), { 'attribute:description': documentation.get('description', ''), - 'attribute:references': ', '.join( + 'attribute:references': str( documentation.get('references', [])), }) diff --git a/esmvalcore/_recipe.py b/esmvalcore/_recipe.py index ac8cbcbee9..ac68be8c40 100644 --- a/esmvalcore/_recipe.py +++ b/esmvalcore/_recipe.py @@ -283,7 +283,7 @@ def _get_default_settings(variable, config_user, derive=False): settings['load'] = { 'callback': concatenate_callback, } - # Configure merge + # Configure concatenation settings['concatenate'] = {} # Configure fixes diff --git a/esmvalcore/_task.py b/esmvalcore/_task.py index fe5136c7ea..d348f80ad3 100644 --- a/esmvalcore/_task.py +++ b/esmvalcore/_task.py @@ -17,6 +17,7 @@ import psutil import yaml +from ._citation import _write_citation_files from ._config import DIAGNOSTICS_PATH, TAGS, replace_tags from ._provenance import TrackedFile, get_task_provenance @@ -565,6 +566,7 @@ def _collect_provenance(self): product = TrackedFile(filename, attributes, ancestors) product.initialize_provenance(self.activity) product.save_provenance() + _write_citation_files(product.filename, product.provenance) self.products.add(product) logger.debug("Collecting provenance of task %s took %.1f seconds", self.name, diff --git a/esmvalcore/_version.py b/esmvalcore/_version.py index 4633adb7ef..bdc26a75b8 100644 --- a/esmvalcore/_version.py +++ b/esmvalcore/_version.py @@ -1,2 +1,2 @@ """ESMValCore version.""" -__version__ = '2.0.0b8' +__version__ = '2.0.0b9' diff --git a/esmvalcore/cmor/tables/custom/CMOR_co2s.dat b/esmvalcore/cmor/tables/custom/CMOR_co2s.dat index c1b9af6168..fd0a876b6c 100644 --- a/esmvalcore/cmor/tables/custom/CMOR_co2s.dat +++ b/esmvalcore/cmor/tables/custom/CMOR_co2s.dat @@ -7,10 +7,11 @@ modeling_realm: atmos ! 
Variable attributes: !---------------------------------- standard_name: mole_fraction_of_carbon_dioxide_in_air -units: mol mol-1 -cell_methods: time: mean +units: 1e-06 +cell_methods: area: time: mean cell_measures: area: areacella -long_name: Mole Fraction of CO2 at surface level +long_name: Atmosphere CO2 +comment: As co2, but only at the surface !---------------------------------- ! Additional variable information: !---------------------------------- diff --git a/esmvalcore/preprocessor/_derive/co2s.py b/esmvalcore/preprocessor/_derive/co2s.py index 4305e08a19..42623206fb 100644 --- a/esmvalcore/preprocessor/_derive/co2s.py +++ b/esmvalcore/preprocessor/_derive/co2s.py @@ -1,39 +1,96 @@ """Derivation of variable ``co2s``.""" import dask.array as da import iris +import numpy as np +import stratify from ._baseclass import DerivedVariableBase +def _get_first_unmasked_data(array, axis): + """Get first unmasked value of an array along an axis.""" + mask = da.ma.getmaskarray(array) + numerical_mask = da.where(mask, -1.0, 1.0) + indices_first_positive = da.argmax(numerical_mask, axis=axis) + indices = da.meshgrid( + *[da.arange(array.shape[i]) for i in range(array.ndim) if i != axis], + indexing='ij') + indices.insert(axis, indices_first_positive) + first_unmasked_data = np.array(array)[tuple(indices)] + return first_unmasked_data + + class DerivedVariable(DerivedVariableBase): - """Derivation of variable ``co2s``.""" + """Derivation of variable ``co2s``. + + Use linear interpolation/extrapolation and surface air pressure to + calculate CO2 mole fraction at surface. + + Note + ---- + In some cases, ``co2`` data is masked. In these cases, the masked values + correspond to values where the pressure level is higher than the surface + air pressure (e.g. the 1000 hPa level for grid cells with high elevation). + To obtain an unmasked ``co2s`` field, it is necessary to fill these masked + values accordingly, i.e. with the lowest unmasked value for each grid cell. 
+ + """ @staticmethod def required(project): """Declare the variables needed for derivation.""" - required = [{'short_name': 'co2'}] + required = [{'short_name': 'co2'}, {'short_name': 'ps'}] return required @staticmethod def calculate(cubes): """Compute mole fraction of CO2 at surface.""" - cube = cubes.extract_strict( + co2_cube = cubes.extract_strict( iris.Constraint(name='mole_fraction_of_carbon_dioxide_in_air')) - mask = da.ma.getmaskarray(cube.core_data()) - if not mask.any(): - cube = cube[:, 0, :, :] - else: - numerical_mask = da.where(mask, -1.0, 1.0) - indices_first_positive = da.argmax(numerical_mask, axis=1) - indices = da.meshgrid( - da.arange(cube.shape[0]), - da.arange(cube.shape[2]), - da.arange(cube.shape[3]), - indexing='ij', - ) - indices.insert(1, indices_first_positive) - surface_data = cube.data[tuple(indices)] - cube = cube[:, 0, :, :] - cube.data = surface_data - cube.convert_units('mol mol-1') - return cube + ps_cube = cubes.extract_strict( + iris.Constraint(name='surface_air_pressure')) + + # Fill masked data if necessary (interpolation fails with masked data) + (z_axis,) = co2_cube.coord_dims(co2_cube.coord(axis='Z', + dim_coords=True)) + mask = da.ma.getmaskarray(co2_cube.core_data()) + if mask.any(): + first_unmasked_data = _get_first_unmasked_data( + co2_cube.core_data(), axis=z_axis) + dim_map = [dim for dim in range(co2_cube.ndim) if dim != z_axis] + first_unmasked_data = iris.util.broadcast_to_shape( + first_unmasked_data, co2_cube.shape, dim_map) + co2_cube.data = da.where(mask, first_unmasked_data, + co2_cube.core_data()) + + # Interpolation (not supported for dask arrays) + air_pressure_coord = co2_cube.coord('air_pressure') + original_levels = iris.util.broadcast_to_shape( + air_pressure_coord.points, co2_cube.shape, + co2_cube.coord_dims(air_pressure_coord)) + target_levels = np.expand_dims(ps_cube.data, axis=z_axis) + co2s_data = stratify.interpolate( + target_levels, + original_levels, + co2_cube.data, + axis=z_axis, + 
interpolation='linear', + extrapolation='linear', + ) + co2s_data = np.squeeze(co2s_data, axis=z_axis) + + # Construct co2s cube + indices = [slice(None)] * co2_cube.ndim + indices[z_axis] = 0 + co2s_cube = co2_cube[tuple(indices)] + co2s_cube.data = co2s_data + if co2s_cube.coords('air_pressure'): + co2s_cube.remove_coord('air_pressure') + ps_coord = iris.coords.AuxCoord(ps_cube.data, + var_name='plev', + standard_name='air_pressure', + long_name='pressure', + units=ps_cube.units) + co2s_cube.add_aux_coord(ps_coord, np.arange(co2s_cube.ndim)) + co2s_cube.convert_units('1e-6') + return co2s_cube diff --git a/esmvalcore/preprocessor/_io.py b/esmvalcore/preprocessor/_io.py index a43824c488..51fa0c201c 100644 --- a/esmvalcore/preprocessor/_io.py +++ b/esmvalcore/preprocessor/_io.py @@ -13,6 +13,7 @@ import yaml from .._task import write_ncl_settings +from ..cmor._fixes.shared import AtmosphereSigmaFactory from ._time import extract_time logger = logging.getLogger(__name__) @@ -28,6 +29,55 @@ } +def _fix_aux_factories(cube): + """Fix :class:`iris.aux_factory.AuxCoordFactory` after concatenation. + + Necessary because of bug in :mod:`iris` (see issue #2478). 
+ + """ + coord_names = [coord.name() for coord in cube.coords()] + + # Hybrid sigma pressure coordinate + # TODO possibly add support for other hybrid coordinates + if 'atmosphere_hybrid_sigma_pressure_coordinate' in coord_names: + new_aux_factory = iris.aux_factory.HybridPressureFactory( + delta=cube.coord(var_name='ap'), + sigma=cube.coord(var_name='b'), + surface_air_pressure=cube.coord(var_name='ps'), + ) + for aux_factory in cube.aux_factories: + if isinstance(aux_factory, iris.aux_factory.HybridPressureFactory): + break + else: + cube.add_aux_factory(new_aux_factory) + + # Hybrid sigma height coordinate + if 'atmosphere_hybrid_height_coordinate' in coord_names: + new_aux_factory = iris.aux_factory.HybridHeightFactory( + delta=cube.coord(var_name='lev'), + sigma=cube.coord(var_name='b'), + orography=cube.coord(var_name='orog'), + ) + for aux_factory in cube.aux_factories: + if isinstance(aux_factory, iris.aux_factory.HybridHeightFactory): + break + else: + cube.add_aux_factory(new_aux_factory) + + # Atmosphere sigma coordinate + if 'atmosphere_sigma_coordinate' in coord_names: + new_aux_factory = AtmosphereSigmaFactory( + pressure_at_top=cube.coord(var_name='ptop'), + sigma=cube.coord(var_name='lev'), + surface_air_pressure=cube.coord(var_name='ps'), + ) + for aux_factory in cube.aux_factories: + if isinstance(aux_factory, AtmosphereSigmaFactory): + break + else: + cube.add_aux_factory(new_aux_factory) + + def _get_attr_from_field_coord(ncfield, coord_name, attr): if coord_name is not None: attrs = ncfield.cf_group[coord_name].cf_attrs() @@ -107,9 +157,16 @@ def concatenate(cubes): concatenated = _concatenate_overlapping_cubes(concatenated) if len(concatenated) == 1: - return concatenated[0] - - logger.error('Can not concatenate cubes into a single one.') + cube = concatenated[0] + _fix_aux_factories(cube) + return cube + + # Concatenation not successful -> retrieve exact error message + try: + iris.cube.CubeList(cubes).concatenate_cube() + except 
iris.exceptions.ConcatenateError as exc: + msg = str(exc) + logger.error('Can not concatenate cubes into a single one: %s', msg) logger.error('Resulting cubes:') for cube in concatenated: logger.error(cube) @@ -119,7 +176,7 @@ def concatenate(cubes): pass else: logger.error('From %s to %s', time.cell(0), time.cell(-1)) - raise ValueError('Can not concatenate cubes.') + raise ValueError(f'Can not concatenate cubes: {msg}') def save(cubes, filename, optimize_access='', compress=False, **kwargs): diff --git a/esmvalcore/utils/draft_release_notes.py b/esmvalcore/utils/draft_release_notes.py index 0702ba590c..ad3af567b4 100644 --- a/esmvalcore/utils/draft_release_notes.py +++ b/esmvalcore/utils/draft_release_notes.py @@ -22,8 +22,17 @@ "~/.github_api_key, see:\nhttps://help.github.com/en/github/" "authenticating-to-github/creating-a-personal-access-token-" "for-the-command-line") + +from esmvalcore import __version__ + +VERSION = f"v{__version__}" GITHUB_REPO = "ESMValGroup/ESMValCore" +TITLES = { + 'bug': 'Bug fixes', + 'enhancement': 'Improvements', +} + def draft_notes_since(previous_release_date, labels): """Draft release notes containing the merged pull requests. 
@@ -57,18 +66,26 @@ def draft_notes_since(previous_release_date, labels): user = pull.user username = user.login if user.name is None else user.name - line = (f"- {pull.title} (#{pull.number}) " - f"[{username}](https://github.com/{user.login})") + line = ( + f"- {pull.title} (`#{pull.number} " + f"<https://github.com/{GITHUB_REPO}/pull/{pull.number}>`__) " + f"`{username} <https://github.com/{user.login}>`__") if label not in lines: lines[label] = [] lines[label].append((pull.closed_at, line)) # Create sections - sections = ["This release includes"] + sections = [ + VERSION, + '-' * len(VERSION), + '', + "This release includes", + ] for label in sorted(lines): - sections.append('\n' + label) - lines[label].sort() # sort by merge time - sections.append('\n'.join(line for _, line in lines[label])) + entries = sorted(lines[label]) # sort by merge time + label = TITLES.get(label, label) + sections.append('\n'.join(['', label, '~' * len(label), ''])) + sections.append('\n'.join(entry for _, entry in entries)) notes = '\n'.join(sections) print(notes) @@ -76,7 +93,7 @@ if __name__ == '__main__': - PREVIOUS_RELEASE = datetime.datetime(2020, 1, 17) + PREVIOUS_RELEASE = datetime.datetime(2020, 3, 6) LABELS = ('bug', 'fix for dataset') draft_notes_since(PREVIOUS_RELEASE, LABELS) diff --git a/meta.yaml b/meta.yaml index 0ec6e432a6..babb88dc52 100644 --- a/meta.yaml +++ b/meta.yaml @@ -5,7 +5,7 @@ # conda build . 
-c conda-forge -c esmvalgroup # Package version number -{% set version = "2.0.0b8" %} +{% set version = "2.0.0b9" %} package: name: esmvalcore diff --git a/tests/integration/preprocessor/_io/test_concatenate.py b/tests/integration/preprocessor/_io/test_concatenate.py index 2ee24bae0d..1fbace4eba 100644 --- a/tests/integration/preprocessor/_io/test_concatenate.py +++ b/tests/integration/preprocessor/_io/test_concatenate.py @@ -1,18 +1,223 @@ """Integration tests for :func:`esmvalcore.preprocessor._io.concatenate`.""" +import warnings import unittest +from unittest.mock import call import numpy as np +import pytest from cf_units import Unit -from iris.coords import DimCoord -from iris.cube import Cube +from iris.aux_factory import HybridHeightFactory, HybridPressureFactory +from iris.coords import AuxCoord, DimCoord +from iris.cube import Cube, CubeList from iris.exceptions import ConcatenateError from esmvalcore.preprocessor import _io +def get_hybrid_pressure_cube(): + """Return cube with hybrid pressure coordinate.""" + ap_coord = AuxCoord([1.0], bounds=[[0.0, 2.0]], var_name='ap', units='Pa') + b_coord = AuxCoord([0.0], bounds=[[-0.5, 1.5]], var_name='b') + ps_coord = AuxCoord([[[100000]]], var_name='ps', units='Pa') + x_coord = AuxCoord( + 0.0, + var_name='x', + standard_name='atmosphere_hybrid_sigma_pressure_coordinate', + ) + cube = Cube([[[[0.0]]]], var_name='x', + aux_coords_and_dims=[(ap_coord, 1), (b_coord, 1), + (ps_coord, (0, 2, 3)), (x_coord, ())]) + return cube + + +def get_hybrid_pressure_cube_list(): + """Return list of cubes including hybrid pressure coordinate.""" + cube_0 = get_hybrid_pressure_cube() + cube_1 = get_hybrid_pressure_cube() + cube_0.add_dim_coord(get_time_coord(0), 0) + cube_1.add_dim_coord(get_time_coord(1), 0) + cubes = CubeList([cube_0, cube_1]) + for cube in cubes: + aux_factory = HybridPressureFactory( + delta=cube.coord(var_name='ap'), + sigma=cube.coord(var_name='b'), + surface_air_pressure=cube.coord(var_name='ps'), + ) + 
cube.add_aux_factory(aux_factory) + return cubes + + +def get_time_coord(time_point): + """Time coordinate.""" + return DimCoord([time_point], var_name='time', standard_name='time', + units='days since 6453-2-1') + + +@pytest.fixture +def mock_empty_cube(): + """Return mocked cube with irrelevant coordinates.""" + cube = unittest.mock.create_autospec(Cube, spec_set=True, instance=True) + a_coord = AuxCoord(0.0, var_name='a') + b_coord = AuxCoord(0.0, var_name='b') + cube.coords.return_value = [a_coord, b_coord] + return cube + + +@pytest.fixture +def mock_hybrid_height_cube(): + """Return mocked cube with hybrid height coordinate.""" + cube = unittest.mock.create_autospec(Cube, spec_set=True, instance=True) + lev_coord = AuxCoord([1.0], bounds=[[0.0, 2.0]], var_name='lev', units='m') + b_coord = AuxCoord([0.0], bounds=[[-0.5, 1.5]], var_name='b') + orog_coord = AuxCoord([[[100000]]], var_name='orog', units='m') + cube.coord.side_effect = [lev_coord, b_coord, orog_coord, + lev_coord, b_coord, orog_coord] + cube.coords.return_value = [ + lev_coord, + b_coord, + orog_coord, + AuxCoord(0.0, standard_name='atmosphere_hybrid_height_coordinate'), + ] + aux_factory = HybridHeightFactory( + delta=lev_coord, + sigma=b_coord, + orography=orog_coord, + ) + cube.aux_factories = ['dummy', aux_factory] + return cube + + +@pytest.fixture +def mock_hybrid_pressure_cube(): + """Return mocked cube with hybrid pressure coordinate.""" + cube = unittest.mock.create_autospec(Cube, spec_set=True, instance=True) + ap_coord = AuxCoord([1.0], bounds=[[0.0, 2.0]], var_name='ap', units='Pa') + b_coord = AuxCoord([0.0], bounds=[[-0.5, 1.5]], var_name='b') + ps_coord = AuxCoord([[[100000]]], var_name='ps', units='Pa') + cube.coord.side_effect = [ap_coord, b_coord, ps_coord, + ap_coord, b_coord, ps_coord] + cube.coords.return_value = [ + ap_coord, + b_coord, + ps_coord, + AuxCoord(0.0, + standard_name='atmosphere_hybrid_sigma_pressure_coordinate'), + ] + aux_factory = HybridPressureFactory( + 
delta=ap_coord, + sigma=b_coord, + surface_air_pressure=ps_coord, + ) + cube.aux_factories = ['dummy', aux_factory] + return cube + + +@pytest.fixture +def real_hybrid_pressure_cube(): + """Return real cube with hybrid pressure coordinate.""" + return get_hybrid_pressure_cube() + + +@pytest.fixture +def real_hybrid_pressure_cube_list(): + """Return real list of cubes with hybrid pressure coordinate.""" + return get_hybrid_pressure_cube_list() + + +def check_if_fix_aux_factories_is_necessary(): + """Check if _fix_aux_factories() is necessary (i.e. iris bug is fixed).""" + cubes = get_hybrid_pressure_cube_list() + cube = cubes.concatenate_cube() + coords = [coord.name() for coord in cube.coords()] + msg = ("Apparently concatenation of cubes that have a derived variable " + "is now possible in iris (i.e. issue #2478 has been fixed). Thus, " + "this test and ALL appearances of the function " + "'_fix_aux_factories' can safely be removed!") + if 'air_pressure' in coords: + warnings.warn(msg) + + +def test_fix_aux_factories_empty_cube(mock_empty_cube): + """Test fixing with empty cube.""" + check_if_fix_aux_factories_is_necessary() + _io._fix_aux_factories(mock_empty_cube) + assert mock_empty_cube.mock_calls == [call.coords()] + + +def test_fix_aux_factories_hybrid_height(mock_hybrid_height_cube): + """Test fixing of hybrid height coordinate.""" + check_if_fix_aux_factories_is_necessary() + + # Test with aux_factory object + _io._fix_aux_factories(mock_hybrid_height_cube) + mock_hybrid_height_cube.coords.assert_called_once_with() + mock_hybrid_height_cube.coord.assert_has_calls([call(var_name='lev'), + call(var_name='b'), + call(var_name='orog')]) + mock_hybrid_height_cube.add_aux_factory.assert_not_called() + + # Test without aux_factory object + mock_hybrid_height_cube.reset_mock() + mock_hybrid_height_cube.aux_factories = ['dummy'] + _io._fix_aux_factories(mock_hybrid_height_cube) + mock_hybrid_height_cube.coords.assert_called_once_with() + 
mock_hybrid_height_cube.coord.assert_has_calls([call(var_name='lev'), + call(var_name='b'), + call(var_name='orog')]) + mock_hybrid_height_cube.add_aux_factory.assert_called_once() + + +def test_fix_aux_factories_hybrid_pressure(mock_hybrid_pressure_cube): + """Test fixing of hybrid pressure coordinate.""" + check_if_fix_aux_factories_is_necessary() + + # Test with aux_factory object + _io._fix_aux_factories(mock_hybrid_pressure_cube) + mock_hybrid_pressure_cube.coords.assert_called_once_with() + mock_hybrid_pressure_cube.coord.assert_has_calls([call(var_name='ap'), + call(var_name='b'), + call(var_name='ps')]) + mock_hybrid_pressure_cube.add_aux_factory.assert_not_called() + + # Test without aux_factory object + mock_hybrid_pressure_cube.reset_mock() + mock_hybrid_pressure_cube.aux_factories = ['dummy'] + _io._fix_aux_factories(mock_hybrid_pressure_cube) + mock_hybrid_pressure_cube.coords.assert_called_once_with() + mock_hybrid_pressure_cube.coord.assert_has_calls([call(var_name='ap'), + call(var_name='b'), + call(var_name='ps')]) + mock_hybrid_pressure_cube.add_aux_factory.assert_called_once() + + +def test_fix_aux_factories_real_cube(real_hybrid_pressure_cube): + """Test fixing of hybrid pressure coordinate on real cube.""" + check_if_fix_aux_factories_is_necessary() + assert not real_hybrid_pressure_cube.coords('air_pressure') + _io._fix_aux_factories(real_hybrid_pressure_cube) + air_pressure_coord = real_hybrid_pressure_cube.coord('air_pressure') + expected_coord = AuxCoord([[[[1.0]]]], bounds=[[[[[-50000., 150002.]]]]], + standard_name='air_pressure', units='Pa') + assert air_pressure_coord == expected_coord + + +def test_concatenation_with_aux_factory(real_hybrid_pressure_cube_list): + """Test actual concatenation of a cube with a derived coordinate.""" + concatenated = _io.concatenate(real_hybrid_pressure_cube_list) + air_pressure_coord = concatenated.coord('air_pressure') + expected_coord = AuxCoord( + [[[[1.0]]], [[[1.0]]]], + bounds=[[[[[-50000.0, 
150002.0]]]], [[[[-50000.0, 150002.0]]]]], + standard_name='air_pressure', + units='Pa', + ) + assert air_pressure_coord == expected_coord + + class TestConcatenate(unittest.TestCase): """Tests for :func:`esmvalcore.preprocessor._io.concatenate`.""" + def setUp(self): """Start tests.""" self._model_coord = DimCoord([1., 2.], @@ -37,7 +242,7 @@ def test_concatenate(self): concatenated.coord('time').points, np.array([1, 2, 3, 4, 5, 6])) def test_concatenate_with_overlap(self): - """Test concatenation of time overalapping cubes""" + """Test concatenation of time overalapping cubes.""" self._add_cube([6.5, 7.5], [6., 7.]) concatenated = _io.concatenate(self.raw_cubes) np.testing.assert_array_equal( diff --git a/tests/integration/test_citation.py b/tests/integration/test_citation.py new file mode 100644 index 0000000000..50fb65ef02 --- /dev/null +++ b/tests/integration/test_citation.py @@ -0,0 +1,118 @@ +"""Test _citation.py.""" +import textwrap + +from prov.model import ProvDocument + +import esmvalcore +from esmvalcore._citation import (CMIP6_URL_STEM, ESMVALTOOL_PAPER, + _write_citation_files) +from esmvalcore._provenance import ESMVALTOOL_URI_PREFIX + + +def test_references(tmp_path, monkeypatch): + """Test1: references are replaced with bibtex.""" + # Create fake provenance + provenance = ProvDocument() + provenance.add_namespace('file', uri=ESMVALTOOL_URI_PREFIX + 'file') + provenance.add_namespace('attribute', + uri=ESMVALTOOL_URI_PREFIX + 'attribute') + filename = str(tmp_path / 'output.nc') + attributes = { + 'attribute:references': 'test_tag', + 'attribute:script_file': 'diagnostics.py' + } + provenance.entity('file:' + filename, attributes) + + # Create fake bibtex references tag file + references_path = tmp_path / 'references' + references_path.mkdir() + monkeypatch.setattr(esmvalcore._citation, 'REFERENCES_PATH', + references_path) + fake_bibtex_file = references_path / 'test_tag.bibtex' + fake_bibtex = "Fake bibtex file content\n" + 
fake_bibtex_file.write_text(fake_bibtex) + + _write_citation_files(filename, provenance) + citation_file = tmp_path / 'output_citation.bibtex' + citation = citation_file.read_text() + assert citation == '\n'.join([ESMVALTOOL_PAPER, fake_bibtex]) + + +def mock_get_response(url): + """Mock _get_response() function.""" + json_data = False + if url.lower().startswith('https'): + json_data = {'titles': ['title is found']} + return json_data + + +def test_cmip6_data_citation(tmp_path, monkeypatch): + """Test2: CMIP6 citation info is retrieved from ES-DOC.""" + # Create fake provenance + provenance = ProvDocument() + provenance.add_namespace('file', uri=ESMVALTOOL_URI_PREFIX + 'file') + provenance.add_namespace('attribute', + uri=ESMVALTOOL_URI_PREFIX + 'attribute') + attributes = { + 'attribute:mip_era': 'CMIP6', + 'attribute:activity_id': 'activity', + 'attribute:institution_id': 'institution', + 'attribute:source_id': 'source', + 'attribute:experiment_id': 'experiment', + } + filename = str(tmp_path / 'output.nc') + provenance.entity('file:' + filename, attributes) + + monkeypatch.setattr(esmvalcore._citation, '_get_response', + mock_get_response) + _write_citation_files(filename, provenance) + citation_file = tmp_path / 'output_citation.bibtex' + + # Create fake bibtex entry + url = 'url not found' + title = 'title is found' + publisher = 'publisher not found' + year = 'publicationYear not found' + authors = 'creators not found' + doi = 'doi not found' + fake_bibtex_entry = textwrap.dedent(f""" + @misc{{{url}, + \turl = {{{url}}}, + \ttitle = {{{title}}}, + \tpublisher = {{{publisher}}}, + \tyear = {year}, + \tauthor = {{{authors}}}, + \tdoi = {{{doi}}}, + }} + """).lstrip() + assert citation_file.read_text() == '\n'.join( + [ESMVALTOOL_PAPER, fake_bibtex_entry]) + + +def test_cmip6_data_citation_url(tmp_path): + """Test3: CMIP6 info_url is retrieved from ES-DOC.""" + # Create fake provenance + provenance = ProvDocument() + provenance.add_namespace('file', 
uri=ESMVALTOOL_URI_PREFIX + 'file') + provenance.add_namespace('attribute', + uri=ESMVALTOOL_URI_PREFIX + 'attribute') + attributes = { + 'attribute:mip_era': 'CMIP6', + 'attribute:activity_id': 'activity', + 'attribute:institution_id': 'institution', + 'attribute:source_id': 'source', + 'attribute:experiment_id': 'experiment', + } + filename = str(tmp_path / 'output.nc') + provenance.entity('file:' + filename, attributes) + _write_citation_files(filename, provenance) + citation_url = tmp_path / 'output_data_citation_info.txt' + + # Create fake info url + fake_url_prefix = '.'.join(attributes.values()) + text = '\n'.join([ + "Follow the links below to find more information about CMIP6 data:", + f"- {CMIP6_URL_STEM}/cmip6?input={fake_url_prefix}", + '', + ]) + assert citation_url.read_text() == text diff --git a/tests/integration/test_recipe.py b/tests/integration/test_recipe.py index c9efc7a467..e6f64b8ad0 100644 --- a/tests/integration/test_recipe.py +++ b/tests/integration/test_recipe.py @@ -1190,11 +1190,6 @@ def simulate_diagnostic_run(diagnostic_task): 'name': 'Bouwe Andela', }, }, - 'references': { - 'acknow_author': "Please acknowledge the author(s).", - 'contact_authors': "Please contact the author(s) ...", - 'acknow_project': "Please acknowledge the project(s).", - }, 'projects': { 'c3s-magic': 'C3S MAGIC project', }, @@ -1271,7 +1266,7 @@ def test_diagnostic_task_provenance( key).pop() == record[key] # Check that diagnostic script tags have been added - for key in ('statistics', 'domains', 'authors', 'references'): + for key in ('statistics', 'domains', 'authors'): assert product.attributes[key] == tuple(TAGS[key][k] for k in record[key]) @@ -1287,7 +1282,7 @@ def test_diagnostic_task_provenance( for key in ('description', 'references'): value = src['documentation'][key] if key == 'references': - value = ', '.join(TAGS[key][k] for k in value) + value = str(src['documentation'][key]) assert recipe_record[0].get_attribute('attribute:' + key).pop() == value 
diff --git a/tests/unit/preprocessor/_derive/test_co2s.py b/tests/unit/preprocessor/_derive/test_co2s.py index fd5155a0bd..3fd364edd7 100644 --- a/tests/unit/preprocessor/_derive/test_co2s.py +++ b/tests/unit/preprocessor/_derive/test_co2s.py @@ -7,28 +7,51 @@ import esmvalcore.preprocessor._derive.co2s as co2s -def get_coord_spec(): +def get_coord_spec(include_plev=True): """Coordinate specs for cubes.""" time_coord = iris.coords.DimCoord([0], var_name='time', standard_name='time', units='days since 0000-01-01 00:00:00') - plev_coord = iris.coords.DimCoord([123456.0, 50000.0, 1000.0], - var_name='plev', - standard_name='air_pressure', units='Pa') lat_coord = iris.coords.DimCoord([0.0, 1.0], var_name='latitude', standard_name='latitude', units='degrees') lon_coord = iris.coords.DimCoord([0.0, 1.0], var_name='longitude', standard_name='longitude', units='degrees') - coord_spec = [ - (time_coord, 0), - (plev_coord, 1), - (lat_coord, 2), - (lon_coord, 3), - ] + if include_plev: + plev_coord = iris.coords.DimCoord([100000.0, 90000.0, 50000.0], + var_name='plev', + standard_name='air_pressure', + units='Pa') + coord_spec = [ + (time_coord, 0), + (plev_coord, 1), + (lat_coord, 2), + (lon_coord, 3), + ] + else: + coord_spec = [ + (time_coord, 0), + (lat_coord, 1), + (lon_coord, 2), + ] return coord_spec +def get_ps_cube(): + """Surface air pressure cube.""" + ps_data = [[[105000.0, 50000.0], + [95000.0, 60000.0]]] + coord_spec = get_coord_spec(include_plev=False) + cube = iris.cube.Cube( + ps_data, + var_name='ps', + standard_name='surface_air_pressure', + units='Pa', + dim_coords_and_dims=coord_spec, + ) + return cube + + @pytest.fixture def masked_cubes(): """Masked CO2 cube.""" @@ -39,14 +62,15 @@ def masked_cubes(): [80.0, -1.0]], [[100.0, 50.0], [30.0, 10.0]]]], 0.0) - cube = iris.cube.Cube( + co2_cube = iris.cube.Cube( co2_data, var_name='co2', standard_name='mole_fraction_of_carbon_dioxide_in_air', - units='1', + units='1e-6', dim_coords_and_dims=coord_spec, ) - 
return iris.cube.CubeList([cube]) + ps_cube = get_ps_cube() + return iris.cube.CubeList([co2_cube, ps_cube]) @pytest.fixture @@ -59,14 +83,15 @@ def unmasked_cubes(): [70.0, 5.0]], [[100.0, 50.0], [30.0, 1.0]]]]) - cube = iris.cube.Cube( + co2_cube = iris.cube.Cube( co2_data, var_name='co2', standard_name='mole_fraction_of_carbon_dioxide_in_air', - units='1e-1', + units='1e-8', dim_coords_and_dims=coord_spec, ) - return iris.cube.CubeList([cube]) + ps_cube = get_ps_cube() + return iris.cube.CubeList([co2_cube, ps_cube]) def test_co2_calculate_masked_cubes(masked_cubes): @@ -75,11 +100,16 @@ def test_co2_calculate_masked_cubes(masked_cubes): out_cube = derived_var.calculate(masked_cubes) assert not np.ma.is_masked(out_cube.data) np.testing.assert_allclose(out_cube.data, - [[[170.0, 100.0], + [[[180.0, 50.0], [80.0, 10.0]]]) - assert out_cube.units == 'mol mol-1' - np.testing.assert_allclose(out_cube.coord('air_pressure').points, - 123456.0) + assert out_cube.units == '1e-6' + plev_coord = out_cube.coord('air_pressure') + assert plev_coord.var_name == 'plev' + assert plev_coord.standard_name == 'air_pressure' + assert plev_coord.long_name == 'pressure' + assert plev_coord.units == 'Pa' + np.testing.assert_allclose(plev_coord.points, + [[[105000.0, 50000.0], [95000.0, 60000.0]]]) def test_co2_calculate_unmasked_cubes(unmasked_cubes): @@ -88,8 +118,13 @@ def test_co2_calculate_unmasked_cubes(unmasked_cubes): out_cube = derived_var.calculate(unmasked_cubes) assert not np.ma.is_masked(out_cube.data) np.testing.assert_allclose(out_cube.data, - [[[20.0, 10.0], - [8.0, 0.9]]]) - assert out_cube.units == 'mol mol-1' - np.testing.assert_allclose(out_cube.coord('air_pressure').points, - 123456.0) + [[[2.25, 0.50], + [0.75, 0.02]]]) + assert out_cube.units == '1e-6' + plev_coord = out_cube.coord('air_pressure') + assert plev_coord.var_name == 'plev' + assert plev_coord.standard_name == 'air_pressure' + assert plev_coord.long_name == 'pressure' + assert plev_coord.units == 'Pa' 
+ np.testing.assert_allclose(plev_coord.points, + [[[105000.0, 50000.0], [95000.0, 60000.0]]])