diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index 243abede..58846c3f 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -82,3 +82,33 @@ jobs: shell: bash -l {0} run: | pytest -n auto --ignore=cmip6_preprocessing/tests/test_preprocessing_cloud.py + + cloud-tests: + name: Build (cloud-data-tests) + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v2 + - name: Cache conda + uses: actions/cache@v1 + env: + # Increase this value to reset cache if ci/environment-cloud-test.yml has not changed + CACHE_NUMBER: 0 + with: + path: ~/conda_pkgs_dir + key: ${{ runner.os }}-conda-${{ env.CACHE_NUMBER }}-${{ hashFiles('ci/environment-cloud-test.yml') }} + - uses: conda-incubator/setup-miniconda@v2 + with: + activate-environment: test_env_cmip6_preprocessing # Defined in ci/environment-cloud-test.yml + auto-update-conda: false + python-version: 3.8 + environment-file: ci/environment-cloud-test.yml + use-only-tar-bz2: true # IMPORTANT: This needs to be set for caching to work properly! + - name: Set up conda environment + shell: bash -l {0} + run: | + python -m pip install -e . 
+ conda list + - name: Run Tests + shell: bash -l {0} + run: | + pytest -v -n auto --maxfail 100 --reruns 0 --reruns-delay 1 diff --git a/ci/environment-cloud-test.yml b/ci/environment-cloud-test.yml new file mode 100644 index 00000000..3c980536 --- /dev/null +++ b/ci/environment-cloud-test.yml @@ -0,0 +1,22 @@ +name: test_env_cmip6_preprocessing +channels: + - conda-forge +dependencies: + + - python=3.8 + - xarray + - numpy + - pandas + - intake-esm==2020.08.15 #temporary solution see https://github.com/intake/intake-esm/issues/305 + - gcsfs + - zarr + - xgcm + - pyproj + - matplotlib + - regionmask # this will fail until the current version is released on conda + - black + - pytest-cov + - pytest-xdist + - pytest-rerunfailures + - codecov + - cftime diff --git a/ci/environment-py3.6.yml b/ci/environment-py3.6.yml index a40ee234..7c4b867f 100644 --- a/ci/environment-py3.6.yml +++ b/ci/environment-py3.6.yml @@ -6,10 +6,10 @@ dependencies: - xarray - numpy - pandas - - intake-esm - xgcm - pyproj - matplotlib + - cftime - pip - pip: - codecov diff --git a/ci/environment-py3.7.yml b/ci/environment-py3.7.yml index 5bddc760..bd27ca67 100644 --- a/ci/environment-py3.7.yml +++ b/ci/environment-py3.7.yml @@ -6,8 +6,6 @@ dependencies: - xarray - numpy - pandas - - intake-esm - - gcsfs - zarr - xgcm - pyproj @@ -17,3 +15,4 @@ dependencies: - pytest-cov - pytest-xdist - codecov + - cftime diff --git a/ci/environment-py3.8.yml b/ci/environment-py3.8.yml index 822211f2..84617840 100644 --- a/ci/environment-py3.8.yml +++ b/ci/environment-py3.8.yml @@ -6,8 +6,6 @@ dependencies: - xarray - numpy - pandas - - intake-esm - - gcsfs - zarr - xgcm - pyproj @@ -17,3 +15,4 @@ dependencies: - pytest-cov - pytest-xdist - codecov + - cftime diff --git a/ci/environment-upstream-dev.yml b/ci/environment-upstream-dev.yml index e1b75924..60e17093 100644 --- a/ci/environment-upstream-dev.yml +++ b/ci/environment-upstream-dev.yml @@ -5,19 +5,17 @@ dependencies: - python=3.8 - pyproj - matplotlib 
- - intake - cartopy - - gcsfs # needed for google cloud storage - - zarr # needed for google cloud storage tests - codecov - pytest-cov - black - numpy + - cftime + - dask - pip - pip: - pytest-xdist - git+https://github.com/mathause/regionmask.git - git+https://github.com/pydata/xarray.git - git+https://github.com/pandas-dev/pandas.git - - git+https://github.com/NCAR/intake-esm.git - git+https://github.com/xgcm/xgcm.git diff --git a/ci/environment-upstream-master.yml b/ci/environment-upstream-master.yml deleted file mode 100644 index 2189f77c..00000000 --- a/ci/environment-upstream-master.yml +++ /dev/null @@ -1,23 +0,0 @@ -name: test_env_cmip6_preprocessing -channels: - - conda-forge -dependencies: - - python=3.7 - - pyproj - - matplotlib - - intake - - cartopy - - gcsfs # needed for google cloud storage - - zarr # needed for google cloud storage tests - - codecov - - pytest-cov - - black - - numpy - - pip - - pip: - - pytest-xdist - - git+https://github.com/mathause/regionmask.git - - git+https://github.com/pydata/xarray.git - - git+https://github.com/pandas-dev/pandas.git - - git+https://github.com/NCAR/intake-esm.git - - git+https://github.com/xgcm/xgcm.git diff --git a/cmip6_preprocessing/tests/cloud_test_utils.py b/cmip6_preprocessing/tests/cloud_test_utils.py new file mode 100644 index 00000000..ed53e08d --- /dev/null +++ b/cmip6_preprocessing/tests/cloud_test_utils.py @@ -0,0 +1,110 @@ +import pytest +import contextlib +import xarray as xr +import numpy as np +import intake +import fsspec +import itertools +from cmip6_preprocessing.preprocessing import combined_preprocessing +from cmip6_preprocessing.grids import combine_staggered_grid + +pytest.importorskip("gcsfs") + + +def col(): + return intake.open_esm_datastore( + "https://raw.githubusercontent.com/NCAR/intake-esm-datastore/master/catalogs/pangeo-cmip6.json" + ) + + +def diagnose_doubles(data): + """displays non-unique entries in data""" + _, idx = np.unique(data, return_index=True) + missing = 
np.array([i for i in np.arange(len(data)) if i not in idx]) + if len(missing) > 0: + missing_values = data[missing] + print(f"Missing values Indicies[{missing}]/ Values[{missing_values}]") + + +def xfail_wrapper(specs, fail_specs): + # fail out if there is a fail spec that is not in the list + # unknown_fail_specs = [fail for fail in fail_specs if fail not in specs] + # if len(unknown_fail_specs) > 0: + # raise ValueError( + # f"Found fail specs that are not part of the testing {unknown_fail_specs}" + # ) + wrapped_specs = [] + for spec in specs: + if spec in fail_specs: + wrapped_specs.append( + pytest.param(*spec, marks=pytest.mark.xfail(strict=True)) + ) + else: + wrapped_specs.append(spec) + return wrapped_specs + + +def data(source_id, variable_id, experiment_id, grid_label, use_intake_esm): + zarr_kwargs = { + "consolidated": True, + "decode_times": False, + # "decode_times": True, + # "use_cftime": True, + } + + cat = col().search( + source_id=source_id, + experiment_id=experiment_id, + variable_id=variable_id, + # member_id="r1i1p1f1", + table_id="Omon", + grid_label=grid_label, + ) + + if len(cat.df["zstore"]) > 0: + if use_intake_esm: + ddict = cat.to_dataset_dict( + zarr_kwargs=zarr_kwargs, + preprocess=combined_preprocessing, + storage_options={"token": "anon"}, + ) + _, ds = ddict.popitem() + else: + ##### debugging options + # @charlesbluca suggested this to make this work in GHA + # https://github.com/jbusecke/cmip6_preprocessing/pull/62#issuecomment-741928365 + mm = fsspec.get_mapper( + cat.df["zstore"].iloc[0] + ) # think you can pass in storage options here as well? 
+ ds_raw = xr.open_zarr(mm, **zarr_kwargs) + print(ds_raw) + ds = combined_preprocessing(ds_raw) + else: + ds = None + + return ds, cat + + +def all_models(): + df = col().df + all_models = df["source_id"].unique() + all_models = tuple(np.sort(all_models)) + return all_models + + +def full_specs(): + grid_labels = tuple(["gn", "gr"]) + experiment_ids = tuple(["historical", "ssp585"]) + variable_ids = tuple(["thetao", "o2"]) + + test_specs = list( + itertools.product( + *[ + all_models(), + variable_ids, + experiment_ids, + grid_labels, + ] + ) + ) + return test_specs diff --git a/cmip6_preprocessing/tests/test_preprocessing.py b/cmip6_preprocessing/tests/test_preprocessing.py index 55d8666d..0b429cba 100644 --- a/cmip6_preprocessing/tests/test_preprocessing.py +++ b/cmip6_preprocessing/tests/test_preprocessing.py @@ -1,5 +1,4 @@ import pytest -import intake import pandas as pd import numpy as np import xarray as xr @@ -20,15 +19,6 @@ combined_preprocessing, ) -# get all available ocean models from the cloud. -url = "https://storage.googleapis.com/cmip6/pangeo-cmip6.csv" -df = pd.read_csv(url) -df_ocean = df[(df.table_id == "Omon") + (df.table_id == "Oyr")] -ocean_models = df_ocean.source_id.unique() - -# TODO: Need to adapt atmos only models -all_models = ocean_models - def create_test_ds(xname, yname, zname, xlen, ylen, zlen): x = np.linspace(0, 359, xlen) diff --git a/cmip6_preprocessing/tests/test_preprocessing_cloud.py b/cmip6_preprocessing/tests/test_preprocessing_cloud.py index c70cc0a9..af4ded99 100644 --- a/cmip6_preprocessing/tests/test_preprocessing_cloud.py +++ b/cmip6_preprocessing/tests/test_preprocessing_cloud.py @@ -1,171 +1,260 @@ -# This module tests data directly from the pangeo google cloud storage +# This module tests data directly from the pangeo google cloud storage. +# Tests are meant to be more high level and also serve to document known problems (see skip statements). 
import pytest +import xarray as xr import numpy as np -import intake +from cmip6_preprocessing.tests.cloud_test_utils import ( + full_specs, + xfail_wrapper, + all_models, + data, + diagnose_doubles, +) from cmip6_preprocessing.preprocessing import combined_preprocessing from cmip6_preprocessing.grids import combine_staggered_grid pytest.importorskip("gcsfs") +print(f"\n\n\n\n$$$$$$$ All available models: {all_models()}$$$$$$$\n\n\n\n") -@pytest.fixture -def col(): - return intake.open_esm_datastore( - "https://raw.githubusercontent.com/NCAR/intake-esm-datastore/master/catalogs/pangeo-cmip6.json" - ) - - -def all_models(): - col = intake.open_esm_datastore( - "https://raw.githubusercontent.com/NCAR/intake-esm-datastore/master/catalogs/pangeo-cmip6.json" - ) - df = col.df - all_models = df["source_id"].unique() - - # TODO: finally get IPSL model to run and release this - # TODO: Allow the AWI regridded model output for the preprocessing module - return [m for m in all_models if (("IPSL" not in m) & ("AWI" not in m))] - # return [m for m in all_models if "MIROC" in m] - - -def _diagnose_doubles(data): - """displays non-unique entries in data""" - _, idx = np.unique(data, return_index=True) - missing = np.array([i for i in np.arange(len(data)) if i not in idx]) - if len(missing) > 0: - missing_values = data[missing] - print(f"Missing values Indicies[{missing}]/ Values[{missing_values}]") - - -# These are too many tests. Perhaps I could load all the data first and then -# test each dict item? 
- - -@pytest.mark.parametrize("grid_label", ["gr", "gn"]) -@pytest.mark.parametrize("experiment_id", ["historical"]) -@pytest.mark.parametrize("variable_id", ["o2", "thetao"]) -@pytest.mark.parametrize("source_id", all_models()) -def test_preprocessing_combined(col, source_id, experiment_id, grid_label, variable_id): - cat = col.search( - source_id=source_id, - experiment_id=experiment_id, - variable_id=variable_id, - # member_id="r1i1p1f1", - table_id="Omon", - grid_label=grid_label, - ) - - # ddict_raw = cat.to_dataset_dict( - # zarr_kwargs={"consolidated": True, "decode_times": False}, - # preprocess=None, - # storage_options={"token": "anon"}, - # ) - # if len(ddict_raw) > 0: - # _, ds_raw = ddict_raw.popitem() - # print(ds_raw) - - ddict = cat.to_dataset_dict( - zarr_kwargs={"consolidated": True, "decode_times": False}, - preprocess=combined_preprocessing, - storage_options={"token": "anon"}, - ) - - if len(ddict) > 0: - - _, ds = ddict.popitem() - - if source_id == "CESM2-FV2": - pytest.skip("And `` has nans in the lon/lat") - - ##### Check for dim duplicates - # check all dims for duplicates - # for di in ds.dims: - # for now only test a subset of the dims. TODO: Add the bounds once they - # are cleaned up. - for di in ["x", "y", "lev", "time"]: - if di in ds.dims: - _diagnose_doubles(ds[di].load().data) - assert len(ds[di]) == len(np.unique(ds[di])) + +# manually combine all pytest parameters, so that I have very fine grained control over +# which combination of parameters is expected to fail. + + +########################### Most basic test ######################### +expected_failures = [ + ("AWI-ESM-1-1-LR", "thetao", "historical", "gn"), + ("AWI-ESM-1-1-LR", "thetao", "ssp585", "gn"), + ("AWI-CM-1-1-MR", "thetao", "historical", "gn"), + ("AWI-CM-1-1-MR", "thetao", "ssp585", "gn"), + # TODO: would be nice to have a "*" matching... 
+ ("CESM2-FV2", "thetao", "historical", "gn"), + ("CESM2-FV2", "thetao", "ssp585", "gn"), +] + + +@pytest.mark.parametrize( + "source_id,variable_id,experiment_id,grid_label", + xfail_wrapper(full_specs(), expected_failures), +) +def test_check_dim_coord_values_wo_intake( + source_id, variable_id, experiment_id, grid_label +): + # there must be a better way to build this at the class level and then tear it down again + # I can probably get this done with fixtures, but I dont know how atm + ds, cat = data(source_id, variable_id, experiment_id, grid_label, False) + + if ds is None: + pytest.skip( + f"No data found for {source_id}|{variable_id}|{experiment_id}|{grid_label}" + ) + + ##### Check for dim duplicates + # check all dims for duplicates + # for di in ds.dims: + # for now only test a subset of the dims. TODO: Add the bounds once they + # are cleaned up. + for di in ["x", "y", "lev", "time"]: + if di in ds.dims: + diagnose_doubles(ds[di].load().data) + assert len(ds[di]) == len(np.unique(ds[di])) + if di != "time": # these tests do not make sense for decoded time assert ~np.all(np.isnan(ds[di])) assert np.all(ds[di].diff(di) >= 0) - assert ds.lon.min().load() >= 0 - assert ds.lon.max().load() <= 360 - if "lon_bounds" in ds.variables: - assert ds.lon_bounds.min().load() >= 0 - assert ds.lon_bounds.max().load() <= 360 - assert ds.lat.min().load() >= -90 - assert ds.lat.max().load() <= 90 - # make sure lon and lat are 2d - assert len(ds.lon.shape) == 2 - assert len(ds.lat.shape) == 2 - - if "vertex" in ds.dims: - np.testing.assert_allclose(ds.vertex.data, np.arange(4)) - - if source_id == "FGOALS-f3-L": - pytest.skip("`FGOALS-f3-L` does not come with lon/lat bounds") - - ####Check for existing bounds and verticies - for co in ["lon_bounds", "lat_bounds", "lon_verticies", "lat_verticies"]: - assert co in ds.coords - # make sure that all other dims are eliminated from the bounds. 
- assert (set(ds[co].dims) - set(["bnds", "vertex"])) == set(["x", "y"]) - - #### Check the order of the vertex - # Ill only check these south of the Arctic for now. Up there - # things are still weird. - - test_ds = ds.sel(y=slice(-40, 40)) - - vertex_lon_diff1 = test_ds.lon_verticies.isel( - vertex=3 - ) - test_ds.lon_verticies.isel(vertex=0) - vertex_lon_diff2 = test_ds.lon_verticies.isel( - vertex=2 - ) - test_ds.lon_verticies.isel(vertex=1) - vertex_lat_diff1 = test_ds.lat_verticies.isel( - vertex=1 - ) - test_ds.lat_verticies.isel(vertex=0) - vertex_lat_diff2 = test_ds.lat_verticies.isel( - vertex=2 - ) - test_ds.lat_verticies.isel(vertex=3) - for vertex_diff in [vertex_lon_diff1, vertex_lon_diff2]: - assert (vertex_diff <= 0).sum() <= (3 * len(vertex_diff.y)) - # allowing for a few rows to be negative - - for vertex_diff in [vertex_lat_diff1, vertex_lat_diff2]: - assert (vertex_diff <= 0).sum() <= (5 * len(vertex_diff.x)) - # allowing for a few rows to be negative - # This is just to make sure that not the majority of values is negative or zero. 
- - # Same for the bounds: - lon_diffs = test_ds.lon_bounds.diff("bnds") - lat_diffs = test_ds.lat_bounds.diff("bnds") - - assert (lon_diffs <= 0).sum() <= (5 * len(lon_diffs.y)) - assert (lat_diffs <= 0).sum() <= (5 * len(lat_diffs.y)) - - # Test the staggered grid creation - - print(ds) - # This is just a rudimentary test to see if the creation works - staggered_grid, ds_staggered = combine_staggered_grid( - ds, recalculate_metrics=True + assert ds.lon.min().load() >= 0 + assert ds.lon.max().load() <= 360 + if "lon_bounds" in ds.variables: + assert ds.lon_bounds.min().load() >= 0 + assert ds.lon_bounds.max().load() <= 360 + assert ds.lat.min().load() >= -90 + assert ds.lat.max().load() <= 90 + # make sure lon and lat are 2d + assert len(ds.lon.shape) == 2 + assert len(ds.lat.shape) == 2 + + +expected_failures = [ + ("AWI-ESM-1-1-LR", "thetao", "historical", "gn"), + ("AWI-ESM-1-1-LR", "thetao", "ssp585", "gn"), + ("AWI-CM-1-1-MR", "thetao", "historical", "gn"), + ("AWI-CM-1-1-MR", "thetao", "ssp585", "gn"), + # TODO: would be nice to have a "*" matching... 
+ ("CESM2-FV2", "thetao", "historical", "gn"), + ("CESM2-FV2", "thetao", "ssp585", "gn"), + ( + "IPSL-CM6A-LR", + "thetao", + "historical", + "gn", + ), # IPSL has an issue with `lev` dims concatting + ("IPSL-CM6A-LR", "o2", "historical", "gn"), + ("NorESM2-MM", "thetao", "historical", "gn"), + ("NorESM2-MM", "thetao", "historical", "gr"), +] + + +@pytest.mark.parametrize( + "source_id,variable_id,experiment_id,grid_label", + xfail_wrapper(full_specs(), expected_failures), +) +def test_check_dim_coord_values(source_id, variable_id, experiment_id, grid_label): + # there must be a better way to build this at the class level and then tear it down again + # I can probably get this done with fixtures, but I dont know how atm + ds, cat = data(source_id, variable_id, experiment_id, grid_label, True) + + if ds is None: + pytest.skip( + f"No data found for {source_id}|{variable_id}|{experiment_id}|{grid_label}" + ) + + ##### Check for dim duplicates + # check all dims for duplicates + # for di in ds.dims: + # for now only test a subset of the dims. TODO: Add the bounds once they + # are cleaned up. 
+ for di in ["x", "y", "lev", "time"]: + if di in ds.dims: + diagnose_doubles(ds[di].load().data) + assert len(ds[di]) == len(np.unique(ds[di])) + if di != "time": # these tests do not make sense for decoded time + assert ~np.all(np.isnan(ds[di])) + assert np.all(ds[di].diff(di) >= 0) + + assert ds.lon.min().load() >= 0 + assert ds.lon.max().load() <= 360 + if "lon_bounds" in ds.variables: + assert ds.lon_bounds.min().load() >= 0 + assert ds.lon_bounds.max().load() <= 360 + assert ds.lat.min().load() >= -90 + assert ds.lat.max().load() <= 90 + # make sure lon and lat are 2d + assert len(ds.lon.shape) == 2 + assert len(ds.lat.shape) == 2 + + +############################### Specific Bound Coords Test ############################### +expected_failures = [ + ("AWI-ESM-1-1-LR", "thetao", "historical", "gn"), + ("AWI-ESM-1-1-MR", "thetao", "historical", "gn"), + ("AWI-ESM-1-1-MR", "thetao", "ssp585", "gn"), + ("AWI-CM-1-1-MR", "thetao", "historical", "gn"), + ("AWI-CM-1-1-MR", "thetao", "ssp585", "gn"), + ("CESM2-FV2", "thetao", "historical", "gn"), + ("FGOALS-f3-L", "thetao", "historical", "gn"), + ("FGOALS-f3-L", "thetao", "ssp585", "gn"), + ("FGOALS-g3", "thetao", "ssp585", "gn"), + ("NorESM2-MM", "thetao", "historical", "gn"), + ("NorESM2-MM", "thetao", "historical", "gr"), + ("IPSL-CM6A-LR", "thetao", "historical", "gn"), + ("IPSL-CM6A-LR", "o2", "historical", "gn"), +] + + +@pytest.mark.parametrize( + "source_id,variable_id,experiment_id,grid_label", + xfail_wrapper(full_specs(), expected_failures), +) +def test_check_bounds_verticies(source_id, variable_id, experiment_id, grid_label): + + ds, cat = data(source_id, variable_id, experiment_id, grid_label, True) + + if ds is None: + pytest.skip( + f"No data found for {source_id}|{variable_id}|{experiment_id}|{grid_label}" + ) + + if "vertex" in ds.dims: + np.testing.assert_allclose(ds.vertex.data, np.arange(4)) + + ####Check for existing bounds and verticies + for co in ["lon_bounds", "lat_bounds", "lon_verticies", 
"lat_verticies"]: + assert co in ds.coords + # make sure that all other dims are eliminated from the bounds. + assert (set(ds[co].dims) - set(["bnds", "vertex"])) == set(["x", "y"]) + + #### Check the order of the vertex + # Ill only check these south of the Arctic for now. Up there + # things are still weird. + + test_ds = ds.sel(y=slice(-40, 40)) + + vertex_lon_diff1 = test_ds.lon_verticies.isel( + vertex=3 + ) - test_ds.lon_verticies.isel(vertex=0) + vertex_lon_diff2 = test_ds.lon_verticies.isel( + vertex=2 + ) - test_ds.lon_verticies.isel(vertex=1) + vertex_lat_diff1 = test_ds.lat_verticies.isel( + vertex=1 + ) - test_ds.lat_verticies.isel(vertex=0) + vertex_lat_diff2 = test_ds.lat_verticies.isel( + vertex=2 + ) - test_ds.lat_verticies.isel(vertex=3) + for vertex_diff in [vertex_lon_diff1, vertex_lon_diff2]: + assert (vertex_diff <= 0).sum() <= (3 * len(vertex_diff.y)) + # allowing for a few rows to be negative + + for vertex_diff in [vertex_lat_diff1, vertex_lat_diff2]: + assert (vertex_diff <= 0).sum() <= (5 * len(vertex_diff.x)) + # allowing for a few rows to be negative + # This is just to make sure that not the majority of values is negative or zero. 
+ + # Same for the bounds: + lon_diffs = test_ds.lon_bounds.diff("bnds") + lat_diffs = test_ds.lat_bounds.diff("bnds") + + assert (lon_diffs <= 0).sum() <= (5 * len(lon_diffs.y)) + assert (lat_diffs <= 0).sum() <= (5 * len(lat_diffs.y)) + + +################################# xgcm grid specific tests ######################################## +expected_failures = [ + ("AWI-ESM-1-1-LR", "thetao", "historical", "gn"), + ("AWI-ESM-1-1-MR", "thetao", "historical", "gn"), + ("AWI-ESM-1-1-MR", "thetao", "ssp585", "gn"), + ("AWI-CM-1-1-MR", "thetao", "historical", "gn"), + ("AWI-CM-1-1-MR", "thetao", "ssp585", "gn"), + ("CESM2-FV2", "thetao", "historical", "gn"), + ("CMCC-CM2-SR5", "thetao", "historical", "gn"), + ("CMCC-CM2-SR5", "thetao", "ssp585", "gn"), + ("FGOALS-f3-L", "thetao", "historical", "gn"), + ("FGOALS-f3-L", "thetao", "ssp585", "gn"), + ("FGOALS-g3", "thetao", "ssp585", "gn"), + ("MPI-ESM-1-2-HAM", "thetao", "historical", "gn"), + ("MPI-ESM-1-2-HAM", "o2", "historical", "gn"), + ("NorESM2-MM", "thetao", "historical", "gn"), + ("NorESM2-MM", "thetao", "historical", "gr"), + ("IPSL-CM6A-LR", "thetao", "historical", "gn"), + ("IPSL-CM6A-LR", "o2", "historical", "gn"), +] + + +@pytest.mark.parametrize( + "source_id,variable_id,experiment_id,grid_label", + xfail_wrapper(full_specs(), expected_failures), +) +def test_check_grid(source_id, variable_id, experiment_id, grid_label): + + ds, cat = data(source_id, variable_id, experiment_id, grid_label, True) + + if ds is None: + pytest.skip( + f"No data found for {source_id}|{variable_id}|{experiment_id}|{grid_label}" ) - if source_id == "MPI-ESM-1-2-HAM" or source_id == "MPI-ESM1-2-LR": - pytest.skip("No available grid shift info") + # This is just a rudimentary test to see if the creation works + staggered_grid, ds_staggered = combine_staggered_grid(ds, recalculate_metrics=True) - assert ds_staggered is not None - # - if "lev" in ds_staggered.dims: - assert "bnds" in ds_staggered.lev_bounds.dims + print(ds_staggered) - 
for axis in ["X", "Y"]: - for metric in ["_t", "_gx", "_gy", "_gxgy"]: - assert f"d{axis.lower()}{metric}" in list(ds_staggered.coords) - # TODO: Include actual test to combine variables + assert ds_staggered is not None + # + if "lev" in ds_staggered.dims: + assert "bnds" in ds_staggered.lev_bounds.dims - else: - pytest.xfail("Model data not available") + for axis in ["X", "Y"]: + for metric in ["_t", "_gx", "_gy", "_gxgy"]: + assert f"d{axis.lower()}{metric}" in list(ds_staggered.coords) + # TODO: Include actual test to combine variables diff --git a/cmip6_preprocessing/tests/test_regionmask.py b/cmip6_preprocessing/tests/test_regionmask.py index 398abd1d..f05713b8 100644 --- a/cmip6_preprocessing/tests/test_regionmask.py +++ b/cmip6_preprocessing/tests/test_regionmask.py @@ -1,5 +1,4 @@ import pytest -import intake import numpy as np import xarray as xr from cmip6_preprocessing.preprocessing import combined_preprocessing @@ -11,15 +10,6 @@ def test_merge_mask(): - # load test dataset in the cloud (this does not work from within the CI...will check that out later. ) - # # import example cloud datasets - # col_url = "https://raw.githubusercontent.com/NCAR/intake-esm-datastore/master/catalogs/pangeo-cmip6.json" - # col = intake.open_esm_datastore(col_url) - # cat = col.search(source_id=['GFDL-ESM4'],experiment_id='historical', variable_id='thetao') - # data_dict = cat.to_dataset_dict(zarr_kwargs={'consolidated': True, 'decode_times': False}, - # preprocess=combined_preprocessing) - # ds = data_dict[list(data_dict.keys())[0]] - x = np.linspace(0, 360, 720) y = np.linspace(-90, 90, 360) data = np.random.rand(len(x), len(y)) diff --git a/pytest.ini b/pytest.ini index 0c2a2b9c..d61d0296 100644 --- a/pytest.ini +++ b/pytest.ini @@ -1,2 +1,2 @@ [pytest] -# addopts = -n2 +xfail_strict=true