Skip to content

Commit

Permalink
Merge pull request #62 from jbusecke/cloud_tests_gh_actions
Browse files Browse the repository at this point in the history
Cloud tests gh actions
  • Loading branch information
jbusecke authored Dec 17, 2020
2 parents 1d57184 + dbcc1f3 commit c625c65
Show file tree
Hide file tree
Showing 12 changed files with 412 additions and 208 deletions.
30 changes: 30 additions & 0 deletions .github/workflows/ci.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -82,3 +82,33 @@ jobs:
shell: bash -l {0}
run: |
pytest -n auto --ignore=cmip6_preprocessing/tests/test_preprocessing_cloud.py
cloud-tests:
  name: Build (cloud-data-tests)
  runs-on: ubuntu-latest
  steps:
    - uses: actions/checkout@v2
    - name: Cache conda
      uses: actions/cache@v1
      env:
        # Increase this value to force a cache reset even when
        # ci/environment-cloud-test.yml has not changed
        CACHE_NUMBER: 0
      with:
        path: ~/conda_pkgs_dir
        # FIX: the key previously hashed ci/environment-upstream-dev.yml, but this
        # job installs ci/environment-cloud-test.yml -- hash the file actually used
        # so the cache invalidates when the environment definition changes.
        key: ${{ runner.os }}-conda-${{ env.CACHE_NUMBER }}-${{ hashFiles('ci/environment-cloud-test.yml') }}
    - uses: conda-incubator/setup-miniconda@v2
      with:
        activate-environment: test_env_cmip6_preprocessing # Defined in ci/environment-cloud-test.yml
        auto-update-conda: false
        python-version: 3.8
        environment-file: ci/environment-cloud-test.yml
        use-only-tar-bz2: true # IMPORTANT: This needs to be set for caching to work properly!
    - name: Set up conda environment
      shell: bash -l {0}
      run: |
        python -m pip install -e .
        conda list
    - name: Run Tests
      shell: bash -l {0}
      run: |
        pytest -v -n auto --maxfail 100 --reruns 0 --reruns-delay 1
22 changes: 22 additions & 0 deletions ci/environment-cloud-test.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
# Conda environment for the "cloud-tests" CI job (.github/workflows/ci.yaml):
# runs cmip6_preprocessing tests against the Pangeo CMIP6 cloud data.
name: test_env_cmip6_preprocessing
channels:
- conda-forge
dependencies:

- python=3.8
- xarray
- numpy
- pandas
# pinned: temporary solution, newer intake-esm releases break these tests --
# see https://github.com/intake/intake-esm/issues/305
- intake-esm==2020.08.15
- gcsfs  # Google Cloud Storage filesystem access
- zarr  # reading the cloud-hosted zarr stores
- xgcm
- pyproj
- matplotlib
# NOTE(review): this will fail to solve until the current regionmask version is
# released on conda-forge -- confirm before relying on this environment
- regionmask
- black
- pytest-cov
- pytest-xdist
- pytest-rerunfailures  # provides the --reruns / --reruns-delay flags used by the cloud test job
- codecov
- cftime
2 changes: 1 addition & 1 deletion ci/environment-py3.6.yml
Original file line number Diff line number Diff line change
Expand Up @@ -6,10 +6,10 @@ dependencies:
- xarray
- numpy
- pandas
- intake-esm
- xgcm
- pyproj
- matplotlib
- cftime
- pip
- pip:
- codecov
Expand Down
3 changes: 1 addition & 2 deletions ci/environment-py3.7.yml
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,6 @@ dependencies:
- xarray
- numpy
- pandas
- intake-esm
- gcsfs
- zarr
- xgcm
- pyproj
Expand All @@ -17,3 +15,4 @@ dependencies:
- pytest-cov
- pytest-xdist
- codecov
- cftime
3 changes: 1 addition & 2 deletions ci/environment-py3.8.yml
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,6 @@ dependencies:
- xarray
- numpy
- pandas
- intake-esm
- gcsfs
- zarr
- xgcm
- pyproj
Expand All @@ -17,3 +15,4 @@ dependencies:
- pytest-cov
- pytest-xdist
- codecov
- cftime
6 changes: 2 additions & 4 deletions ci/environment-upstream-dev.yml
Original file line number Diff line number Diff line change
Expand Up @@ -5,19 +5,17 @@ dependencies:
- python=3.8
- pyproj
- matplotlib
- intake
- cartopy
- gcsfs # needed for google cloud storage
- zarr # needed for google cloud storage tests
- codecov
- pytest-cov
- black
- numpy
- cftime
- dask
- pip
- pip:
- pytest-xdist
- git+https://github.com/mathause/regionmask.git
- git+https://github.com/pydata/xarray.git
- git+https://github.com/pandas-dev/pandas.git
- git+https://github.com/NCAR/intake-esm.git
- git+https://github.com/xgcm/xgcm.git
23 changes: 0 additions & 23 deletions ci/environment-upstream-master.yml

This file was deleted.

110 changes: 110 additions & 0 deletions cmip6_preprocessing/tests/cloud_test_utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,110 @@
import pytest
import contextlib
import xarray as xr
import numpy as np
import intake
import fsspec
import itertools
from cmip6_preprocessing.preprocessing import combined_preprocessing
from cmip6_preprocessing.grids import combine_staggered_grid

pytest.importorskip("gcsfs")


def col():
    """Return a fresh intake-esm datastore for the Pangeo CMIP6 cloud catalog."""
    catalog_url = (
        "https://raw.githubusercontent.com/NCAR/intake-esm-datastore/"
        "master/catalogs/pangeo-cmip6.json"
    )
    return intake.open_esm_datastore(catalog_url)


def diagnose_doubles(data):
    """Print the indices and values of non-unique entries in ``data``.

    Parameters
    ----------
    data : array-like
        1-D sequence of values to check for duplicates.

    Returns
    -------
    None
        Diagnostics go to stdout only; nothing is printed when all entries
        are unique.
    """
    # np.unique returns the index of the *first* occurrence of each distinct
    # value; any index not in that collection belongs to a duplicate entry.
    _, idx = np.unique(data, return_index=True)
    # Use a set for O(1) membership tests: the original `i not in idx`
    # scanned the numpy array per element, making this loop O(n^2).
    first_seen = set(idx)
    missing = np.array([i for i in range(len(data)) if i not in first_seen])
    if len(missing) > 0:
        missing_values = data[missing]
        # (typo fix: "Indicies" -> "Indices")
        print(f"Missing values Indices[{missing}]/ Values[{missing_values}]")


def xfail_wrapper(specs, fail_specs):
    """Wrap every spec that appears in ``fail_specs`` as a strict xfail param.

    Specs not expected to fail pass through unchanged; a new list in the
    original order is returned.

    NOTE: validating that every fail spec is actually part of ``specs`` is
    currently disabled -- unknown fail specs are silently ignored.
    """
    return [
        pytest.param(*spec, marks=pytest.mark.xfail(strict=True))
        if spec in fail_specs
        else spec
        for spec in specs
    ]


def data(source_id, variable_id, experiment_id, grid_label, use_intake_esm):
    """Load one preprocessed CMIP6 ocean dataset from the Pangeo cloud catalog.

    Searches the catalog for the given facets (table_id is fixed to "Omon")
    and returns a tuple ``(ds, cat)``: the dataset run through
    ``combined_preprocessing`` (or ``None`` when the search finds nothing)
    and the matching catalog subset.
    """
    open_kwargs = {
        "consolidated": True,
        "decode_times": False,
        # "decode_times": True,
        # "use_cftime": True,
    }

    cat = col().search(
        source_id=source_id,
        experiment_id=experiment_id,
        variable_id=variable_id,
        # member_id="r1i1p1f1",
        table_id="Omon",
        grid_label=grid_label,
    )

    # Guard clause: nothing matched the search.
    if not len(cat.df["zstore"]) > 0:
        return None, cat

    if use_intake_esm:
        dataset_dict = cat.to_dataset_dict(
            zarr_kwargs=open_kwargs,
            preprocess=combined_preprocessing,
            storage_options={"token": "anon"},
        )
        _, ds = dataset_dict.popitem()
        return ds, cat

    ##### debugging options
    # @charlesbluca suggested this to make this work in GHA
    # https://github.com/jbusecke/cmip6_preprocessing/pull/62#issuecomment-741928365
    # (think you can pass storage options to get_mapper here as well)
    mapper = fsspec.get_mapper(cat.df["zstore"][0])
    raw = xr.open_zarr(mapper, **open_kwargs)
    print(raw)
    return combined_preprocessing(raw), cat


def all_models():
    """Return every ``source_id`` in the cloud catalog, sorted, as a tuple.

    Sorting gives a deterministic parametrization order across test runs.
    """
    df = col().df
    # Local renamed (was `all_models`) so the function no longer shadows
    # its own name.
    model_names = df["source_id"].unique()
    return tuple(np.sort(model_names))


def full_specs():
    """Build the full test matrix as a list of spec tuples.

    Each spec is ``(source_id, variable_id, experiment_id, grid_label)``,
    the cartesian product of all catalog models with the fixed variable,
    experiment, and grid-label choices below.
    """
    # Plain tuple literals instead of tuple([...]), and product(...) called
    # directly instead of unpacking a throwaway list.
    grid_labels = ("gn", "gr")
    experiment_ids = ("historical", "ssp585")
    variable_ids = ("thetao", "o2")

    return list(
        itertools.product(all_models(), variable_ids, experiment_ids, grid_labels)
    )
10 changes: 0 additions & 10 deletions cmip6_preprocessing/tests/test_preprocessing.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
import pytest
import intake
import pandas as pd
import numpy as np
import xarray as xr
Expand All @@ -20,15 +19,6 @@
combined_preprocessing,
)

# get all available ocean models from the cloud.
url = "https://storage.googleapis.com/cmip6/pangeo-cmip6.csv"
df = pd.read_csv(url)
df_ocean = df[(df.table_id == "Omon") + (df.table_id == "Oyr")]
ocean_models = df_ocean.source_id.unique()

# TODO: Need to adapt atmos only models
all_models = ocean_models


def create_test_ds(xname, yname, zname, xlen, ylen, zlen):
x = np.linspace(0, 359, xlen)
Expand Down
Loading

0 comments on commit c625c65

Please sign in to comment.