diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml
index 243abede..58846c3f 100644
--- a/.github/workflows/ci.yaml
+++ b/.github/workflows/ci.yaml
@@ -82,3 +82,33 @@ jobs:
       shell: bash -l {0}
       run: |
         pytest -n auto --ignore=cmip6_preprocessing/tests/test_preprocessing_cloud.py
+        
+  cloud-tests:
+    name: Build (cloud-data-tests)
+    runs-on: ubuntu-latest
+    steps:
+    - uses: actions/checkout@v2
+    - name: Cache conda
+      uses: actions/cache@v1
+      env:
+        # Increase this value to reset cache if ci/environment-cloud-test.yml has not changed
+        CACHE_NUMBER: 0
+      with:
+        path: ~/conda_pkgs_dir
+        key: ${{ runner.os }}-conda-${{ env.CACHE_NUMBER }}-${{ hashFiles('ci/environment-cloud-test.yml') }}
+    - uses: conda-incubator/setup-miniconda@v2
+      with:
+        activate-environment: test_env_cmip6_preprocessing # Defined in ci/environment-cloud-test.yml
+        auto-update-conda: false
+        python-version: "3.8"
+        environment-file: ci/environment-cloud-test.yml
+        use-only-tar-bz2: true # IMPORTANT: This needs to be set for caching to work properly!
+    - name: Set up conda environment
+      shell: bash -l {0}
+      run: |
+        python -m pip install -e .
+        conda list
+    - name: Run Tests
+      shell: bash -l {0}
+      run: |
+        pytest -v -n auto --maxfail 100 --reruns 0 --reruns-delay 1
diff --git a/ci/environment-cloud-test.yml b/ci/environment-cloud-test.yml
new file mode 100644
index 00000000..3c980536
--- /dev/null
+++ b/ci/environment-cloud-test.yml
@@ -0,0 +1,22 @@
+name: test_env_cmip6_preprocessing
+channels:
+  - conda-forge
+dependencies:
+  # Environment used by the cloud-tests job in .github/workflows/ci.yaml
+  - python=3.8
+  - xarray
+  - numpy
+  - pandas
+  - intake-esm==2020.08.15 # temporary pin, see https://github.com/intake/intake-esm/issues/305
+  - gcsfs
+  - zarr
+  - xgcm
+  - pyproj
+  - matplotlib
+  - regionmask # NOTE: this will fail until the current regionmask version is released on conda
+  - black
+  - pytest-cov
+  - pytest-xdist
+  - pytest-rerunfailures
+  - codecov
+  - cftime
diff --git a/ci/environment-py3.6.yml b/ci/environment-py3.6.yml
index a40ee234..7c4b867f 100644
--- a/ci/environment-py3.6.yml
+++ b/ci/environment-py3.6.yml
@@ -6,10 +6,10 @@ dependencies:
   - xarray
   - numpy
   - pandas
-  - intake-esm
   - xgcm
   - pyproj
   - matplotlib
+  - cftime
   - pip
   - pip:
     - codecov
diff --git a/ci/environment-py3.7.yml b/ci/environment-py3.7.yml
index 5bddc760..bd27ca67 100644
--- a/ci/environment-py3.7.yml
+++ b/ci/environment-py3.7.yml
@@ -6,8 +6,6 @@ dependencies:
   - xarray
   - numpy
   - pandas
-  - intake-esm
-  - gcsfs
   - zarr
   - xgcm
   - pyproj
@@ -17,3 +15,4 @@ dependencies:
   - pytest-cov
   - pytest-xdist
   - codecov
+  - cftime
diff --git a/ci/environment-py3.8.yml b/ci/environment-py3.8.yml
index 822211f2..84617840 100644
--- a/ci/environment-py3.8.yml
+++ b/ci/environment-py3.8.yml
@@ -6,8 +6,6 @@ dependencies:
   - xarray
   - numpy
   - pandas
-  - intake-esm
-  - gcsfs
   - zarr
   - xgcm
   - pyproj
@@ -17,3 +15,4 @@ dependencies:
   - pytest-cov
   - pytest-xdist
   - codecov
+  - cftime
diff --git a/ci/environment-upstream-dev.yml b/ci/environment-upstream-dev.yml
index e1b75924..60e17093 100644
--- a/ci/environment-upstream-dev.yml
+++ b/ci/environment-upstream-dev.yml
@@ -5,19 +5,17 @@ dependencies:
   - python=3.8
   - pyproj
   - matplotlib
-  - intake
   - cartopy
-  - gcsfs # needed for google cloud storage
-  - zarr # needed for google cloud storage tests
   - codecov
   - pytest-cov
   - black
   - numpy
+  - cftime
+  - dask
   - pip
   - pip:
     - pytest-xdist
     - git+https://github.com/mathause/regionmask.git
     - git+https://github.com/pydata/xarray.git
     - git+https://github.com/pandas-dev/pandas.git
-    - git+https://github.com/NCAR/intake-esm.git
     - git+https://github.com/xgcm/xgcm.git
diff --git a/ci/environment-upstream-master.yml b/ci/environment-upstream-master.yml
deleted file mode 100644
index 2189f77c..00000000
--- a/ci/environment-upstream-master.yml
+++ /dev/null
@@ -1,23 +0,0 @@
-name: test_env_cmip6_preprocessing
-channels:
-  - conda-forge
-dependencies:
-  - python=3.7
-  - pyproj
-  - matplotlib
-  - intake
-  - cartopy
-  - gcsfs # needed for google cloud storage
-  - zarr # needed for google cloud storage tests
-  - codecov
-  - pytest-cov
-  - black
-  - numpy
-  - pip
-  - pip:
-    - pytest-xdist
-    - git+https://github.com/mathause/regionmask.git
-    - git+https://github.com/pydata/xarray.git
-    - git+https://github.com/pandas-dev/pandas.git
-    - git+https://github.com/NCAR/intake-esm.git
-    - git+https://github.com/xgcm/xgcm.git
diff --git a/cmip6_preprocessing/tests/cloud_test_utils.py b/cmip6_preprocessing/tests/cloud_test_utils.py
new file mode 100644
index 00000000..ed53e08d
--- /dev/null
+++ b/cmip6_preprocessing/tests/cloud_test_utils.py
@@ -0,0 +1,159 @@
+"""Shared helpers for the cloud tests: catalog access, data loading and test specs."""
+import contextlib
+import functools
+import itertools
+
+import numpy as np
+import pytest
+import xarray as xr
+
+from cmip6_preprocessing.preprocessing import combined_preprocessing
+from cmip6_preprocessing.grids import combine_staggered_grid
+
+# The cloud stack is optional: skip importing modules instead of raising an
+# ImportError when one of these packages is not installed.
+pytest.importorskip("gcsfs")
+fsspec = pytest.importorskip("fsspec")
+intake = pytest.importorskip("intake")
+
+
+@functools.lru_cache(maxsize=None)
+def col():
+    """Return the pangeo CMIP6 intake-esm datastore.
+
+    The datastore is opened once per process and the same object is returned on
+    every later call, so repeated calls do not fetch the catalog again.
+    """
+    return intake.open_esm_datastore(
+        "https://raw.githubusercontent.com/NCAR/intake-esm-datastore/master/catalogs/pangeo-cmip6.json"
+    )
+
+
+def diagnose_doubles(data):
+    """Print the positions and values of repeated entries in ``data``.
+
+    Positions that ``np.unique`` does not report as a first occurrence are
+    printed together with their values; nothing is printed if all are unique.
+    """
+    _, first_idx = np.unique(data, return_index=True)
+    # every position that is not a first occurrence holds a repeated value
+    duplicates = np.setdiff1d(np.arange(len(data)), first_idx)
+    if len(duplicates) > 0:
+        duplicate_values = data[duplicates]
+        print(f"Missing values Indicies[{duplicates}]/ Values[{duplicate_values}]")
+
+
+def xfail_wrapper(specs, fail_specs):
+    """Mark every spec that is listed in ``fail_specs`` as an expected failure.
+
+    Parameters
+    ----------
+    specs : iterable of tuple
+        Parameter combinations intended for ``pytest.mark.parametrize``.
+    fail_specs : container of tuple
+        Combinations that are expected to fail.
+
+    Returns
+    -------
+    list
+        The entries of ``specs`` in their original order; expected failures are
+        wrapped in ``pytest.param`` with a strict ``xfail`` mark.
+    """
+    # NOTE(review): the check below is disabled, so entries of `fail_specs` that
+    # do not occur in `specs` are silently ignored.
+    # unknown_fail_specs = [fail for fail in fail_specs if fail not in specs]
+    # if len(unknown_fail_specs) > 0:
+    #     raise ValueError(
+    #         f"Found fail specs that are not part of the testing {unknown_fail_specs}"
+    #     )
+    return [
+        pytest.param(*spec, marks=pytest.mark.xfail(strict=True))
+        if spec in fail_specs
+        else spec
+        for spec in specs
+    ]
+
+
+def data(source_id, variable_id, experiment_id, grid_label, use_intake_esm):
+    """Load one preprocessed ``Omon`` dataset from the cloud catalog.
+
+    Parameters
+    ----------
+    source_id, variable_id, experiment_id, grid_label : str
+        Facets used to search the catalog returned by ``col()``.
+    use_intake_esm : bool
+        If true, load through ``cat.to_dataset_dict`` with
+        ``combined_preprocessing`` as the preprocess function. Otherwise open the
+        first matching zarr store with ``xr.open_zarr`` and preprocess it.
+
+    Returns
+    -------
+    ds : xarray.Dataset or None
+        The preprocessed dataset, or ``None`` if the search found no zarr store.
+    cat
+        The catalog subset returned by the search.
+    """
+    zarr_kwargs = {
+        "consolidated": True,
+        "decode_times": False,
+        # "decode_times": True,
+        # "use_cftime": True,
+    }
+
+    cat = col().search(
+        source_id=source_id,
+        experiment_id=experiment_id,
+        variable_id=variable_id,
+        # member_id="r1i1p1f1",
+        table_id="Omon",
+        grid_label=grid_label,
+    )
+
+    zstores = cat.df["zstore"]
+    if len(zstores) == 0:
+        return None, cat
+
+    if use_intake_esm:
+        ddict = cat.to_dataset_dict(
+            zarr_kwargs=zarr_kwargs,
+            preprocess=combined_preprocessing,
+            storage_options={"token": "anon"},
+        )
+        # only one dataset of the returned dictionary is used
+        _, ds = ddict.popitem()
+    else:
+        ##### debugging options
+        # @charlesbluca suggested this to make this work in GHA
+        # https://github.com/jbusecke/cmip6_preprocessing/pull/62#issuecomment-741928365
+        # Select by position: the row labels of the search result are not
+        # guaranteed to start at 0.
+        mm = fsspec.get_mapper(
+            zstores.iloc[0]
+        )  # think you can pass in storage options here as well?
+        ds_raw = xr.open_zarr(mm, **zarr_kwargs)
+        print(ds_raw)
+        ds = combined_preprocessing(ds_raw)
+
+    return ds, cat
+
+
+def all_models():
+    """Return a sorted tuple of all unique ``source_id`` values in the catalog."""
+    source_ids = col().df["source_id"].unique()
+    return tuple(np.sort(source_ids))
+
+
+def full_specs():
+    """Return all (source_id, variable_id, experiment_id, grid_label) tuples.
+
+    The list is the cartesian product of every catalog model with the variables,
+    experiments and grid labels defined below; it is not filtered for data
+    availability.
+    """
+    grid_labels = ("gn", "gr")
+    experiment_ids = ("historical", "ssp585")
+    variable_ids = ("thetao", "o2")
+
+    return list(
+        itertools.product(all_models(), variable_ids, experiment_ids, grid_labels)
+    )
diff --git a/cmip6_preprocessing/tests/test_preprocessing.py b/cmip6_preprocessing/tests/test_preprocessing.py
index 55d8666d..0b429cba 100644
--- a/cmip6_preprocessing/tests/test_preprocessing.py
+++ b/cmip6_preprocessing/tests/test_preprocessing.py
@@ -1,5 +1,4 @@
 import pytest
-import intake
 import pandas as pd
 import numpy as np
 import xarray as xr
@@ -20,15 +19,6 @@
     combined_preprocessing,
 )
 
-# get all available ocean models from the cloud.
-url = "https://storage.googleapis.com/cmip6/pangeo-cmip6.csv"
-df = pd.read_csv(url)
-df_ocean = df[(df.table_id == "Omon") + (df.table_id == "Oyr")]
-ocean_models = df_ocean.source_id.unique()
-
-# TODO: Need to adapt atmos only models
-all_models = ocean_models
-
 
 def create_test_ds(xname, yname, zname, xlen, ylen, zlen):
     x = np.linspace(0, 359, xlen)
diff --git a/cmip6_preprocessing/tests/test_preprocessing_cloud.py b/cmip6_preprocessing/tests/test_preprocessing_cloud.py
index c70cc0a9..af4ded99 100644
--- a/cmip6_preprocessing/tests/test_preprocessing_cloud.py
+++ b/cmip6_preprocessing/tests/test_preprocessing_cloud.py
@@ -1,171 +1,260 @@
-# This module tests data directly from the pangeo google cloud storage
+# This module tests data directly from the pangeo google cloud storage.
+# Tests are meant to be more high level and also serve to document known problems (see the `expected_failures` lists and skip statements).
 import pytest
+import xarray as xr
 import numpy as np
-import intake
+from cmip6_preprocessing.tests.cloud_test_utils import (
+    full_specs,
+    xfail_wrapper,
+    all_models,
+    data,
+    diagnose_doubles,
+)
 from cmip6_preprocessing.preprocessing import combined_preprocessing
 from cmip6_preprocessing.grids import combine_staggered_grid
 
 pytest.importorskip("gcsfs")
 
+print(f"\n\n\n\n$$$$$$$ All available models: {all_models()}$$$$$$$\n\n\n\n")
 
-@pytest.fixture
-def col():
-    return intake.open_esm_datastore(
-        "https://raw.githubusercontent.com/NCAR/intake-esm-datastore/master/catalogs/pangeo-cmip6.json"
-    )
-
-
-def all_models():
-    col = intake.open_esm_datastore(
-        "https://raw.githubusercontent.com/NCAR/intake-esm-datastore/master/catalogs/pangeo-cmip6.json"
-    )
-    df = col.df
-    all_models = df["source_id"].unique()
-
-    # TODO: finally get IPSL model to run and release this
-    # TODO: Allow the AWI regridded model output for the preprocessing module
-    return [m for m in all_models if (("IPSL" not in m) & ("AWI" not in m))]
-    # return [m for m in all_models if "MIROC" in m]
-
-
-def _diagnose_doubles(data):
-    """displays non-unique entries in data"""
-    _, idx = np.unique(data, return_index=True)
-    missing = np.array([i for i in np.arange(len(data)) if i not in idx])
-    if len(missing) > 0:
-        missing_values = data[missing]
-        print(f"Missing values Indicies[{missing}]/ Values[{missing_values}]")
-
-
-# These are too many tests. Perhaps I could load all the data first and then
-# test each dict item?
-
-
-@pytest.mark.parametrize("grid_label", ["gr", "gn"])
-@pytest.mark.parametrize("experiment_id", ["historical"])
-@pytest.mark.parametrize("variable_id", ["o2", "thetao"])
-@pytest.mark.parametrize("source_id", all_models())
-def test_preprocessing_combined(col, source_id, experiment_id, grid_label, variable_id):
-    cat = col.search(
-        source_id=source_id,
-        experiment_id=experiment_id,
-        variable_id=variable_id,
-        # member_id="r1i1p1f1",
-        table_id="Omon",
-        grid_label=grid_label,
-    )
-
-    # ddict_raw = cat.to_dataset_dict(
-    #     zarr_kwargs={"consolidated": True, "decode_times": False},
-    #     preprocess=None,
-    #     storage_options={"token": "anon"},
-    # )
-    # if len(ddict_raw) > 0:
-    #     _, ds_raw = ddict_raw.popitem()
-    #     print(ds_raw)
-
-    ddict = cat.to_dataset_dict(
-        zarr_kwargs={"consolidated": True, "decode_times": False},
-        preprocess=combined_preprocessing,
-        storage_options={"token": "anon"},
-    )
-
-    if len(ddict) > 0:
-
-        _, ds = ddict.popitem()
-
-        if source_id == "CESM2-FV2":
-            pytest.skip("And `` has nans in the lon/lat")
-
-        ##### Check for dim duplicates
-        # check all dims for duplicates
-        # for di in ds.dims:
-        # for now only test a subset of the dims. TODO: Add the bounds once they
-        # are cleaned up.
-        for di in ["x", "y", "lev", "time"]:
-            if di in ds.dims:
-                _diagnose_doubles(ds[di].load().data)
-                assert len(ds[di]) == len(np.unique(ds[di]))
+
+# Manually combine all pytest parameters to get fine-grained control over
+# which combinations of parameters are expected to fail.
+
+
+########################### Most basic test #########################
+expected_failures = [
+    ("AWI-ESM-1-1-LR", "thetao", "historical", "gn"),
+    ("AWI-ESM-1-1-LR", "thetao", "ssp585", "gn"),
+    ("AWI-CM-1-1-MR", "thetao", "historical", "gn"),
+    ("AWI-CM-1-1-MR", "thetao", "ssp585", "gn"),
+    # TODO: would be nice to have a "*" matching...
+    ("CESM2-FV2", "thetao", "historical", "gn"),
+    ("CESM2-FV2", "thetao", "ssp585", "gn"),
+]
+
+
+@pytest.mark.parametrize(
+    "source_id,variable_id,experiment_id,grid_label",
+    xfail_wrapper(full_specs(), expected_failures),
+)
+def test_check_dim_coord_values_wo_intake(
+    source_id, variable_id, experiment_id, grid_label
+):
+    # there must be a better way to build this at the class level and then tear it down again
+    # I can probably get this done with fixtures, but I dont know how atm
+    ds, cat = data(source_id, variable_id, experiment_id, grid_label, False)
+
+    if ds is None:
+        pytest.skip(
+            f"No data found for {source_id}|{variable_id}|{experiment_id}|{grid_label}"
+        )
+
+    ##### Check for dim duplicates
+    # check all dims for duplicates
+    # for di in ds.dims:
+    # for now only test a subset of the dims. TODO: Add the bounds once they
+    # are cleaned up.
+    for di in ["x", "y", "lev", "time"]:
+        if di in ds.dims:
+            diagnose_doubles(ds[di].load().data)
+            assert len(ds[di]) == len(np.unique(ds[di]))
+            if di != "time":  # these tests do not make sense for decoded time
                 assert ~np.all(np.isnan(ds[di]))
                 assert np.all(ds[di].diff(di) >= 0)
 
-        assert ds.lon.min().load() >= 0
-        assert ds.lon.max().load() <= 360
-        if "lon_bounds" in ds.variables:
-            assert ds.lon_bounds.min().load() >= 0
-            assert ds.lon_bounds.max().load() <= 360
-        assert ds.lat.min().load() >= -90
-        assert ds.lat.max().load() <= 90
-        # make sure lon and lat are 2d
-        assert len(ds.lon.shape) == 2
-        assert len(ds.lat.shape) == 2
-
-        if "vertex" in ds.dims:
-            np.testing.assert_allclose(ds.vertex.data, np.arange(4))
-
-        if source_id == "FGOALS-f3-L":
-            pytest.skip("`FGOALS-f3-L` does not come with lon/lat bounds")
-
-        ####Check for existing bounds and verticies
-        for co in ["lon_bounds", "lat_bounds", "lon_verticies", "lat_verticies"]:
-            assert co in ds.coords
-            # make sure that all other dims are eliminated from the bounds.
-            assert (set(ds[co].dims) - set(["bnds", "vertex"])) == set(["x", "y"])
-
-        #### Check the order of the vertex
-        # Ill only check these south of the Arctic for now. Up there
-        # things are still weird.
-
-        test_ds = ds.sel(y=slice(-40, 40))
-
-        vertex_lon_diff1 = test_ds.lon_verticies.isel(
-            vertex=3
-        ) - test_ds.lon_verticies.isel(vertex=0)
-        vertex_lon_diff2 = test_ds.lon_verticies.isel(
-            vertex=2
-        ) - test_ds.lon_verticies.isel(vertex=1)
-        vertex_lat_diff1 = test_ds.lat_verticies.isel(
-            vertex=1
-        ) - test_ds.lat_verticies.isel(vertex=0)
-        vertex_lat_diff2 = test_ds.lat_verticies.isel(
-            vertex=2
-        ) - test_ds.lat_verticies.isel(vertex=3)
-        for vertex_diff in [vertex_lon_diff1, vertex_lon_diff2]:
-            assert (vertex_diff <= 0).sum() <= (3 * len(vertex_diff.y))
-            # allowing for a few rows to be negative
-
-        for vertex_diff in [vertex_lat_diff1, vertex_lat_diff2]:
-            assert (vertex_diff <= 0).sum() <= (5 * len(vertex_diff.x))
-            # allowing for a few rows to be negative
-        # This is just to make sure that not the majority of values is negative or zero.
-
-        # Same for the bounds:
-        lon_diffs = test_ds.lon_bounds.diff("bnds")
-        lat_diffs = test_ds.lat_bounds.diff("bnds")
-
-        assert (lon_diffs <= 0).sum() <= (5 * len(lon_diffs.y))
-        assert (lat_diffs <= 0).sum() <= (5 * len(lat_diffs.y))
-
-        # Test the staggered grid creation
-
-        print(ds)
-        # This is just a rudimentary test to see if the creation works
-        staggered_grid, ds_staggered = combine_staggered_grid(
-            ds, recalculate_metrics=True
+    assert ds.lon.min().load() >= 0
+    assert ds.lon.max().load() <= 360
+    if "lon_bounds" in ds.variables:
+        assert ds.lon_bounds.min().load() >= 0
+        assert ds.lon_bounds.max().load() <= 360
+    assert ds.lat.min().load() >= -90
+    assert ds.lat.max().load() <= 90
+    # make sure lon and lat are 2d
+    assert len(ds.lon.shape) == 2
+    assert len(ds.lat.shape) == 2
+
+
+expected_failures = [
+    ("AWI-ESM-1-1-LR", "thetao", "historical", "gn"),
+    ("AWI-ESM-1-1-LR", "thetao", "ssp585", "gn"),
+    ("AWI-CM-1-1-MR", "thetao", "historical", "gn"),
+    ("AWI-CM-1-1-MR", "thetao", "ssp585", "gn"),
+    # TODO: would be nice to have a "*" matching...
+    ("CESM2-FV2", "thetao", "historical", "gn"),
+    ("CESM2-FV2", "thetao", "ssp585", "gn"),
+    (
+        "IPSL-CM6A-LR",
+        "thetao",
+        "historical",
+        "gn",
+    ),  # IPSL has an issue with `lev` dims concatting
+    ("IPSL-CM6A-LR", "o2", "historical", "gn"),
+    ("NorESM2-MM", "thetao", "historical", "gn"),
+    ("NorESM2-MM", "thetao", "historical", "gr"),
+]
+
+
+@pytest.mark.parametrize(
+    "source_id,variable_id,experiment_id,grid_label",
+    xfail_wrapper(full_specs(), expected_failures),
+)
+def test_check_dim_coord_values(source_id, variable_id, experiment_id, grid_label):
+    # there must be a better way to build this at the class level and then tear it down again
+    # I can probably get this done with fixtures, but I dont know how atm
+    ds, cat = data(source_id, variable_id, experiment_id, grid_label, True)
+
+    if ds is None:
+        pytest.skip(
+            f"No data found for {source_id}|{variable_id}|{experiment_id}|{grid_label}"
+        )
+
+    ##### Check for dim duplicates
+    # check all dims for duplicates
+    # for di in ds.dims:
+    # for now only test a subset of the dims. TODO: Add the bounds once they
+    # are cleaned up.
+    for di in ["x", "y", "lev", "time"]:
+        if di in ds.dims:
+            diagnose_doubles(ds[di].load().data)
+            assert len(ds[di]) == len(np.unique(ds[di]))
+            if di != "time":  # these tests do not make sense for decoded time
+                assert ~np.all(np.isnan(ds[di]))
+                assert np.all(ds[di].diff(di) >= 0)
+
+    assert ds.lon.min().load() >= 0
+    assert ds.lon.max().load() <= 360
+    if "lon_bounds" in ds.variables:
+        assert ds.lon_bounds.min().load() >= 0
+        assert ds.lon_bounds.max().load() <= 360
+    assert ds.lat.min().load() >= -90
+    assert ds.lat.max().load() <= 90
+    # make sure lon and lat are 2d
+    assert len(ds.lon.shape) == 2
+    assert len(ds.lat.shape) == 2
+
+
+############################### Specific Bound Coords Test ###############################
+expected_failures = [
+    ("AWI-ESM-1-1-LR", "thetao", "historical", "gn"),
+    ("AWI-ESM-1-1-MR", "thetao", "historical", "gn"),
+    ("AWI-ESM-1-1-MR", "thetao", "ssp585", "gn"),
+    ("AWI-CM-1-1-MR", "thetao", "historical", "gn"),
+    ("AWI-CM-1-1-MR", "thetao", "ssp585", "gn"),
+    ("CESM2-FV2", "thetao", "historical", "gn"),
+    ("FGOALS-f3-L", "thetao", "historical", "gn"),
+    ("FGOALS-f3-L", "thetao", "ssp585", "gn"),
+    ("FGOALS-g3", "thetao", "ssp585", "gn"),
+    ("NorESM2-MM", "thetao", "historical", "gn"),
+    ("NorESM2-MM", "thetao", "historical", "gr"),
+    ("IPSL-CM6A-LR", "thetao", "historical", "gn"),
+    ("IPSL-CM6A-LR", "o2", "historical", "gn"),
+]
+
+
+@pytest.mark.parametrize(
+    "source_id,variable_id,experiment_id,grid_label",
+    xfail_wrapper(full_specs(), expected_failures),
+)
+def test_check_bounds_verticies(source_id, variable_id, experiment_id, grid_label):
+
+    ds, cat = data(source_id, variable_id, experiment_id, grid_label, True)
+
+    if ds is None:
+        pytest.skip(
+            f"No data found for {source_id}|{variable_id}|{experiment_id}|{grid_label}"
+        )
+
+    if "vertex" in ds.dims:
+        np.testing.assert_allclose(ds.vertex.data, np.arange(4))
+
+    ####Check for existing bounds and verticies
+    for co in ["lon_bounds", "lat_bounds", "lon_verticies", "lat_verticies"]:
+        assert co in ds.coords
+        # make sure that all other dims are eliminated from the bounds.
+        assert (set(ds[co].dims) - set(["bnds", "vertex"])) == set(["x", "y"])
+
+    #### Check the order of the vertex
+    # Ill only check these south of the Arctic for now. Up there
+    # things are still weird.
+
+    test_ds = ds.sel(y=slice(-40, 40))
+
+    vertex_lon_diff1 = test_ds.lon_verticies.isel(
+        vertex=3
+    ) - test_ds.lon_verticies.isel(vertex=0)
+    vertex_lon_diff2 = test_ds.lon_verticies.isel(
+        vertex=2
+    ) - test_ds.lon_verticies.isel(vertex=1)
+    vertex_lat_diff1 = test_ds.lat_verticies.isel(
+        vertex=1
+    ) - test_ds.lat_verticies.isel(vertex=0)
+    vertex_lat_diff2 = test_ds.lat_verticies.isel(
+        vertex=2
+    ) - test_ds.lat_verticies.isel(vertex=3)
+    for vertex_diff in [vertex_lon_diff1, vertex_lon_diff2]:
+        assert (vertex_diff <= 0).sum() <= (3 * len(vertex_diff.y))
+        # allowing for a few rows to be negative
+
+    for vertex_diff in [vertex_lat_diff1, vertex_lat_diff2]:
+        assert (vertex_diff <= 0).sum() <= (5 * len(vertex_diff.x))
+        # allowing for a few rows to be negative
+    # This is just to make sure that not the majority of values is negative or zero.
+
+    # Same for the bounds:
+    lon_diffs = test_ds.lon_bounds.diff("bnds")
+    lat_diffs = test_ds.lat_bounds.diff("bnds")
+
+    assert (lon_diffs <= 0).sum() <= (5 * len(lon_diffs.y))
+    assert (lat_diffs <= 0).sum() <= (5 * len(lat_diffs.y))
+
+
+################################# xgcm grid specific tests ########################################
+expected_failures = [
+    ("AWI-ESM-1-1-LR", "thetao", "historical", "gn"),
+    ("AWI-ESM-1-1-MR", "thetao", "historical", "gn"),
+    ("AWI-ESM-1-1-MR", "thetao", "ssp585", "gn"),
+    ("AWI-CM-1-1-MR", "thetao", "historical", "gn"),
+    ("AWI-CM-1-1-MR", "thetao", "ssp585", "gn"),
+    ("CESM2-FV2", "thetao", "historical", "gn"),
+    ("CMCC-CM2-SR5", "thetao", "historical", "gn"),
+    ("CMCC-CM2-SR5", "thetao", "ssp585", "gn"),
+    ("FGOALS-f3-L", "thetao", "historical", "gn"),
+    ("FGOALS-f3-L", "thetao", "ssp585", "gn"),
+    ("FGOALS-g3", "thetao", "ssp585", "gn"),
+    ("MPI-ESM-1-2-HAM", "thetao", "historical", "gn"),
+    ("MPI-ESM-1-2-HAM", "o2", "historical", "gn"),
+    ("NorESM2-MM", "thetao", "historical", "gn"),
+    ("NorESM2-MM", "thetao", "historical", "gr"),
+    ("IPSL-CM6A-LR", "thetao", "historical", "gn"),
+    ("IPSL-CM6A-LR", "o2", "historical", "gn"),
+]
+
+
+@pytest.mark.parametrize(
+    "source_id,variable_id,experiment_id,grid_label",
+    xfail_wrapper(full_specs(), expected_failures),
+)
+def test_check_grid(source_id, variable_id, experiment_id, grid_label):
+
+    ds, cat = data(source_id, variable_id, experiment_id, grid_label, True)
+
+    if ds is None:
+        pytest.skip(
+            f"No data found for {source_id}|{variable_id}|{experiment_id}|{grid_label}"
         )
 
-        if source_id == "MPI-ESM-1-2-HAM" or source_id == "MPI-ESM1-2-LR":
-            pytest.skip("No available grid shift info")
+    # This is just a rudimentary test to see if the creation works
+    staggered_grid, ds_staggered = combine_staggered_grid(ds, recalculate_metrics=True)
 
-        assert ds_staggered is not None
-        #
-        if "lev" in ds_staggered.dims:
-            assert "bnds" in ds_staggered.lev_bounds.dims
+    print(ds_staggered)
 
-        for axis in ["X", "Y"]:
-            for metric in ["_t", "_gx", "_gy", "_gxgy"]:
-                assert f"d{axis.lower()}{metric}" in list(ds_staggered.coords)
-        # TODO: Include actual test to combine variables
+    assert ds_staggered is not None
+    #
+    if "lev" in ds_staggered.dims:
+        assert "bnds" in ds_staggered.lev_bounds.dims
 
-    else:
-        pytest.xfail("Model data not available")
+    for axis in ["X", "Y"]:
+        for metric in ["_t", "_gx", "_gy", "_gxgy"]:
+            assert f"d{axis.lower()}{metric}" in list(ds_staggered.coords)
+    # TODO: Include actual test to combine variables
diff --git a/cmip6_preprocessing/tests/test_regionmask.py b/cmip6_preprocessing/tests/test_regionmask.py
index 398abd1d..f05713b8 100644
--- a/cmip6_preprocessing/tests/test_regionmask.py
+++ b/cmip6_preprocessing/tests/test_regionmask.py
@@ -1,5 +1,4 @@
 import pytest
-import intake
 import numpy as np
 import xarray as xr
 from cmip6_preprocessing.preprocessing import combined_preprocessing
@@ -11,15 +10,6 @@
 
 
 def test_merge_mask():
-    # load test dataset in the cloud (this does not work from within the CI...will check that out later. )
-    #     # import example cloud datasets
-    #     col_url = "https://raw.githubusercontent.com/NCAR/intake-esm-datastore/master/catalogs/pangeo-cmip6.json"
-    #     col = intake.open_esm_datastore(col_url)
-    #     cat = col.search(source_id=['GFDL-ESM4'],experiment_id='historical', variable_id='thetao')
-    #     data_dict = cat.to_dataset_dict(zarr_kwargs={'consolidated': True, 'decode_times': False},
-    #                                     preprocess=combined_preprocessing)
-    #     ds = data_dict[list(data_dict.keys())[0]]
-
     x = np.linspace(0, 360, 720)
     y = np.linspace(-90, 90, 360)
     data = np.random.rand(len(x), len(y))
diff --git a/pytest.ini b/pytest.ini
index 0c2a2b9c..d61d0296 100644
--- a/pytest.ini
+++ b/pytest.ini
@@ -1,2 +1,2 @@
 [pytest]
-# addopts = -n2
+xfail_strict=true