Skip to content

Commit

Permalink
Merge pull request pangeo-forge#216 from TomAugspurger/fix/214-key-error
Browse files Browse the repository at this point in the history
finalize_target handles coordinateless dimensions
  • Loading branch information
rabernat authored Sep 30, 2021
2 parents 244fe1d + ef19380 commit b717031
Show file tree
Hide file tree
Showing 3 changed files with 70 additions and 1 deletion.
5 changes: 4 additions & 1 deletion pangeo_forge_recipes/recipes/xarray_zarr.py
Original file line number Diff line number Diff line change
Expand Up @@ -641,7 +641,10 @@ def finalize_target(
logger.info("Consolidating dimension coordinate arrays")
target_mapper = target.get_mapper()
group = zarr.open(target_mapper, mode="a")
dims = _gather_coordinate_dimensions(group)
# https://github.com/pangeo-forge/pangeo-forge-recipes/issues/214
# intersect the dims from the array metadata with the Zarr group
# to handle coordinateless dimensions.
dims = set(_gather_coordinate_dimensions(group)) & set(group)
for dim in dims:
arr = group[dim]
attrs = dict(arr.attrs)
Expand Down
46 changes: 46 additions & 0 deletions tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -211,6 +211,18 @@ def daily_xarray_dataset():
return ds


@pytest.fixture(scope="session")
def daily_xarray_dataset_with_coordinateless_dimension(daily_xarray_dataset):
    """
    Variant of ``daily_xarray_dataset`` with the ``lon`` variable removed,
    giving a Dataset with a coordinateless dimension.

    Reproduces https://github.com/pangeo-forge/pangeo-forge-recipes/issues/214
    """
    # Delete from a copy rather than from the fixture value that was passed in.
    without_lon = daily_xarray_dataset.copy()
    del without_lon["lon"]
    return without_lon


@pytest.fixture(scope="session")
def netcdf_local_paths_sequential_1d(daily_xarray_dataset, tmpdir_factory):
    """Pass the daily dataset to ``make_netcdf_local_paths`` with ``split_up_files_by_day``."""
    return make_netcdf_local_paths(
        daily_xarray_dataset,
        tmpdir_factory,
        "D",
        split_up_files_by_day,
    )
Expand Down Expand Up @@ -259,6 +271,18 @@ def netcdf_local_paths_sequential_multivariable(request):
return request.param


@pytest.fixture(scope="session")
def netcdf_local_paths_sequential_multivariable_with_coordinateless_dimension(
    daily_xarray_dataset_with_coordinateless_dimension, tmpdir_factory
):
    """
    Pass the coordinateless-dimension dataset to ``make_netcdf_local_paths``
    with ``split_up_files_by_variable_and_day``.
    """
    source_ds = daily_xarray_dataset_with_coordinateless_dimension
    return make_netcdf_local_paths(
        source_ds, tmpdir_factory, "D", split_up_files_by_variable_and_day
    )


@pytest.fixture(
scope="session",
params=[
Expand Down Expand Up @@ -297,6 +321,18 @@ def netcdf_http_paths_sequential_1d(request):
return request.param


@pytest.fixture(scope="session")
def netcdf_local_paths_sequential_with_coordinateless_dimension(
    daily_xarray_dataset_with_coordinateless_dimension, tmpdir_factory
):
    """
    Pass the coordinateless-dimension dataset to ``make_netcdf_local_paths``
    with ``split_up_files_by_day``.
    """
    source_ds = daily_xarray_dataset_with_coordinateless_dimension
    return make_netcdf_local_paths(source_ds, tmpdir_factory, "D", split_up_files_by_day)


# FilePattern fixtures ----------------------------------------------------------------------------


Expand Down Expand Up @@ -328,6 +364,16 @@ def netcdf_http_file_pattern_sequential_1d(netcdf_http_paths_sequential_1d):
return make_file_pattern(netcdf_http_paths_sequential_1d)


@pytest.fixture(scope="session")
def netcdf_local_file_pattern_sequential_with_coordinateless_dimension(
    netcdf_local_paths_sequential_with_coordinateless_dimension,
):
    """
    File pattern where one of the dimensions doesn't have a coordinate.

    Built by handing the coordinateless-dimension paths fixture to ``make_file_pattern``.
    """
    paths = netcdf_local_paths_sequential_with_coordinateless_dimension
    return make_file_pattern(paths)


# Storage fixtures --------------------------------------------------------------------------------


Expand Down
20 changes: 20 additions & 0 deletions tests/recipe_tests/test_XarrayZarrRecipe.py
Original file line number Diff line number Diff line change
Expand Up @@ -358,6 +358,26 @@ def test_no_consolidate_dimension_coordinates(netCDFtoZarr_recipe):
assert store["time"].chunks == (file_pattern.nitems_per_input["time"],)


def test_consolidate_dimension_coordinates_with_coordinateless_dimension(
    netcdf_local_file_pattern_sequential_with_coordinateless_dimension,
    daily_xarray_dataset_with_coordinateless_dimension,
    tmp_target,
    tmp_cache,
    tmp_metadata_target,
):
    """
    Run a recipe built from the coordinateless-dimension fixtures and check that
    the dataset read back from the target is identical to the expected dataset.
    """
    recipe_parts = make_netCDFtoZarr_recipe(
        netcdf_local_file_pattern_sequential_with_coordinateless_dimension,
        daily_xarray_dataset_with_coordinateless_dimension,
        tmp_target,
        tmp_cache,
        tmp_metadata_target,
    )
    RecipeClass, file_pattern, kwargs, ds_expected, target = recipe_parts

    # Build the recipe, compile it to a plain callable, and execute it.
    recipe = RecipeClass(file_pattern, **kwargs)
    run_recipe = recipe.to_function()
    run_recipe()

    # Read the written store back into memory before comparing.
    stored = xr.open_zarr(target.get_mapper())
    ds_actual = stored.load()
    xr.testing.assert_identical(ds_actual, ds_expected)


def test_lock_timeout(netCDFtoZarr_recipe_sequential_only, execute_recipe_no_dask):
RecipeClass, file_pattern, kwargs, ds_expected, target = netCDFtoZarr_recipe_sequential_only

Expand Down

0 comments on commit b717031

Please sign in to comment.