Skip to content

Commit

Permalink
Make cell_methods-derived bounds optional in getvar
Browse files Browse the repository at this point in the history
This is particularly useful for ice data where we can get the time
bounds for averaging intervals. However, because we were attaching the
bounds as an xarray Dataset, we couldn't serialise a variable straight
back to disk, which was catching people out. Instead, we can make the
bounds optional, included only on request. In that case, return a full
xarray Dataset, rather than just a DataArray.

Closes #284.
  • Loading branch information
angus-g committed Jun 17, 2022
1 parent 47d3bb2 commit a59f276
Show file tree
Hide file tree
Showing 2 changed files with 21 additions and 10 deletions.
23 changes: 16 additions & 7 deletions cosima_cookbook/querying.py
Original file line number Diff line number Diff line change
Expand Up @@ -284,6 +284,7 @@ def getvar(
frequency=None,
attrs=None,
attrs_unique=None,
return_dataset=False,
**kwargs,
):
"""For a given experiment, return an xarray DataArray containing the
Expand All @@ -310,6 +311,10 @@ def getvar(
must be unique on the returned variables. Defaults to
{'cell_methods': 'time: mean'} and should not generally be
changed.
return_dataset - if True, return xarray.Dataset, containing the
requested variable, along with its time_bounds,
if present. Otherwise (default), return
xarray.DataArray containing only the variable
Note that if start_time and/or end_time are used, the time range
of the resulting dataset may not be bounded exactly on those
Expand Down Expand Up @@ -339,8 +344,11 @@ def getvar(
attrs_unique,
)

# we know at least one variable was returned
variables = _bounds_vars_for_variable(*ncfiles[0])
variables = [variable]
if return_dataset:
# we know at least one variable was returned, so we can index ncfiles
# ask for the extra variables associated with cell_methods, etc.
variables += _bounds_vars_for_variable(*ncfiles[0])

# chunking -- use first row/file and assume it's the same across the whole dataset
xr_kwargs = {"chunks": _parse_chunks(ncfiles[0].NCVar)}
Expand All @@ -365,7 +373,11 @@ def _preprocess(d):
**xr_kwargs,
)

da = ds[variable]
if return_dataset:
da = ds
else:
# if we want a dataarray, we'll strip off the extra info
da = ds[variable]

# Check the chunks given were actually in the data
chunks = xr_kwargs.get("chunks", None)
Expand All @@ -376,9 +388,6 @@ def _preprocess(d):
f"chunking along dimensions {missing_chunk_dims} is not possible. Available dimensions for chunking are {set(da.dims)}"
)

for attr in variables[1:]:
da.attrs[attr] = ds[attr]

da.attrs["ncfiles"] = ncfiles

# Get experiment metadata, delete extraneous fields and add
Expand All @@ -402,7 +411,7 @@ def _preprocess(d):
def _bounds_vars_for_variable(ncfile, ncvar):
"""Return a list of names for a variable and its bounds"""

variables = [ncvar.varname]
variables = []

if "cell_methods" not in ncvar.attrs:
# no cell methods, so no need to look for bounds
Expand Down
8 changes: 5 additions & 3 deletions test/test_querying.py
Original file line number Diff line number Diff line change
Expand Up @@ -593,13 +593,15 @@ def test_disambiguation_by_frequency(session):


def test_time_bounds_on_dataarray(session):
var_salt = cc.querying.getvar("querying", "salt", session, decode_times=False)
var_salt = cc.querying.getvar(
"querying", "salt", session, decode_times=False, return_dataset=True
)

# we should have added time_bounds into the DataArray's attributes
assert "time_bounds" in var_salt.attrs
assert "time_bounds" in var_salt

# and time_bounds should itself be a DataArray
assert isinstance(var_salt.attrs["time_bounds"], xr.DataArray)
assert isinstance(var_salt["time_bounds"], xr.DataArray)


def test_query_with_attrs(session):
Expand Down

0 comments on commit a59f276

Please sign in to comment.