Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Make cell_methods-derived bounds optional in getvar #294

Merged
merged 1 commit into from
Jun 17, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 16 additions & 7 deletions cosima_cookbook/querying.py
Original file line number Diff line number Diff line change
Expand Up @@ -284,6 +284,7 @@ def getvar(
frequency=None,
attrs=None,
attrs_unique=None,
return_dataset=False,
**kwargs,
):
"""For a given experiment, return an xarray DataArray containing the
Expand All @@ -310,6 +311,10 @@ def getvar(
must be unique on the returned variables. Defaults to
{'cell_methods': 'time: mean'} and should not generally be
changed.
return_dataset - if True, return an xarray.Dataset containing the
requested variable along with its time_bounds,
if present. Otherwise (default), return an
xarray.DataArray containing only the variable.

Note that if start_time and/or end_time are used, the time range
of the resulting dataset may not be bounded exactly on those
Expand Down Expand Up @@ -339,8 +344,11 @@ def getvar(
attrs_unique,
)

# we know at least one variable was returned
variables = _bounds_vars_for_variable(*ncfiles[0])
variables = [variable]
if return_dataset:
# we know at least one variable was returned, so we can index ncfiles
# ask for the extra variables associated with cell_methods, etc.
variables += _bounds_vars_for_variable(*ncfiles[0])

# chunking -- use first row/file and assume it's the same across the whole dataset
xr_kwargs = {"chunks": _parse_chunks(ncfiles[0].NCVar)}
Expand All @@ -365,7 +373,11 @@ def _preprocess(d):
**xr_kwargs,
)

da = ds[variable]
if return_dataset:
da = ds
else:
# if we want a dataarray, we'll strip off the extra info
da = ds[variable]

# Check the chunks given were actually in the data
chunks = xr_kwargs.get("chunks", None)
Expand All @@ -376,9 +388,6 @@ def _preprocess(d):
f"chunking along dimensions {missing_chunk_dims} is not possible. Available dimensions for chunking are {set(da.dims)}"
)

for attr in variables[1:]:
da.attrs[attr] = ds[attr]

da.attrs["ncfiles"] = ncfiles

# Get experiment metadata, delete extraneous fields and add
Expand All @@ -402,7 +411,7 @@ def _preprocess(d):
def _bounds_vars_for_variable(ncfile, ncvar):
"""Return a list of names of the bounds variables associated with a variable"""

variables = [ncvar.varname]
variables = []

if "cell_methods" not in ncvar.attrs:
# no cell methods, so no need to look for bounds
Expand Down
8 changes: 5 additions & 3 deletions test/test_querying.py
Original file line number Diff line number Diff line change
Expand Up @@ -593,13 +593,15 @@ def test_disambiguation_by_frequency(session):


def test_time_bounds_on_dataarray(session):
var_salt = cc.querying.getvar("querying", "salt", session, decode_times=False)
var_salt = cc.querying.getvar(
"querying", "salt", session, decode_times=False, return_dataset=True
)

# with return_dataset=True, time_bounds should be a variable in the returned Dataset
assert "time_bounds" in var_salt.attrs
assert "time_bounds" in var_salt

# and time_bounds should itself be a DataArray
assert isinstance(var_salt.attrs["time_bounds"], xr.DataArray)
assert isinstance(var_salt["time_bounds"], xr.DataArray)


def test_query_with_attrs(session):
Expand Down