Skip to content

Commit

Permalink
Fix add_bounds for heavily curved grids (#376)
Browse files Browse the repository at this point in the history
* Fix add_bounds for heavy curved grids - fix no-index issues - adapt tests

* Send bounds guessing to helpers in own functions - upd doc

* added figures

* Add test

* Add mention of no-index dims in error

Co-authored-by: Deepak Cherian <[email protected]>
  • Loading branch information
aulemahal and dcherian authored Dec 7, 2022
1 parent ff740af commit 20bdfdd
Show file tree
Hide file tree
Showing 7 changed files with 138 additions and 36 deletions.
45 changes: 11 additions & 34 deletions cf_xarray/accessor.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@
from xarray.core.weighted import Weighted

from .criteria import cf_role_criteria, coordinate_criteria, regex
from .helpers import bounds_to_vertices
from .helpers import _guess_bounds_1d, _guess_bounds_2d, bounds_to_vertices
from .options import OPTIONS
from .utils import (
_get_version,
Expand Down Expand Up @@ -465,7 +465,7 @@ def wrapper(obj: DataArray | Dataset, key: str):
}


def _guess_bounds_dim(da, dim=None, out_dim="bounds"):
def _guess_bounds(da, dim=None, out_dim="bounds"):
"""
Guess bounds values given a 1D or 2D coordinate variable.
Assumes equal spacing on either side of the coordinate label.
Expand All @@ -477,43 +477,18 @@ def _guess_bounds_dim(da, dim=None, out_dim="bounds"):
f"If dim is None, variable {da.name} must be 1D or 2D. Received {da.ndim}D variable instead."
)
dim = da.dims

if not isinstance(dim, str):
if len(dim) > 2:
raise NotImplementedError(
"Adding bounds with more than 2 dimensions is not supported."
)
elif len(dim) == 2:
daX = _guess_bounds_dim(da, dim[0]).rename(bounds="Xbnds")
daXY = _guess_bounds_dim(daX, dim[1]).rename(bounds="Ybnds")
return xr.concat(
[
daXY.isel(Xbnds=0, Ybnds=0),
daXY.isel(Xbnds=0, Ybnds=1),
daXY.isel(Xbnds=1, Ybnds=1),
daXY.isel(Xbnds=1, Ybnds=0),
],
out_dim,
).transpose(..., "bounds")
return _guess_bounds_2d(da, dim).rename(bounds=out_dim)
else:
dim = dim[0]
if dim not in da.dims:
(dim,) = da.cf.axes[dim]
if dim not in da.coords:
raise NotImplementedError(
"Adding bounds for unindexed dimensions is not supported currently."
)

diff = da.diff(dim)
lower = da - diff / 2
upper = da + diff / 2
bounds = xr.concat([lower, upper], dim=out_dim)

first = (bounds.isel({dim: 0}) - diff.isel({dim: 0})).assign_coords(
{dim: da[dim][0]}
)
result = xr.concat([first, bounds], dim=dim).transpose(..., "bounds")

return result
return _guess_bounds_1d(da, dim).rename(bounds=out_dim)


def _build_docstring(func):
Expand Down Expand Up @@ -2252,15 +2227,17 @@ def add_bounds(

bad_vars: set[str] = variables - set(obj.variables)
if bad_vars:
raise ValueError(
f"{bad_vars!r} are not variables in the underlying object."
)
msg = f"{bad_vars!r} are not variables in the underlying object."
dims_no_idx = bad_vars.intersection(obj.dims)
if dims_no_idx:
msg += f" {dims_no_idx!r} are dimensions with no index."
raise ValueError(msg)

for var in variables:
bname = f"{var}_bounds"
if bname in obj.variables:
raise ValueError(f"Bounds variable name {bname!r} will conflict!")
out = _guess_bounds_dim(
out = _guess_bounds(
obj[var].reset_coords(drop=True), dim=dim, out_dim=output_dim
)
if output_dim in obj.dims and (new := out[output_dim].size) != (
Expand Down
102 changes: 102 additions & 0 deletions cf_xarray/helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,108 @@
from xarray import DataArray


def _guess_bounds_1d(da, dim):
"""
Guess bounds values given a 1D coordinate variable.
Assumes equal spacing on either side of the coordinate label.
This is an approximation only.
Output has an added "bounds" dimension at the end.
"""
if dim not in da.dims:
(dim,) = da.cf.axes[dim]
ADDED_INDEX = False
if dim not in da.coords:
# For proper alignment in the lines below, we need an index on dim.
da = da.assign_coords({dim: da[dim]})
ADDED_INDEX = True

diff = da.diff(dim)
lower = da - diff / 2
upper = da + diff / 2
bounds = xr.concat([lower, upper], dim="bounds")

first = (bounds.isel({dim: 0}) - diff.isel({dim: 0})).assign_coords(
{dim: da[dim][0]}
)
result = xr.concat([first, bounds], dim=dim).transpose(..., "bounds")
if ADDED_INDEX:
result = result.drop_vars(dim)
return result


def _guess_bounds_2d(da, dims):
"""
Guess bounds values given a 2D coordinate variable.
Assumes equal spacing on either side of the coordinate label.
This is a coarse approximation, especially for curvilinear grids.
Output has an added "bounds" dimension at the end.
"""
daX = _guess_bounds_1d(da, dims[0]).rename(bounds="Xbnds")
daXY = _guess_bounds_1d(daX, dims[1]).rename(bounds="Ybnds")
# At this point, we might have different corners for adjacent cells, we average them together to have a nice grid
# To make this vectorized and keep the edges, we'll pad with NaNs and ignore them in the averages
daXYp = (
daXY.pad({d: (1, 1) for d in dims}, mode="constant", constant_values=np.NaN)
.transpose(*dims, "Xbnds", "Ybnds")
.values
) # Tranpose for an easier notation
# Mean of the corners that should be the same point.
daXYm = np.stack(
(
# Lower left corner (mean of : upper right of the lower left cell, lower right of the upper left cell, and so on, ccw)
np.nanmean(
np.stack(
(
daXYp[:-2, :-2, 1, 1],
daXYp[:-2, 1:-1, 1, 0],
daXYp[1:-1, 1:-1, 0, 0],
daXYp[1:-1, :-2, 0, 1],
)
),
axis=0,
),
# Upper left corner
np.nanmean(
np.stack(
(
daXYp[:-2, 1:-1, 1, 1],
daXYp[:-2, 2:, 1, 0],
daXYp[1:-1, 2:, 0, 0],
daXYp[1:-1, 1:-1, 0, 1],
)
),
axis=0,
),
# Upper right
np.nanmean(
np.stack(
(
daXYp[1:-1, 1:-1, 1, 1],
daXYp[1:-1, 2:, 1, 0],
daXYp[2:, 2:, 0, 0],
daXYp[2:, 1:-1, 0, 1],
)
),
axis=0,
),
# Lower right
np.nanmean(
np.stack(
(
daXYp[1:-1, :-2, 1, 1],
daXYp[1:-1, 1:-1, 1, 0],
daXYp[2:, 1:-1, 0, 0],
daXYp[2:, :-2, 0, 1],
)
),
axis=0,
),
),
axis=-1,
)
return xr.DataArray(daXYm, dims=(*dims, "bounds"), coords=da.coords)


def bounds_to_vertices(
bounds: DataArray,
bounds_dim: str,
Expand Down
18 changes: 16 additions & 2 deletions cf_xarray/tests/test_accessor.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@
pomds,
popds,
romsds,
rotds,
vert,
)
from . import raise_if_dask_computes, requires_cftime, requires_pint, requires_scipy
Expand Down Expand Up @@ -818,6 +819,18 @@ def test_add_bounds_nd_variable():
actual = ds.cf.add_bounds("z").z_bounds.reset_coords(drop=True)
xr.testing.assert_identical(actual, expected)

# 2D rotated ds
lon_bounds = (
rotds.drop_vars(["lon_bounds"])
.assign(x=rotds["x"], y=rotds["y"])
.cf.add_bounds(["lon"])
.lon_bounds
)
# upper left of cell must be the EXACT same as the lower left of the cell above
assert lon_bounds[0, 1, 1] == lon_bounds[0, 2, 0]
# upper right of cell must be the EXACT same as the lower right of the cell above
assert lon_bounds[0, 1, 2] == lon_bounds[0, 2, 3]

# 1D
expected = (
xr.concat([ds.z - 1.5, ds.z + 1.5], dim="bounds")
Expand All @@ -828,8 +841,9 @@ def test_add_bounds_nd_variable():
actual = ds.cf.add_bounds("z", dim="x").z_bounds.reset_coords(drop=True)
xr.testing.assert_identical(expected.transpose(..., "bounds"), actual)

with pytest.raises(NotImplementedError):
ds.drop_vars("x").cf.add_bounds("z", dim="x")
# Requesting bounds on a non-variable dimension
with pytest.raises(ValueError, match="are dimensions with no index."):
ds.drop_vars("x").cf.add_bounds("x")

with pytest.raises(ValueError, match="The `bounds` dimension already exists"):
ds.cf.add_bounds("z").cf.add_bounds("x")
Expand Down
Binary file added doc/2D_bounds_averaged.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added doc/2D_bounds_nonunique.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
7 changes: 7 additions & 0 deletions doc/bounds.md
Original file line number Diff line number Diff line change
Expand Up @@ -16,3 +16,10 @@ See
As an example, we present a "rotated pole" grid. It is defined on a rotated rectilinear grid which uses the `rlat` and `rlon` 1D coordinates, over North America at a resolution of 0.44°. The datasets comes with 2D `lat` and `lon` coordinates. `cf_xarray` will estimate the bounds by linear interpolation (extrapolation at the edges) of the existing `lon` and `lat`, which yields good results on parts of the grid where the rotation is small. However the errors is larger in other places, as seen when visualizing the distance in degrees between the estimated bounds and the true bounds.

![2d bounds error](2D_bounds_error.png)

For grids with a strong curvature between the cartesian axes and the lat/lon coordinates, the basic linear interpolation done for each point individually can yield grid cells with unmatching corners. The next figure shows such a case as it would be expected that the 4 corners within the red circle would all be the same point. To circumvent this issue, `cf_xarray` will average together these 4 different results, as shown on the last figure.

![2d bounds unmatching corners](2D_bounds_nonunique.png)
![2d bounds averaged corners](2D_bounds_averaged.png)

This last examples illustrates again that `cf_xarray` can only estimate the grid bounds, grid metrics provided by the data producer will always be better.
2 changes: 2 additions & 0 deletions doc/whats-new.rst
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@ What's New
v0.7.6 (unreleased)
===================

- Fix to ``cf.add_bounds`` to support all types of curved grids (:pr:`376`).
By `Pascal Bourgault`_
- Allow custom criteria to match the variable name of DataArray objects (:pr:`379`). By `Mathias Hauser`_

v0.7.5 (Nov 15, 2022)
Expand Down

0 comments on commit 20bdfdd

Please sign in to comment.