From 3194b3ed1e414729ba6ab6f7f3ed39a425da42b1 Mon Sep 17 00:00:00 2001 From: Andrew Williams <56925856+AndrewWilliams3142@users.noreply.github.com> Date: Mon, 25 May 2020 17:55:33 +0100 Subject: [PATCH] xr.cov() and xr.corr() (#4089) * Added chunks='auto' option in dataset.py * reverted accidental changes in dataset.chunk() * Added corr and cov to computation.py. Taken from r-beer:xarray/corr * Added r-beer's tests to test_computation.py Still issues I think * trying to fix github.com/pydata/xarray/pull/3550#discussion_r349935731 * Removing drop=True from the `.where()` calls in `computation.py`+test.py * api.rst and whats-new.rst * Updated `xarray/__init__.py` and added `broadcast` import to computation * added DataArray import to corr, cov * assert_allclose added to test_computation.py * removed whitespace in test_dask...oops * Added to init * format changes * Fiddling around with cov/corr tests in `test_computation.py` * PEP8 changes * pep * remove old todo and comments * isort * Added consistency check between corr() and cov(), ensure they give same * added `skipna=False` to `computation.py`. made consistency+autocov tests * formatting * Added numpy-based tests. * format * formatting again * Update doc/whats-new.rst Co-authored-by: keewis * refactored corr/cov so there is one internal method for calculating both * formatting * updating docstrings and code suggestions from PR * paramterize ddof in tests * removed extraneous test arrays * formatting + adding deterministic docstring * added test for TypeError * formatting * tidying up docstring * formatting and tidying up `_cov_corr()` so that the logic is more clear * flake8 ... Co-authored-by: keewis --- doc/api.rst | 2 + doc/whats-new.rst | 2 + xarray/__init__.py | 4 +- xarray/core/computation.py | 180 ++++++++++++++++++++++++++++++- xarray/tests/test_computation.py | 158 ++++++++++++++++++++++++++- 5 files changed, 343 insertions(+), 3 deletions(-) diff --git a/doc/api.rst b/doc/api.rst index 8ec6843d24a..c9f24e8c3f1 100644 --- a/doc/api.rst +++ b/doc/api.rst @@ -29,6 +29,8 @@ Top-level functions full_like zeros_like ones_like + cov + corr dot polyval map_blocks diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 0be988da690..d4a46c1e020 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -36,6 +36,8 @@ Breaking changes New Features ~~~~~~~~~~~~ +- Added :py:func:`xarray.cov` and :py:func:`xarray.corr` (:issue:`3784`, :pull:`3550`, :pull:`4089`). + By `Andrew Williams `_ and `Robin Beer `_. - Added :py:meth:`DataArray.polyfit` and :py:func:`xarray.polyval` for fitting polynomials. (:issue:`3349`) By `Pascal Bourgault `_. - Control over attributes of result in :py:func:`merge`, :py:func:`concat`, diff --git a/xarray/__init__.py b/xarray/__init__.py index 0fead57e5fb..e8274d13ffe 100644 --- a/xarray/__init__.py +++ b/xarray/__init__.py @@ -17,7 +17,7 @@ from .core.alignment import align, broadcast from .core.combine import auto_combine, combine_by_coords, combine_nested from .core.common import ALL_DIMS, full_like, ones_like, zeros_like -from .core.computation import apply_ufunc, dot, polyval, where +from .core.computation import apply_ufunc, corr, cov, dot, polyval, where from .core.concat import concat from .core.dataarray import DataArray from .core.dataset import Dataset @@ -54,6 +54,8 @@ "concat", "decode_cf", "dot", + "cov", + "corr", "full_like", "load_dataarray", "load_dataset", diff --git a/xarray/core/computation.py b/xarray/core/computation.py index 28bf818e4a3..6ac4f74c3a6 100644 --- a/xarray/core/computation.py +++ b/xarray/core/computation.py @@ -24,7 +24,7 @@ import numpy as np from . import dtypes, duck_array_ops, utils -from .alignment import deep_align +from .alignment import align, deep_align from .merge import merge_coordinates_without_align from .options import OPTIONS from .pycompat import dask_array_type @@ -1069,6 +1069,184 @@ def earth_mover_distance(first_samples, return apply_array_ufunc(func, *args, dask=dask) +def cov(da_a, da_b, dim=None, ddof=1): + """ + Compute covariance between two DataArray objects along a shared dimension. + + Parameters + ---------- + da_a: DataArray object + Array to compute. + da_b: DataArray object + Array to compute. + dim : str, optional + The dimension along which the covariance will be computed + ddof: int, optional + If ddof=1, covariance is normalized by N-1, giving an unbiased estimate, + else normalization is by N. + + Returns + ------- + covariance: DataArray + + See also + -------- + pandas.Series.cov: corresponding pandas function + xr.corr: respective function to calculate correlation + + Examples + -------- + >>> da_a = DataArray(np.array([[1, 2, 3], [0.1, 0.2, 0.3], [3.2, 0.6, 1.8]]), + ... dims=("space", "time"), + ... coords=[('space', ['IA', 'IL', 'IN']), + ... ('time', pd.date_range("2000-01-01", freq="1D", periods=3))]) + >>> da_a + + array([[1. , 2. , 3. ], + [0.1, 0.2, 0.3], + [3.2, 0.6, 1.8]]) + Coordinates: + * space (space) >> da_a = DataArray(np.array([[0.2, 0.4, 0.6], [15, 10, 5], [3.2, 0.6, 1.8]]), + ... dims=("space", "time"), + ... coords=[('space', ['IA', 'IL', 'IN']), + ... ('time', pd.date_range("2000-01-01", freq="1D", periods=3))]) + >>> da_b + + array([[ 0.2, 0.4, 0.6], + [15. , 10. , 5. ], + [ 3.2, 0.6, 1.8]]) + Coordinates: + * space (space) >> xr.cov(da_a, da_b) + + array(-3.53055556) + >>> xr.cov(da_a, da_b, dim='time') + + array([ 0.2, -0.5, 1.69333333]) + Coordinates: + * space (space) >> da_a = DataArray(np.array([[1, 2, 3], [0.1, 0.2, 0.3], [3.2, 0.6, 1.8]]), + ... dims=("space", "time"), + ... coords=[('space', ['IA', 'IL', 'IN']), + ... ('time', pd.date_range("2000-01-01", freq="1D", periods=3))]) + >>> da_a + + array([[1. , 2. , 3. ], + [0.1, 0.2, 0.3], + [3.2, 0.6, 1.8]]) + Coordinates: + * space (space) >> da_a = DataArray(np.array([[0.2, 0.4, 0.6], [15, 10, 5], [3.2, 0.6, 1.8]]), + ... dims=("space", "time"), + ... coords=[('space', ['IA', 'IL', 'IN']), + ... ('time', pd.date_range("2000-01-01", freq="1D", periods=3))]) + >>> da_b + + array([[ 0.2, 0.4, 0.6], + [15. , 10. , 5. ], + [ 3.2, 0.6, 1.8]]) + Coordinates: + * space (space) >> xr.corr(da_a, da_b) + + array(-0.57087777) + >>> xr.corr(da_a, da_b, dim='time') + + array([ 1., -1., 1.]) + Coordinates: + * space (space)