diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 27f93c8e578..8c9b61a7364 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -21,6 +21,7 @@ repos: rev: v0.3.4 hooks: - id: blackdoc + exclude: "generate_reductions.py" - repo: https://gitlab.com/pycqa/flake8 rev: 3.9.2 hooks: diff --git a/doc/whats-new.rst b/doc/whats-new.rst index bcc40f4fd48..0128e70caed 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -39,6 +39,11 @@ Bug fixes Documentation ~~~~~~~~~~~~~ +- Better examples in docstrings for groupby and resampling reductions (:pull:`5871`). + By `Deepak Cherian `_, + `Maximilian Roos `_, + `Jimmy Westling `_ . + Internal Changes ~~~~~~~~~~~~~~~~ diff --git a/xarray/core/_reductions.py b/xarray/core/_reductions.py new file mode 100644 index 00000000000..67fbbd482d0 --- /dev/null +++ b/xarray/core/_reductions.py @@ -0,0 +1,3739 @@ +"""Mixin classes with reduction operations.""" +# This file was generated using xarray.util.generate_reductions. Do not edit manually. + +import sys +from typing import Any, Callable, Hashable, Optional, Sequence, Union + +from . import duck_array_ops +from .types import T_DataArray, T_Dataset + +if sys.version_info >= (3, 8): + from typing import Protocol +else: + from typing_extensions import Protocol + + +class DatasetReduce(Protocol): + def reduce( + self, + func: Callable[..., Any], + dim: Union[None, Hashable, Sequence[Hashable]] = None, + axis: Union[None, int, Sequence[int]] = None, + keep_attrs: bool = None, + keepdims: bool = False, + **kwargs: Any, + ) -> T_Dataset: + ... + + +class DatasetGroupByReductions: + __slots__ = () + + def count( + self: DatasetReduce, + dim: Union[None, Hashable, Sequence[Hashable]] = None, + keep_attrs: bool = None, + **kwargs, + ) -> T_Dataset: + """ + Reduce this Dataset's data by applying ``count`` along some dimension(s). 
+ + Parameters + ---------- + dim : hashable or iterable of hashable, optional + Name of dimension[s] along which to apply ``count``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If ``None``, will reduce over all dimensions + present in the grouped variable. + keep_attrs : bool, optional + If True, ``attrs`` will be copied from the original + object to the new one. If False (default), the new object will be + returned without attributes. + **kwargs : dict + Additional keyword arguments passed on to the appropriate array + function for calculating ``count`` on this object's data. + + Returns + ------- + reduced : Dataset + New Dataset with ``count`` applied to its data and the + indicated dimension(s) removed + + Examples + -------- + >>> da = xr.DataArray( + ... np.array([1, 2, 3, 1, 2, np.nan]), + ... dims="time", + ... coords=dict( + ... time=("time", pd.date_range("01-01-2001", freq="M", periods=6)), + ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), + ... ), + ... ) + >>> ds = xr.Dataset(dict(da=da)) + >>> ds + + Dimensions: (time: 6) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> ds.groupby("labels").count() + + Dimensions: (labels: 3) + Coordinates: + * labels (labels) object 'a' 'b' 'c' + Data variables: + da (labels) int64 1 2 2 + + See Also + -------- + numpy.count + Dataset.count + :ref:`groupby` + User guide on groupby operations. + """ + return self.reduce( + duck_array_ops.count, + dim=dim, + numeric_only=False, + keep_attrs=keep_attrs, + **kwargs, + ) + + def all( + self: DatasetReduce, + dim: Union[None, Hashable, Sequence[Hashable]] = None, + keep_attrs: bool = None, + **kwargs, + ) -> T_Dataset: + """ + Reduce this Dataset's data by applying ``all`` along some dimension(s). + + Parameters + ---------- + dim : hashable or iterable of hashable, optional + Name of dimension[s] along which to apply ``all``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. 
If ``None``, will reduce over all dimensions + present in the grouped variable. + keep_attrs : bool, optional + If True, ``attrs`` will be copied from the original + object to the new one. If False (default), the new object will be + returned without attributes. + **kwargs : dict + Additional keyword arguments passed on to the appropriate array + function for calculating ``all`` on this object's data. + + Returns + ------- + reduced : Dataset + New Dataset with ``all`` applied to its data and the + indicated dimension(s) removed + + Examples + -------- + >>> da = xr.DataArray( + ... np.array([True, True, True, True, True, False], dtype=bool), + ... dims="time", + ... coords=dict( + ... time=("time", pd.date_range("01-01-2001", freq="M", periods=6)), + ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), + ... ), + ... ) + >>> ds = xr.Dataset(dict(da=da)) + >>> ds + + Dimensions: (time: 6) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> ds.groupby("labels").all() + + Dimensions: (labels: 3) + Coordinates: + * labels (labels) object 'a' 'b' 'c' + Data variables: + da (labels) bool False True True + + See Also + -------- + numpy.all + Dataset.all + :ref:`groupby` + User guide on groupby operations. + """ + return self.reduce( + duck_array_ops.array_all, + dim=dim, + numeric_only=False, + keep_attrs=keep_attrs, + **kwargs, + ) + + def any( + self: DatasetReduce, + dim: Union[None, Hashable, Sequence[Hashable]] = None, + keep_attrs: bool = None, + **kwargs, + ) -> T_Dataset: + """ + Reduce this Dataset's data by applying ``any`` along some dimension(s). + + Parameters + ---------- + dim : hashable or iterable of hashable, optional + Name of dimension[s] along which to apply ``any``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If ``None``, will reduce over all dimensions + present in the grouped variable. 
+ keep_attrs : bool, optional + If True, ``attrs`` will be copied from the original + object to the new one. If False (default), the new object will be + returned without attributes. + **kwargs : dict + Additional keyword arguments passed on to the appropriate array + function for calculating ``any`` on this object's data. + + Returns + ------- + reduced : Dataset + New Dataset with ``any`` applied to its data and the + indicated dimension(s) removed + + Examples + -------- + >>> da = xr.DataArray( + ... np.array([True, True, True, True, True, False], dtype=bool), + ... dims="time", + ... coords=dict( + ... time=("time", pd.date_range("01-01-2001", freq="M", periods=6)), + ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), + ... ), + ... ) + >>> ds = xr.Dataset(dict(da=da)) + >>> ds + + Dimensions: (time: 6) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> ds.groupby("labels").any() + + Dimensions: (labels: 3) + Coordinates: + * labels (labels) object 'a' 'b' 'c' + Data variables: + da (labels) bool True True True + + See Also + -------- + numpy.any + Dataset.any + :ref:`groupby` + User guide on groupby operations. + """ + return self.reduce( + duck_array_ops.array_any, + dim=dim, + numeric_only=False, + keep_attrs=keep_attrs, + **kwargs, + ) + + def max( + self: DatasetReduce, + dim: Union[None, Hashable, Sequence[Hashable]] = None, + skipna: bool = True, + keep_attrs: bool = None, + **kwargs, + ) -> T_Dataset: + """ + Reduce this Dataset's data by applying ``max`` along some dimension(s). + + Parameters + ---------- + dim : hashable or iterable of hashable, optional + Name of dimension[s] along which to apply ``max``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If ``None``, will reduce over all dimensions + present in the grouped variable. + skipna : bool, optional + If True, skip missing values (as marked by NaN). 
By default, only + skips missing values for float dtypes; other dtypes either do not + have a sentinel missing value (int) or skipna=True has not been + implemented (object, datetime64 or timedelta64). + keep_attrs : bool, optional + If True, ``attrs`` will be copied from the original + object to the new one. If False (default), the new object will be + returned without attributes. + **kwargs : dict + Additional keyword arguments passed on to the appropriate array + function for calculating ``max`` on this object's data. + + Returns + ------- + reduced : Dataset + New Dataset with ``max`` applied to its data and the + indicated dimension(s) removed + + Examples + -------- + >>> da = xr.DataArray( + ... np.array([1, 2, 3, 1, 2, np.nan]), + ... dims="time", + ... coords=dict( + ... time=("time", pd.date_range("01-01-2001", freq="M", periods=6)), + ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), + ... ), + ... ) + >>> ds = xr.Dataset(dict(da=da)) + >>> ds + + Dimensions: (time: 6) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> ds.groupby("labels").max() + + Dimensions: (labels: 3) + Coordinates: + * labels (labels) object 'a' 'b' 'c' + Data variables: + da (labels) float64 1.0 2.0 3.0 + + Use ``skipna`` to control whether NaNs are ignored. + + >>> ds.groupby("labels").max(skipna=False) + + Dimensions: (labels: 3) + Coordinates: + * labels (labels) object 'a' 'b' 'c' + Data variables: + da (labels) float64 nan 2.0 3.0 + + See Also + -------- + numpy.max + Dataset.max + :ref:`groupby` + User guide on groupby operations. 
+ """ + return self.reduce( + duck_array_ops.max, + dim=dim, + skipna=skipna, + numeric_only=False, + keep_attrs=keep_attrs, + **kwargs, + ) + + def min( + self: DatasetReduce, + dim: Union[None, Hashable, Sequence[Hashable]] = None, + skipna: bool = True, + keep_attrs: bool = None, + **kwargs, + ) -> T_Dataset: + """ + Reduce this Dataset's data by applying ``min`` along some dimension(s). + + Parameters + ---------- + dim : hashable or iterable of hashable, optional + Name of dimension[s] along which to apply ``min``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If ``None``, will reduce over all dimensions + present in the grouped variable. + skipna : bool, optional + If True, skip missing values (as marked by NaN). By default, only + skips missing values for float dtypes; other dtypes either do not + have a sentinel missing value (int) or skipna=True has not been + implemented (object, datetime64 or timedelta64). + keep_attrs : bool, optional + If True, ``attrs`` will be copied from the original + object to the new one. If False (default), the new object will be + returned without attributes. + **kwargs : dict + Additional keyword arguments passed on to the appropriate array + function for calculating ``min`` on this object's data. + + Returns + ------- + reduced : Dataset + New Dataset with ``min`` applied to its data and the + indicated dimension(s) removed + + Examples + -------- + >>> da = xr.DataArray( + ... np.array([1, 2, 3, 1, 2, np.nan]), + ... dims="time", + ... coords=dict( + ... time=("time", pd.date_range("01-01-2001", freq="M", periods=6)), + ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), + ... ), + ... ) + >>> ds = xr.Dataset(dict(da=da)) + >>> ds + + Dimensions: (time: 6) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 
2001-06-30 + labels (time) >> ds.groupby("labels").min() + + Dimensions: (labels: 3) + Coordinates: + * labels (labels) object 'a' 'b' 'c' + Data variables: + da (labels) float64 1.0 2.0 1.0 + + Use ``skipna`` to control whether NaNs are ignored. + + >>> ds.groupby("labels").min(skipna=False) + + Dimensions: (labels: 3) + Coordinates: + * labels (labels) object 'a' 'b' 'c' + Data variables: + da (labels) float64 nan 2.0 1.0 + + See Also + -------- + numpy.min + Dataset.min + :ref:`groupby` + User guide on groupby operations. + """ + return self.reduce( + duck_array_ops.min, + dim=dim, + skipna=skipna, + numeric_only=False, + keep_attrs=keep_attrs, + **kwargs, + ) + + def mean( + self: DatasetReduce, + dim: Union[None, Hashable, Sequence[Hashable]] = None, + skipna: bool = True, + keep_attrs: bool = None, + **kwargs, + ) -> T_Dataset: + """ + Reduce this Dataset's data by applying ``mean`` along some dimension(s). + + Parameters + ---------- + dim : hashable or iterable of hashable, optional + Name of dimension[s] along which to apply ``mean``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If ``None``, will reduce over all dimensions + present in the grouped variable. + skipna : bool, optional + If True, skip missing values (as marked by NaN). By default, only + skips missing values for float dtypes; other dtypes either do not + have a sentinel missing value (int) or skipna=True has not been + implemented (object, datetime64 or timedelta64). + keep_attrs : bool, optional + If True, ``attrs`` will be copied from the original + object to the new one. If False (default), the new object will be + returned without attributes. + **kwargs : dict + Additional keyword arguments passed on to the appropriate array + function for calculating ``mean`` on this object's data. + + Returns + ------- + reduced : Dataset + New Dataset with ``mean`` applied to its data and the + indicated dimension(s) removed + + Examples + -------- + >>> da = xr.DataArray( + ... 
np.array([1, 2, 3, 1, 2, np.nan]), + ... dims="time", + ... coords=dict( + ... time=("time", pd.date_range("01-01-2001", freq="M", periods=6)), + ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), + ... ), + ... ) + >>> ds = xr.Dataset(dict(da=da)) + >>> ds + + Dimensions: (time: 6) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> ds.groupby("labels").mean() + + Dimensions: (labels: 3) + Coordinates: + * labels (labels) object 'a' 'b' 'c' + Data variables: + da (labels) float64 1.0 2.0 2.0 + + Use ``skipna`` to control whether NaNs are ignored. + + >>> ds.groupby("labels").mean(skipna=False) + + Dimensions: (labels: 3) + Coordinates: + * labels (labels) object 'a' 'b' 'c' + Data variables: + da (labels) float64 nan 2.0 2.0 + + See Also + -------- + numpy.mean + Dataset.mean + :ref:`groupby` + User guide on groupby operations. + """ + return self.reduce( + duck_array_ops.mean, + dim=dim, + skipna=skipna, + numeric_only=True, + keep_attrs=keep_attrs, + **kwargs, + ) + + def prod( + self: DatasetReduce, + dim: Union[None, Hashable, Sequence[Hashable]] = None, + skipna: bool = True, + min_count: Optional[int] = None, + keep_attrs: bool = None, + **kwargs, + ) -> T_Dataset: + """ + Reduce this Dataset's data by applying ``prod`` along some dimension(s). + + Parameters + ---------- + dim : hashable or iterable of hashable, optional + Name of dimension[s] along which to apply ``prod``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If ``None``, will reduce over all dimensions + present in the grouped variable. + skipna : bool, optional + If True, skip missing values (as marked by NaN). By default, only + skips missing values for float dtypes; other dtypes either do not + have a sentinel missing value (int) or skipna=True has not been + implemented (object, datetime64 or timedelta64). + min_count : int, default: None + The required number of valid values to perform the operation. 
If + fewer than min_count non-NA values are present the result will be + NA. Only used if skipna is set to True or defaults to True for the + array's dtype. Changed in version 0.17.0: if specified on an integer + array and skipna=True, the result will be a float array. + keep_attrs : bool, optional + If True, ``attrs`` will be copied from the original + object to the new one. If False (default), the new object will be + returned without attributes. + **kwargs : dict + Additional keyword arguments passed on to the appropriate array + function for calculating ``prod`` on this object's data. + + Returns + ------- + reduced : Dataset + New Dataset with ``prod`` applied to its data and the + indicated dimension(s) removed + + Examples + -------- + >>> da = xr.DataArray( + ... np.array([1, 2, 3, 1, 2, np.nan]), + ... dims="time", + ... coords=dict( + ... time=("time", pd.date_range("01-01-2001", freq="M", periods=6)), + ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), + ... ), + ... ) + >>> ds = xr.Dataset(dict(da=da)) + >>> ds + + Dimensions: (time: 6) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> ds.groupby("labels").prod() + + Dimensions: (labels: 3) + Coordinates: + * labels (labels) object 'a' 'b' 'c' + Data variables: + da (labels) float64 1.0 4.0 3.0 + + Use ``skipna`` to control whether NaNs are ignored. + + >>> ds.groupby("labels").prod(skipna=False) + + Dimensions: (labels: 3) + Coordinates: + * labels (labels) object 'a' 'b' 'c' + Data variables: + da (labels) float64 nan 4.0 3.0 + + Specify ``min_count`` for finer control over when NaNs are ignored. + + >>> ds.groupby("labels").prod(skipna=True, min_count=2) + + Dimensions: (labels: 3) + Coordinates: + * labels (labels) object 'a' 'b' 'c' + Data variables: + da (labels) float64 nan 4.0 3.0 + + See Also + -------- + numpy.prod + Dataset.prod + :ref:`groupby` + User guide on groupby operations. 
+ """ + return self.reduce( + duck_array_ops.prod, + dim=dim, + skipna=skipna, + min_count=min_count, + numeric_only=True, + keep_attrs=keep_attrs, + **kwargs, + ) + + def sum( + self: DatasetReduce, + dim: Union[None, Hashable, Sequence[Hashable]] = None, + skipna: bool = True, + min_count: Optional[int] = None, + keep_attrs: bool = None, + **kwargs, + ) -> T_Dataset: + """ + Reduce this Dataset's data by applying ``sum`` along some dimension(s). + + Parameters + ---------- + dim : hashable or iterable of hashable, optional + Name of dimension[s] along which to apply ``sum``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If ``None``, will reduce over all dimensions + present in the grouped variable. + skipna : bool, optional + If True, skip missing values (as marked by NaN). By default, only + skips missing values for float dtypes; other dtypes either do not + have a sentinel missing value (int) or skipna=True has not been + implemented (object, datetime64 or timedelta64). + min_count : int, default: None + The required number of valid values to perform the operation. If + fewer than min_count non-NA values are present the result will be + NA. Only used if skipna is set to True or defaults to True for the + array's dtype. Changed in version 0.17.0: if specified on an integer + array and skipna=True, the result will be a float array. + keep_attrs : bool, optional + If True, ``attrs`` will be copied from the original + object to the new one. If False (default), the new object will be + returned without attributes. + **kwargs : dict + Additional keyword arguments passed on to the appropriate array + function for calculating ``sum`` on this object's data. + + Returns + ------- + reduced : Dataset + New Dataset with ``sum`` applied to its data and the + indicated dimension(s) removed + + Examples + -------- + >>> da = xr.DataArray( + ... np.array([1, 2, 3, 1, 2, np.nan]), + ... dims="time", + ... coords=dict( + ... 
time=("time", pd.date_range("01-01-2001", freq="M", periods=6)), + ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), + ... ), + ... ) + >>> ds = xr.Dataset(dict(da=da)) + >>> ds + + Dimensions: (time: 6) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> ds.groupby("labels").sum() + + Dimensions: (labels: 3) + Coordinates: + * labels (labels) object 'a' 'b' 'c' + Data variables: + da (labels) float64 1.0 4.0 4.0 + + Use ``skipna`` to control whether NaNs are ignored. + + >>> ds.groupby("labels").sum(skipna=False) + + Dimensions: (labels: 3) + Coordinates: + * labels (labels) object 'a' 'b' 'c' + Data variables: + da (labels) float64 nan 4.0 4.0 + + Specify ``min_count`` for finer control over when NaNs are ignored. + + >>> ds.groupby("labels").sum(skipna=True, min_count=2) + + Dimensions: (labels: 3) + Coordinates: + * labels (labels) object 'a' 'b' 'c' + Data variables: + da (labels) float64 nan 4.0 4.0 + + See Also + -------- + numpy.sum + Dataset.sum + :ref:`groupby` + User guide on groupby operations. + """ + return self.reduce( + duck_array_ops.sum, + dim=dim, + skipna=skipna, + min_count=min_count, + numeric_only=True, + keep_attrs=keep_attrs, + **kwargs, + ) + + def std( + self: DatasetReduce, + dim: Union[None, Hashable, Sequence[Hashable]] = None, + skipna: bool = True, + keep_attrs: bool = None, + **kwargs, + ) -> T_Dataset: + """ + Reduce this Dataset's data by applying ``std`` along some dimension(s). + + Parameters + ---------- + dim : hashable or iterable of hashable, optional + Name of dimension[s] along which to apply ``std``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If ``None``, will reduce over all dimensions + present in the grouped variable. + skipna : bool, optional + If True, skip missing values (as marked by NaN). 
By default, only + skips missing values for float dtypes; other dtypes either do not + have a sentinel missing value (int) or skipna=True has not been + implemented (object, datetime64 or timedelta64). + keep_attrs : bool, optional + If True, ``attrs`` will be copied from the original + object to the new one. If False (default), the new object will be + returned without attributes. + **kwargs : dict + Additional keyword arguments passed on to the appropriate array + function for calculating ``std`` on this object's data. + + Returns + ------- + reduced : Dataset + New Dataset with ``std`` applied to its data and the + indicated dimension(s) removed + + Examples + -------- + >>> da = xr.DataArray( + ... np.array([1, 2, 3, 1, 2, np.nan]), + ... dims="time", + ... coords=dict( + ... time=("time", pd.date_range("01-01-2001", freq="M", periods=6)), + ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), + ... ), + ... ) + >>> ds = xr.Dataset(dict(da=da)) + >>> ds + + Dimensions: (time: 6) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> ds.groupby("labels").std() + + Dimensions: (labels: 3) + Coordinates: + * labels (labels) object 'a' 'b' 'c' + Data variables: + da (labels) float64 0.0 0.0 1.0 + + Use ``skipna`` to control whether NaNs are ignored. + + >>> ds.groupby("labels").std(skipna=False) + + Dimensions: (labels: 3) + Coordinates: + * labels (labels) object 'a' 'b' 'c' + Data variables: + da (labels) float64 nan 0.0 1.0 + + See Also + -------- + numpy.std + Dataset.std + :ref:`groupby` + User guide on groupby operations. 
+ """ + return self.reduce( + duck_array_ops.std, + dim=dim, + skipna=skipna, + numeric_only=True, + keep_attrs=keep_attrs, + **kwargs, + ) + + def var( + self: DatasetReduce, + dim: Union[None, Hashable, Sequence[Hashable]] = None, + skipna: bool = True, + keep_attrs: bool = None, + **kwargs, + ) -> T_Dataset: + """ + Reduce this Dataset's data by applying ``var`` along some dimension(s). + + Parameters + ---------- + dim : hashable or iterable of hashable, optional + Name of dimension[s] along which to apply ``var``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If ``None``, will reduce over all dimensions + present in the grouped variable. + skipna : bool, optional + If True, skip missing values (as marked by NaN). By default, only + skips missing values for float dtypes; other dtypes either do not + have a sentinel missing value (int) or skipna=True has not been + implemented (object, datetime64 or timedelta64). + keep_attrs : bool, optional + If True, ``attrs`` will be copied from the original + object to the new one. If False (default), the new object will be + returned without attributes. + **kwargs : dict + Additional keyword arguments passed on to the appropriate array + function for calculating ``var`` on this object's data. + + Returns + ------- + reduced : Dataset + New Dataset with ``var`` applied to its data and the + indicated dimension(s) removed + + Examples + -------- + >>> da = xr.DataArray( + ... np.array([1, 2, 3, 1, 2, np.nan]), + ... dims="time", + ... coords=dict( + ... time=("time", pd.date_range("01-01-2001", freq="M", periods=6)), + ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), + ... ), + ... ) + >>> ds = xr.Dataset(dict(da=da)) + >>> ds + + Dimensions: (time: 6) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 
2001-06-30 + labels (time) >> ds.groupby("labels").var() + + Dimensions: (labels: 3) + Coordinates: + * labels (labels) object 'a' 'b' 'c' + Data variables: + da (labels) float64 0.0 0.0 1.0 + + Use ``skipna`` to control whether NaNs are ignored. + + >>> ds.groupby("labels").var(skipna=False) + + Dimensions: (labels: 3) + Coordinates: + * labels (labels) object 'a' 'b' 'c' + Data variables: + da (labels) float64 nan 0.0 1.0 + + See Also + -------- + numpy.var + Dataset.var + :ref:`groupby` + User guide on groupby operations. + """ + return self.reduce( + duck_array_ops.var, + dim=dim, + skipna=skipna, + numeric_only=True, + keep_attrs=keep_attrs, + **kwargs, + ) + + def median( + self: DatasetReduce, + dim: Union[None, Hashable, Sequence[Hashable]] = None, + skipna: bool = True, + keep_attrs: bool = None, + **kwargs, + ) -> T_Dataset: + """ + Reduce this Dataset's data by applying ``median`` along some dimension(s). + + Parameters + ---------- + dim : hashable or iterable of hashable, optional + Name of dimension[s] along which to apply ``median``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If ``None``, will reduce over all dimensions + present in the grouped variable. + skipna : bool, optional + If True, skip missing values (as marked by NaN). By default, only + skips missing values for float dtypes; other dtypes either do not + have a sentinel missing value (int) or skipna=True has not been + implemented (object, datetime64 or timedelta64). + keep_attrs : bool, optional + If True, ``attrs`` will be copied from the original + object to the new one. If False (default), the new object will be + returned without attributes. + **kwargs : dict + Additional keyword arguments passed on to the appropriate array + function for calculating ``median`` on this object's data. + + Returns + ------- + reduced : Dataset + New Dataset with ``median`` applied to its data and the + indicated dimension(s) removed + + Examples + -------- + >>> da = xr.DataArray( + ... 
np.array([1, 2, 3, 1, 2, np.nan]), + ... dims="time", + ... coords=dict( + ... time=("time", pd.date_range("01-01-2001", freq="M", periods=6)), + ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), + ... ), + ... ) + >>> ds = xr.Dataset(dict(da=da)) + >>> ds + + Dimensions: (time: 6) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> ds.groupby("labels").median() + + Dimensions: (labels: 3) + Coordinates: + * labels (labels) object 'a' 'b' 'c' + Data variables: + da (labels) float64 1.0 2.0 2.0 + + Use ``skipna`` to control whether NaNs are ignored. + + >>> ds.groupby("labels").median(skipna=False) + + Dimensions: (labels: 3) + Coordinates: + * labels (labels) object 'a' 'b' 'c' + Data variables: + da (labels) float64 nan 2.0 2.0 + + See Also + -------- + numpy.median + Dataset.median + :ref:`groupby` + User guide on groupby operations. + """ + return self.reduce( + duck_array_ops.median, + dim=dim, + skipna=skipna, + numeric_only=True, + keep_attrs=keep_attrs, + **kwargs, + ) + + +class DatasetResampleReductions: + __slots__ = () + + def count( + self: DatasetReduce, + dim: Union[None, Hashable, Sequence[Hashable]] = None, + keep_attrs: bool = None, + **kwargs, + ) -> T_Dataset: + """ + Reduce this Dataset's data by applying ``count`` along some dimension(s). + + Parameters + ---------- + dim : hashable or iterable of hashable, optional + Name of dimension[s] along which to apply ``count``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If ``None``, will reduce over all dimensions + present in the grouped variable. + keep_attrs : bool, optional + If True, ``attrs`` will be copied from the original + object to the new one. If False (default), the new object will be + returned without attributes. + **kwargs : dict + Additional keyword arguments passed on to the appropriate array + function for calculating ``count`` on this object's data. 
+ + Returns + ------- + reduced : Dataset + New Dataset with ``count`` applied to its data and the + indicated dimension(s) removed + + Examples + -------- + >>> da = xr.DataArray( + ... np.array([1, 2, 3, 1, 2, np.nan]), + ... dims="time", + ... coords=dict( + ... time=("time", pd.date_range("01-01-2001", freq="M", periods=6)), + ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), + ... ), + ... ) + >>> ds = xr.Dataset(dict(da=da)) + >>> ds + + Dimensions: (time: 6) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> ds.resample(time="3M").count() + + Dimensions: (time: 3) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 + Data variables: + da (time) int64 1 3 1 + + See Also + -------- + numpy.count + Dataset.count + :ref:`resampling` + User guide on resampling operations. + """ + return self.reduce( + duck_array_ops.count, + dim=dim, + numeric_only=False, + keep_attrs=keep_attrs, + **kwargs, + ) + + def all( + self: DatasetReduce, + dim: Union[None, Hashable, Sequence[Hashable]] = None, + keep_attrs: bool = None, + **kwargs, + ) -> T_Dataset: + """ + Reduce this Dataset's data by applying ``all`` along some dimension(s). + + Parameters + ---------- + dim : hashable or iterable of hashable, optional + Name of dimension[s] along which to apply ``all``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If ``None``, will reduce over all dimensions + present in the grouped variable. + keep_attrs : bool, optional + If True, ``attrs`` will be copied from the original + object to the new one. If False (default), the new object will be + returned without attributes. + **kwargs : dict + Additional keyword arguments passed on to the appropriate array + function for calculating ``all`` on this object's data. + + Returns + ------- + reduced : Dataset + New Dataset with ``all`` applied to its data and the + indicated dimension(s) removed + + Examples + -------- + >>> da = xr.DataArray( + ... 
np.array([True, True, True, True, True, False], dtype=bool), + ... dims="time", + ... coords=dict( + ... time=("time", pd.date_range("01-01-2001", freq="M", periods=6)), + ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), + ... ), + ... ) + >>> ds = xr.Dataset(dict(da=da)) + >>> ds + + Dimensions: (time: 6) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> ds.resample(time="3M").all() + + Dimensions: (time: 3) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 + Data variables: + da (time) bool True True False + + See Also + -------- + numpy.all + Dataset.all + :ref:`resampling` + User guide on resampling operations. + """ + return self.reduce( + duck_array_ops.array_all, + dim=dim, + numeric_only=False, + keep_attrs=keep_attrs, + **kwargs, + ) + + def any( + self: DatasetReduce, + dim: Union[None, Hashable, Sequence[Hashable]] = None, + keep_attrs: bool = None, + **kwargs, + ) -> T_Dataset: + """ + Reduce this Dataset's data by applying ``any`` along some dimension(s). + + Parameters + ---------- + dim : hashable or iterable of hashable, optional + Name of dimension[s] along which to apply ``any``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If ``None``, will reduce over all dimensions + present in the grouped variable. + keep_attrs : bool, optional + If True, ``attrs`` will be copied from the original + object to the new one. If False (default), the new object will be + returned without attributes. + **kwargs : dict + Additional keyword arguments passed on to the appropriate array + function for calculating ``any`` on this object's data. + + Returns + ------- + reduced : Dataset + New Dataset with ``any`` applied to its data and the + indicated dimension(s) removed + + Examples + -------- + >>> da = xr.DataArray( + ... np.array([True, True, True, True, True, False], dtype=bool), + ... dims="time", + ... coords=dict( + ... 
time=("time", pd.date_range("01-01-2001", freq="M", periods=6)), + ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), + ... ), + ... ) + >>> ds = xr.Dataset(dict(da=da)) + >>> ds + + Dimensions: (time: 6) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> ds.resample(time="3M").any() + + Dimensions: (time: 3) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 + Data variables: + da (time) bool True True True + + See Also + -------- + numpy.any + Dataset.any + :ref:`resampling` + User guide on resampling operations. + """ + return self.reduce( + duck_array_ops.array_any, + dim=dim, + numeric_only=False, + keep_attrs=keep_attrs, + **kwargs, + ) + + def max( + self: DatasetReduce, + dim: Union[None, Hashable, Sequence[Hashable]] = None, + skipna: bool = True, + keep_attrs: bool = None, + **kwargs, + ) -> T_Dataset: + """ + Reduce this Dataset's data by applying ``max`` along some dimension(s). + + Parameters + ---------- + dim : hashable or iterable of hashable, optional + Name of dimension[s] along which to apply ``max``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If ``None``, will reduce over all dimensions + present in the grouped variable. + skipna : bool, optional + If True, skip missing values (as marked by NaN). By default, only + skips missing values for float dtypes; other dtypes either do not + have a sentinel missing value (int) or skipna=True has not been + implemented (object, datetime64 or timedelta64). + keep_attrs : bool, optional + If True, ``attrs`` will be copied from the original + object to the new one. If False (default), the new object will be + returned without attributes. + **kwargs : dict + Additional keyword arguments passed on to the appropriate array + function for calculating ``max`` on this object's data. 
+ + Returns + ------- + reduced : Dataset + New Dataset with ``max`` applied to its data and the + indicated dimension(s) removed + + Examples + -------- + >>> da = xr.DataArray( + ... np.array([1, 2, 3, 1, 2, np.nan]), + ... dims="time", + ... coords=dict( + ... time=("time", pd.date_range("01-01-2001", freq="M", periods=6)), + ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), + ... ), + ... ) + >>> ds = xr.Dataset(dict(da=da)) + >>> ds + + Dimensions: (time: 6) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> ds.resample(time="3M").max() + + Dimensions: (time: 3) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 + Data variables: + da (time) float64 1.0 3.0 2.0 + + Use ``skipna`` to control whether NaNs are ignored. + + >>> ds.resample(time="3M").max(skipna=False) + + Dimensions: (time: 3) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 + Data variables: + da (time) float64 1.0 3.0 nan + + See Also + -------- + numpy.max + Dataset.max + :ref:`resampling` + User guide on resampling operations. + """ + return self.reduce( + duck_array_ops.max, + dim=dim, + skipna=skipna, + numeric_only=False, + keep_attrs=keep_attrs, + **kwargs, + ) + + def min( + self: DatasetReduce, + dim: Union[None, Hashable, Sequence[Hashable]] = None, + skipna: bool = True, + keep_attrs: bool = None, + **kwargs, + ) -> T_Dataset: + """ + Reduce this Dataset's data by applying ``min`` along some dimension(s). + + Parameters + ---------- + dim : hashable or iterable of hashable, optional + Name of dimension[s] along which to apply ``min``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If ``None``, will reduce over all dimensions + present in the grouped variable. + skipna : bool, optional + If True, skip missing values (as marked by NaN). 
By default, only + skips missing values for float dtypes; other dtypes either do not + have a sentinel missing value (int) or skipna=True has not been + implemented (object, datetime64 or timedelta64). + keep_attrs : bool, optional + If True, ``attrs`` will be copied from the original + object to the new one. If False (default), the new object will be + returned without attributes. + **kwargs : dict + Additional keyword arguments passed on to the appropriate array + function for calculating ``min`` on this object's data. + + Returns + ------- + reduced : Dataset + New Dataset with ``min`` applied to its data and the + indicated dimension(s) removed + + Examples + -------- + >>> da = xr.DataArray( + ... np.array([1, 2, 3, 1, 2, np.nan]), + ... dims="time", + ... coords=dict( + ... time=("time", pd.date_range("01-01-2001", freq="M", periods=6)), + ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), + ... ), + ... ) + >>> ds = xr.Dataset(dict(da=da)) + >>> ds + + Dimensions: (time: 6) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> ds.resample(time="3M").min() + + Dimensions: (time: 3) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 + Data variables: + da (time) float64 1.0 1.0 2.0 + + Use ``skipna`` to control whether NaNs are ignored. + + >>> ds.resample(time="3M").min(skipna=False) + + Dimensions: (time: 3) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 + Data variables: + da (time) float64 1.0 1.0 nan + + See Also + -------- + numpy.min + Dataset.min + :ref:`resampling` + User guide on resampling operations. 
+ """ + return self.reduce( + duck_array_ops.min, + dim=dim, + skipna=skipna, + numeric_only=False, + keep_attrs=keep_attrs, + **kwargs, + ) + + def mean( + self: DatasetReduce, + dim: Union[None, Hashable, Sequence[Hashable]] = None, + skipna: bool = True, + keep_attrs: bool = None, + **kwargs, + ) -> T_Dataset: + """ + Reduce this Dataset's data by applying ``mean`` along some dimension(s). + + Parameters + ---------- + dim : hashable or iterable of hashable, optional + Name of dimension[s] along which to apply ``mean``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If ``None``, will reduce over all dimensions + present in the grouped variable. + skipna : bool, optional + If True, skip missing values (as marked by NaN). By default, only + skips missing values for float dtypes; other dtypes either do not + have a sentinel missing value (int) or skipna=True has not been + implemented (object, datetime64 or timedelta64). + keep_attrs : bool, optional + If True, ``attrs`` will be copied from the original + object to the new one. If False (default), the new object will be + returned without attributes. + **kwargs : dict + Additional keyword arguments passed on to the appropriate array + function for calculating ``mean`` on this object's data. + + Returns + ------- + reduced : Dataset + New Dataset with ``mean`` applied to its data and the + indicated dimension(s) removed + + Examples + -------- + >>> da = xr.DataArray( + ... np.array([1, 2, 3, 1, 2, np.nan]), + ... dims="time", + ... coords=dict( + ... time=("time", pd.date_range("01-01-2001", freq="M", periods=6)), + ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), + ... ), + ... ) + >>> ds = xr.Dataset(dict(da=da)) + >>> ds + + Dimensions: (time: 6) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 
2001-06-30 + labels (time) >> ds.resample(time="3M").mean() + + Dimensions: (time: 3) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 + Data variables: + da (time) float64 1.0 2.0 2.0 + + Use ``skipna`` to control whether NaNs are ignored. + + >>> ds.resample(time="3M").mean(skipna=False) + + Dimensions: (time: 3) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 + Data variables: + da (time) float64 1.0 2.0 nan + + See Also + -------- + numpy.mean + Dataset.mean + :ref:`resampling` + User guide on resampling operations. + """ + return self.reduce( + duck_array_ops.mean, + dim=dim, + skipna=skipna, + numeric_only=True, + keep_attrs=keep_attrs, + **kwargs, + ) + + def prod( + self: DatasetReduce, + dim: Union[None, Hashable, Sequence[Hashable]] = None, + skipna: bool = True, + min_count: Optional[int] = None, + keep_attrs: bool = None, + **kwargs, + ) -> T_Dataset: + """ + Reduce this Dataset's data by applying ``prod`` along some dimension(s). + + Parameters + ---------- + dim : hashable or iterable of hashable, optional + Name of dimension[s] along which to apply ``prod``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If ``None``, will reduce over all dimensions + present in the grouped variable. + skipna : bool, optional + If True, skip missing values (as marked by NaN). By default, only + skips missing values for float dtypes; other dtypes either do not + have a sentinel missing value (int) or skipna=True has not been + implemented (object, datetime64 or timedelta64). + min_count : int, default: None + The required number of valid values to perform the operation. If + fewer than min_count non-NA values are present the result will be + NA. Only used if skipna is set to True or defaults to True for the + array's dtype. Changed in version 0.17.0: if specified on an integer + array and skipna=True, the result will be a float array. 
+ keep_attrs : bool, optional + If True, ``attrs`` will be copied from the original + object to the new one. If False (default), the new object will be + returned without attributes. + **kwargs : dict + Additional keyword arguments passed on to the appropriate array + function for calculating ``prod`` on this object's data. + + Returns + ------- + reduced : Dataset + New Dataset with ``prod`` applied to its data and the + indicated dimension(s) removed + + Examples + -------- + >>> da = xr.DataArray( + ... np.array([1, 2, 3, 1, 2, np.nan]), + ... dims="time", + ... coords=dict( + ... time=("time", pd.date_range("01-01-2001", freq="M", periods=6)), + ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), + ... ), + ... ) + >>> ds = xr.Dataset(dict(da=da)) + >>> ds + + Dimensions: (time: 6) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> ds.resample(time="3M").prod() + + Dimensions: (time: 3) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 + Data variables: + da (time) float64 1.0 6.0 2.0 + + Use ``skipna`` to control whether NaNs are ignored. + + >>> ds.resample(time="3M").prod(skipna=False) + + Dimensions: (time: 3) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 + Data variables: + da (time) float64 1.0 6.0 nan + + Specify ``min_count`` for finer control over when NaNs are ignored. + + >>> ds.resample(time="3M").prod(skipna=True, min_count=2) + + Dimensions: (time: 3) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 + Data variables: + da (time) float64 nan 6.0 nan + + See Also + -------- + numpy.prod + Dataset.prod + :ref:`resampling` + User guide on resampling operations. 
+ """ + return self.reduce( + duck_array_ops.prod, + dim=dim, + skipna=skipna, + min_count=min_count, + numeric_only=True, + keep_attrs=keep_attrs, + **kwargs, + ) + + def sum( + self: DatasetReduce, + dim: Union[None, Hashable, Sequence[Hashable]] = None, + skipna: bool = True, + min_count: Optional[int] = None, + keep_attrs: bool = None, + **kwargs, + ) -> T_Dataset: + """ + Reduce this Dataset's data by applying ``sum`` along some dimension(s). + + Parameters + ---------- + dim : hashable or iterable of hashable, optional + Name of dimension[s] along which to apply ``sum``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If ``None``, will reduce over all dimensions + present in the grouped variable. + skipna : bool, optional + If True, skip missing values (as marked by NaN). By default, only + skips missing values for float dtypes; other dtypes either do not + have a sentinel missing value (int) or skipna=True has not been + implemented (object, datetime64 or timedelta64). + min_count : int, default: None + The required number of valid values to perform the operation. If + fewer than min_count non-NA values are present the result will be + NA. Only used if skipna is set to True or defaults to True for the + array's dtype. Changed in version 0.17.0: if specified on an integer + array and skipna=True, the result will be a float array. + keep_attrs : bool, optional + If True, ``attrs`` will be copied from the original + object to the new one. If False (default), the new object will be + returned without attributes. + **kwargs : dict + Additional keyword arguments passed on to the appropriate array + function for calculating ``sum`` on this object's data. + + Returns + ------- + reduced : Dataset + New Dataset with ``sum`` applied to its data and the + indicated dimension(s) removed + + Examples + -------- + >>> da = xr.DataArray( + ... np.array([1, 2, 3, 1, 2, np.nan]), + ... dims="time", + ... coords=dict( + ... 
time=("time", pd.date_range("01-01-2001", freq="M", periods=6)), + ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), + ... ), + ... ) + >>> ds = xr.Dataset(dict(da=da)) + >>> ds + + Dimensions: (time: 6) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> ds.resample(time="3M").sum() + + Dimensions: (time: 3) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 + Data variables: + da (time) float64 1.0 6.0 2.0 + + Use ``skipna`` to control whether NaNs are ignored. + + >>> ds.resample(time="3M").sum(skipna=False) + + Dimensions: (time: 3) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 + Data variables: + da (time) float64 1.0 6.0 nan + + Specify ``min_count`` for finer control over when NaNs are ignored. + + >>> ds.resample(time="3M").sum(skipna=True, min_count=2) + + Dimensions: (time: 3) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 + Data variables: + da (time) float64 nan 6.0 nan + + See Also + -------- + numpy.sum + Dataset.sum + :ref:`resampling` + User guide on resampling operations. + """ + return self.reduce( + duck_array_ops.sum, + dim=dim, + skipna=skipna, + min_count=min_count, + numeric_only=True, + keep_attrs=keep_attrs, + **kwargs, + ) + + def std( + self: DatasetReduce, + dim: Union[None, Hashable, Sequence[Hashable]] = None, + skipna: bool = True, + keep_attrs: bool = None, + **kwargs, + ) -> T_Dataset: + """ + Reduce this Dataset's data by applying ``std`` along some dimension(s). + + Parameters + ---------- + dim : hashable or iterable of hashable, optional + Name of dimension[s] along which to apply ``std``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If ``None``, will reduce over all dimensions + present in the grouped variable. + skipna : bool, optional + If True, skip missing values (as marked by NaN). 
By default, only + skips missing values for float dtypes; other dtypes either do not + have a sentinel missing value (int) or skipna=True has not been + implemented (object, datetime64 or timedelta64). + keep_attrs : bool, optional + If True, ``attrs`` will be copied from the original + object to the new one. If False (default), the new object will be + returned without attributes. + **kwargs : dict + Additional keyword arguments passed on to the appropriate array + function for calculating ``std`` on this object's data. + + Returns + ------- + reduced : Dataset + New Dataset with ``std`` applied to its data and the + indicated dimension(s) removed + + Examples + -------- + >>> da = xr.DataArray( + ... np.array([1, 2, 3, 1, 2, np.nan]), + ... dims="time", + ... coords=dict( + ... time=("time", pd.date_range("01-01-2001", freq="M", periods=6)), + ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), + ... ), + ... ) + >>> ds = xr.Dataset(dict(da=da)) + >>> ds + + Dimensions: (time: 6) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> ds.resample(time="3M").std() + + Dimensions: (time: 3) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 + Data variables: + da (time) float64 0.0 0.8165 0.0 + + Use ``skipna`` to control whether NaNs are ignored. + + >>> ds.resample(time="3M").std(skipna=False) + + Dimensions: (time: 3) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 + Data variables: + da (time) float64 0.0 0.8165 nan + + See Also + -------- + numpy.std + Dataset.std + :ref:`resampling` + User guide on resampling operations. 
+ """ + return self.reduce( + duck_array_ops.std, + dim=dim, + skipna=skipna, + numeric_only=True, + keep_attrs=keep_attrs, + **kwargs, + ) + + def var( + self: DatasetReduce, + dim: Union[None, Hashable, Sequence[Hashable]] = None, + skipna: bool = True, + keep_attrs: bool = None, + **kwargs, + ) -> T_Dataset: + """ + Reduce this Dataset's data by applying ``var`` along some dimension(s). + + Parameters + ---------- + dim : hashable or iterable of hashable, optional + Name of dimension[s] along which to apply ``var``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If ``None``, will reduce over all dimensions + present in the grouped variable. + skipna : bool, optional + If True, skip missing values (as marked by NaN). By default, only + skips missing values for float dtypes; other dtypes either do not + have a sentinel missing value (int) or skipna=True has not been + implemented (object, datetime64 or timedelta64). + keep_attrs : bool, optional + If True, ``attrs`` will be copied from the original + object to the new one. If False (default), the new object will be + returned without attributes. + **kwargs : dict + Additional keyword arguments passed on to the appropriate array + function for calculating ``var`` on this object's data. + + Returns + ------- + reduced : Dataset + New Dataset with ``var`` applied to its data and the + indicated dimension(s) removed + + Examples + -------- + >>> da = xr.DataArray( + ... np.array([1, 2, 3, 1, 2, np.nan]), + ... dims="time", + ... coords=dict( + ... time=("time", pd.date_range("01-01-2001", freq="M", periods=6)), + ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), + ... ), + ... ) + >>> ds = xr.Dataset(dict(da=da)) + >>> ds + + Dimensions: (time: 6) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 
2001-06-30 + labels (time) >> ds.resample(time="3M").var() + + Dimensions: (time: 3) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 + Data variables: + da (time) float64 0.0 0.6667 0.0 + + Use ``skipna`` to control whether NaNs are ignored. + + >>> ds.resample(time="3M").var(skipna=False) + + Dimensions: (time: 3) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 + Data variables: + da (time) float64 0.0 0.6667 nan + + See Also + -------- + numpy.var + Dataset.var + :ref:`resampling` + User guide on resampling operations. + """ + return self.reduce( + duck_array_ops.var, + dim=dim, + skipna=skipna, + numeric_only=True, + keep_attrs=keep_attrs, + **kwargs, + ) + + def median( + self: DatasetReduce, + dim: Union[None, Hashable, Sequence[Hashable]] = None, + skipna: bool = True, + keep_attrs: bool = None, + **kwargs, + ) -> T_Dataset: + """ + Reduce this Dataset's data by applying ``median`` along some dimension(s). + + Parameters + ---------- + dim : hashable or iterable of hashable, optional + Name of dimension[s] along which to apply ``median``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If ``None``, will reduce over all dimensions + present in the grouped variable. + skipna : bool, optional + If True, skip missing values (as marked by NaN). By default, only + skips missing values for float dtypes; other dtypes either do not + have a sentinel missing value (int) or skipna=True has not been + implemented (object, datetime64 or timedelta64). + keep_attrs : bool, optional + If True, ``attrs`` will be copied from the original + object to the new one. If False (default), the new object will be + returned without attributes. + **kwargs : dict + Additional keyword arguments passed on to the appropriate array + function for calculating ``median`` on this object's data. 
+ + Returns + ------- + reduced : Dataset + New Dataset with ``median`` applied to its data and the + indicated dimension(s) removed + + Examples + -------- + >>> da = xr.DataArray( + ... np.array([1, 2, 3, 1, 2, np.nan]), + ... dims="time", + ... coords=dict( + ... time=("time", pd.date_range("01-01-2001", freq="M", periods=6)), + ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), + ... ), + ... ) + >>> ds = xr.Dataset(dict(da=da)) + >>> ds + + Dimensions: (time: 6) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> ds.resample(time="3M").median() + + Dimensions: (time: 3) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 + Data variables: + da (time) float64 1.0 2.0 2.0 + + Use ``skipna`` to control whether NaNs are ignored. + + >>> ds.resample(time="3M").median(skipna=False) + + Dimensions: (time: 3) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 + Data variables: + da (time) float64 1.0 2.0 nan + + See Also + -------- + numpy.median + Dataset.median + :ref:`resampling` + User guide on resampling operations. + """ + return self.reduce( + duck_array_ops.median, + dim=dim, + skipna=skipna, + numeric_only=True, + keep_attrs=keep_attrs, + **kwargs, + ) + + +class DataArrayReduce(Protocol): + def reduce( + self, + func: Callable[..., Any], + dim: Union[None, Hashable, Sequence[Hashable]] = None, + axis: Union[None, int, Sequence[int]] = None, + keep_attrs: bool = None, + keepdims: bool = False, + **kwargs: Any, + ) -> T_DataArray: + ... + + +class DataArrayGroupByReductions: + __slots__ = () + + def count( + self: DataArrayReduce, + dim: Union[None, Hashable, Sequence[Hashable]] = None, + keep_attrs: bool = None, + **kwargs, + ) -> T_DataArray: + """ + Reduce this DataArray's data by applying ``count`` along some dimension(s). 
+ + Parameters + ---------- + dim : hashable or iterable of hashable, optional + Name of dimension[s] along which to apply ``count``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If ``None``, will reduce over all dimensions + present in the grouped variable. + keep_attrs : bool, optional + If True, ``attrs`` will be copied from the original + object to the new one. If False (default), the new object will be + returned without attributes. + **kwargs : dict + Additional keyword arguments passed on to the appropriate array + function for calculating ``count`` on this object's data. + + Returns + ------- + reduced : DataArray + New DataArray with ``count`` applied to its data and the + indicated dimension(s) removed + + Examples + -------- + >>> da = xr.DataArray( + ... np.array([1, 2, 3, 1, 2, np.nan]), + ... dims="time", + ... coords=dict( + ... time=("time", pd.date_range("01-01-2001", freq="M", periods=6)), + ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), + ... ), + ... ) + >>> da + + array([ 1., 2., 3., 1., 2., nan]) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> da.groupby("labels").count() + + array([1, 2, 2]) + Coordinates: + * labels (labels) object 'a' 'b' 'c' + + See Also + -------- + numpy.count + DataArray.count + :ref:`groupby` + User guide on groupby operations. + """ + return self.reduce( + duck_array_ops.count, + dim=dim, + keep_attrs=keep_attrs, + **kwargs, + ) + + def all( + self: DataArrayReduce, + dim: Union[None, Hashable, Sequence[Hashable]] = None, + keep_attrs: bool = None, + **kwargs, + ) -> T_DataArray: + """ + Reduce this DataArray's data by applying ``all`` along some dimension(s). + + Parameters + ---------- + dim : hashable or iterable of hashable, optional + Name of dimension[s] along which to apply ``all``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If ``None``, will reduce over all dimensions + present in the grouped variable. 
+ keep_attrs : bool, optional + If True, ``attrs`` will be copied from the original + object to the new one. If False (default), the new object will be + returned without attributes. + **kwargs : dict + Additional keyword arguments passed on to the appropriate array + function for calculating ``all`` on this object's data. + + Returns + ------- + reduced : DataArray + New DataArray with ``all`` applied to its data and the + indicated dimension(s) removed + + Examples + -------- + >>> da = xr.DataArray( + ... np.array([True, True, True, True, True, False], dtype=bool), + ... dims="time", + ... coords=dict( + ... time=("time", pd.date_range("01-01-2001", freq="M", periods=6)), + ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), + ... ), + ... ) + >>> da + + array([ True, True, True, True, True, False]) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> da.groupby("labels").all() + + array([False, True, True]) + Coordinates: + * labels (labels) object 'a' 'b' 'c' + + See Also + -------- + numpy.all + DataArray.all + :ref:`groupby` + User guide on groupby operations. + """ + return self.reduce( + duck_array_ops.array_all, + dim=dim, + keep_attrs=keep_attrs, + **kwargs, + ) + + def any( + self: DataArrayReduce, + dim: Union[None, Hashable, Sequence[Hashable]] = None, + keep_attrs: bool = None, + **kwargs, + ) -> T_DataArray: + """ + Reduce this DataArray's data by applying ``any`` along some dimension(s). + + Parameters + ---------- + dim : hashable or iterable of hashable, optional + Name of dimension[s] along which to apply ``any``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If ``None``, will reduce over all dimensions + present in the grouped variable. + keep_attrs : bool, optional + If True, ``attrs`` will be copied from the original + object to the new one. If False (default), the new object will be + returned without attributes. 
+ **kwargs : dict + Additional keyword arguments passed on to the appropriate array + function for calculating ``any`` on this object's data. + + Returns + ------- + reduced : DataArray + New DataArray with ``any`` applied to its data and the + indicated dimension(s) removed + + Examples + -------- + >>> da = xr.DataArray( + ... np.array([True, True, True, True, True, False], dtype=bool), + ... dims="time", + ... coords=dict( + ... time=("time", pd.date_range("01-01-2001", freq="M", periods=6)), + ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), + ... ), + ... ) + >>> da + + array([ True, True, True, True, True, False]) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> da.groupby("labels").any() + + array([ True, True, True]) + Coordinates: + * labels (labels) object 'a' 'b' 'c' + + See Also + -------- + numpy.any + DataArray.any + :ref:`groupby` + User guide on groupby operations. + """ + return self.reduce( + duck_array_ops.array_any, + dim=dim, + keep_attrs=keep_attrs, + **kwargs, + ) + + def max( + self: DataArrayReduce, + dim: Union[None, Hashable, Sequence[Hashable]] = None, + skipna: bool = True, + keep_attrs: bool = None, + **kwargs, + ) -> T_DataArray: + """ + Reduce this DataArray's data by applying ``max`` along some dimension(s). + + Parameters + ---------- + dim : hashable or iterable of hashable, optional + Name of dimension[s] along which to apply ``max``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If ``None``, will reduce over all dimensions + present in the grouped variable. + skipna : bool, optional + If True, skip missing values (as marked by NaN). By default, only + skips missing values for float dtypes; other dtypes either do not + have a sentinel missing value (int) or skipna=True has not been + implemented (object, datetime64 or timedelta64). + keep_attrs : bool, optional + If True, ``attrs`` will be copied from the original + object to the new one. 
If False (default), the new object will be + returned without attributes. + **kwargs : dict + Additional keyword arguments passed on to the appropriate array + function for calculating ``max`` on this object's data. + + Returns + ------- + reduced : DataArray + New DataArray with ``max`` applied to its data and the + indicated dimension(s) removed + + Examples + -------- + >>> da = xr.DataArray( + ... np.array([1, 2, 3, 1, 2, np.nan]), + ... dims="time", + ... coords=dict( + ... time=("time", pd.date_range("01-01-2001", freq="M", periods=6)), + ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), + ... ), + ... ) + >>> da + + array([ 1., 2., 3., 1., 2., nan]) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> da.groupby("labels").max() + + array([1., 2., 3.]) + Coordinates: + * labels (labels) object 'a' 'b' 'c' + + Use ``skipna`` to control whether NaNs are ignored. + + >>> da.groupby("labels").max(skipna=False) + + array([nan, 2., 3.]) + Coordinates: + * labels (labels) object 'a' 'b' 'c' + + See Also + -------- + numpy.max + DataArray.max + :ref:`groupby` + User guide on groupby operations. + """ + return self.reduce( + duck_array_ops.max, + dim=dim, + skipna=skipna, + keep_attrs=keep_attrs, + **kwargs, + ) + + def min( + self: DataArrayReduce, + dim: Union[None, Hashable, Sequence[Hashable]] = None, + skipna: bool = True, + keep_attrs: bool = None, + **kwargs, + ) -> T_DataArray: + """ + Reduce this DataArray's data by applying ``min`` along some dimension(s). + + Parameters + ---------- + dim : hashable or iterable of hashable, optional + Name of dimension[s] along which to apply ``min``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If ``None``, will reduce over all dimensions + present in the grouped variable. + skipna : bool, optional + If True, skip missing values (as marked by NaN). 
By default, only + skips missing values for float dtypes; other dtypes either do not + have a sentinel missing value (int) or skipna=True has not been + implemented (object, datetime64 or timedelta64). + keep_attrs : bool, optional + If True, ``attrs`` will be copied from the original + object to the new one. If False (default), the new object will be + returned without attributes. + **kwargs : dict + Additional keyword arguments passed on to the appropriate array + function for calculating ``min`` on this object's data. + + Returns + ------- + reduced : DataArray + New DataArray with ``min`` applied to its data and the + indicated dimension(s) removed + + Examples + -------- + >>> da = xr.DataArray( + ... np.array([1, 2, 3, 1, 2, np.nan]), + ... dims="time", + ... coords=dict( + ... time=("time", pd.date_range("01-01-2001", freq="M", periods=6)), + ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), + ... ), + ... ) + >>> da + + array([ 1., 2., 3., 1., 2., nan]) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> da.groupby("labels").min() + + array([1., 2., 1.]) + Coordinates: + * labels (labels) object 'a' 'b' 'c' + + Use ``skipna`` to control whether NaNs are ignored. + + >>> da.groupby("labels").min(skipna=False) + + array([nan, 2., 1.]) + Coordinates: + * labels (labels) object 'a' 'b' 'c' + + See Also + -------- + numpy.min + DataArray.min + :ref:`groupby` + User guide on groupby operations. + """ + return self.reduce( + duck_array_ops.min, + dim=dim, + skipna=skipna, + keep_attrs=keep_attrs, + **kwargs, + ) + + def mean( + self: DataArrayReduce, + dim: Union[None, Hashable, Sequence[Hashable]] = None, + skipna: bool = True, + keep_attrs: bool = None, + **kwargs, + ) -> T_DataArray: + """ + Reduce this DataArray's data by applying ``mean`` along some dimension(s). + + Parameters + ---------- + dim : hashable or iterable of hashable, optional + Name of dimension[s] along which to apply ``mean``. 
For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If ``None``, will reduce over all dimensions + present in the grouped variable. + skipna : bool, optional + If True, skip missing values (as marked by NaN). By default, only + skips missing values for float dtypes; other dtypes either do not + have a sentinel missing value (int) or skipna=True has not been + implemented (object, datetime64 or timedelta64). + keep_attrs : bool, optional + If True, ``attrs`` will be copied from the original + object to the new one. If False (default), the new object will be + returned without attributes. + **kwargs : dict + Additional keyword arguments passed on to the appropriate array + function for calculating ``mean`` on this object's data. + + Returns + ------- + reduced : DataArray + New DataArray with ``mean`` applied to its data and the + indicated dimension(s) removed + + Examples + -------- + >>> da = xr.DataArray( + ... np.array([1, 2, 3, 1, 2, np.nan]), + ... dims="time", + ... coords=dict( + ... time=("time", pd.date_range("01-01-2001", freq="M", periods=6)), + ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), + ... ), + ... ) + >>> da + + array([ 1., 2., 3., 1., 2., nan]) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> da.groupby("labels").mean() + + array([1., 2., 2.]) + Coordinates: + * labels (labels) object 'a' 'b' 'c' + + Use ``skipna`` to control whether NaNs are ignored. + + >>> da.groupby("labels").mean(skipna=False) + + array([nan, 2., 2.]) + Coordinates: + * labels (labels) object 'a' 'b' 'c' + + See Also + -------- + numpy.mean + DataArray.mean + :ref:`groupby` + User guide on groupby operations. 
+ """ + return self.reduce( + duck_array_ops.mean, + dim=dim, + skipna=skipna, + keep_attrs=keep_attrs, + **kwargs, + ) + + def prod( + self: DataArrayReduce, + dim: Union[None, Hashable, Sequence[Hashable]] = None, + skipna: bool = True, + min_count: Optional[int] = None, + keep_attrs: bool = None, + **kwargs, + ) -> T_DataArray: + """ + Reduce this DataArray's data by applying ``prod`` along some dimension(s). + + Parameters + ---------- + dim : hashable or iterable of hashable, optional + Name of dimension[s] along which to apply ``prod``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If ``None``, will reduce over all dimensions + present in the grouped variable. + skipna : bool, optional + If True, skip missing values (as marked by NaN). By default, only + skips missing values for float dtypes; other dtypes either do not + have a sentinel missing value (int) or skipna=True has not been + implemented (object, datetime64 or timedelta64). + min_count : int, default: None + The required number of valid values to perform the operation. If + fewer than min_count non-NA values are present the result will be + NA. Only used if skipna is set to True or defaults to True for the + array's dtype. Changed in version 0.17.0: if specified on an integer + array and skipna=True, the result will be a float array. + keep_attrs : bool, optional + If True, ``attrs`` will be copied from the original + object to the new one. If False (default), the new object will be + returned without attributes. + **kwargs : dict + Additional keyword arguments passed on to the appropriate array + function for calculating ``prod`` on this object's data. + + Returns + ------- + reduced : DataArray + New DataArray with ``prod`` applied to its data and the + indicated dimension(s) removed + + Examples + -------- + >>> da = xr.DataArray( + ... np.array([1, 2, 3, 1, 2, np.nan]), + ... dims="time", + ... coords=dict( + ... time=("time", pd.date_range("01-01-2001", freq="M", periods=6)), + ... 
labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), + ... ), + ... ) + >>> da + + array([ 1., 2., 3., 1., 2., nan]) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> da.groupby("labels").prod() + + array([1., 4., 3.]) + Coordinates: + * labels (labels) object 'a' 'b' 'c' + + Use ``skipna`` to control whether NaNs are ignored. + + >>> da.groupby("labels").prod(skipna=False) + + array([nan, 4., 3.]) + Coordinates: + * labels (labels) object 'a' 'b' 'c' + + Specify ``min_count`` for finer control over when NaNs are ignored. + + >>> da.groupby("labels").prod(skipna=True, min_count=2) + + array([nan, 4., 3.]) + Coordinates: + * labels (labels) object 'a' 'b' 'c' + + See Also + -------- + numpy.prod + DataArray.prod + :ref:`groupby` + User guide on groupby operations. + """ + return self.reduce( + duck_array_ops.prod, + dim=dim, + skipna=skipna, + min_count=min_count, + keep_attrs=keep_attrs, + **kwargs, + ) + + def sum( + self: DataArrayReduce, + dim: Union[None, Hashable, Sequence[Hashable]] = None, + skipna: bool = True, + min_count: Optional[int] = None, + keep_attrs: bool = None, + **kwargs, + ) -> T_DataArray: + """ + Reduce this DataArray's data by applying ``sum`` along some dimension(s). + + Parameters + ---------- + dim : hashable or iterable of hashable, optional + Name of dimension[s] along which to apply ``sum``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If ``None``, will reduce over all dimensions + present in the grouped variable. + skipna : bool, optional + If True, skip missing values (as marked by NaN). By default, only + skips missing values for float dtypes; other dtypes either do not + have a sentinel missing value (int) or skipna=True has not been + implemented (object, datetime64 or timedelta64). + min_count : int, default: None + The required number of valid values to perform the operation. If + fewer than min_count non-NA values are present the result will be + NA. 
Only used if skipna is set to True or defaults to True for the + array's dtype. Changed in version 0.17.0: if specified on an integer + array and skipna=True, the result will be a float array. + keep_attrs : bool, optional + If True, ``attrs`` will be copied from the original + object to the new one. If False (default), the new object will be + returned without attributes. + **kwargs : dict + Additional keyword arguments passed on to the appropriate array + function for calculating ``sum`` on this object's data. + + Returns + ------- + reduced : DataArray + New DataArray with ``sum`` applied to its data and the + indicated dimension(s) removed + + Examples + -------- + >>> da = xr.DataArray( + ... np.array([1, 2, 3, 1, 2, np.nan]), + ... dims="time", + ... coords=dict( + ... time=("time", pd.date_range("01-01-2001", freq="M", periods=6)), + ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), + ... ), + ... ) + >>> da + + array([ 1., 2., 3., 1., 2., nan]) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> da.groupby("labels").sum() + + array([1., 4., 4.]) + Coordinates: + * labels (labels) object 'a' 'b' 'c' + + Use ``skipna`` to control whether NaNs are ignored. + + >>> da.groupby("labels").sum(skipna=False) + + array([nan, 4., 4.]) + Coordinates: + * labels (labels) object 'a' 'b' 'c' + + Specify ``min_count`` for finer control over when NaNs are ignored. + + >>> da.groupby("labels").sum(skipna=True, min_count=2) + + array([nan, 4., 4.]) + Coordinates: + * labels (labels) object 'a' 'b' 'c' + + See Also + -------- + numpy.sum + DataArray.sum + :ref:`groupby` + User guide on groupby operations. 
+ """ + return self.reduce( + duck_array_ops.sum, + dim=dim, + skipna=skipna, + min_count=min_count, + keep_attrs=keep_attrs, + **kwargs, + ) + + def std( + self: DataArrayReduce, + dim: Union[None, Hashable, Sequence[Hashable]] = None, + skipna: bool = True, + keep_attrs: bool = None, + **kwargs, + ) -> T_DataArray: + """ + Reduce this DataArray's data by applying ``std`` along some dimension(s). + + Parameters + ---------- + dim : hashable or iterable of hashable, optional + Name of dimension[s] along which to apply ``std``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If ``None``, will reduce over all dimensions + present in the grouped variable. + skipna : bool, optional + If True, skip missing values (as marked by NaN). By default, only + skips missing values for float dtypes; other dtypes either do not + have a sentinel missing value (int) or skipna=True has not been + implemented (object, datetime64 or timedelta64). + keep_attrs : bool, optional + If True, ``attrs`` will be copied from the original + object to the new one. If False (default), the new object will be + returned without attributes. + **kwargs : dict + Additional keyword arguments passed on to the appropriate array + function for calculating ``std`` on this object's data. + + Returns + ------- + reduced : DataArray + New DataArray with ``std`` applied to its data and the + indicated dimension(s) removed + + Examples + -------- + >>> da = xr.DataArray( + ... np.array([1, 2, 3, 1, 2, np.nan]), + ... dims="time", + ... coords=dict( + ... time=("time", pd.date_range("01-01-2001", freq="M", periods=6)), + ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), + ... ), + ... ) + >>> da + + array([ 1., 2., 3., 1., 2., nan]) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> da.groupby("labels").std() + + array([0., 0., 1.]) + Coordinates: + * labels (labels) object 'a' 'b' 'c' + + Use ``skipna`` to control whether NaNs are ignored. 
+ + >>> da.groupby("labels").std(skipna=False) + + array([nan, 0., 1.]) + Coordinates: + * labels (labels) object 'a' 'b' 'c' + + See Also + -------- + numpy.std + DataArray.std + :ref:`groupby` + User guide on groupby operations. + """ + return self.reduce( + duck_array_ops.std, + dim=dim, + skipna=skipna, + keep_attrs=keep_attrs, + **kwargs, + ) + + def var( + self: DataArrayReduce, + dim: Union[None, Hashable, Sequence[Hashable]] = None, + skipna: bool = True, + keep_attrs: bool = None, + **kwargs, + ) -> T_DataArray: + """ + Reduce this DataArray's data by applying ``var`` along some dimension(s). + + Parameters + ---------- + dim : hashable or iterable of hashable, optional + Name of dimension[s] along which to apply ``var``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If ``None``, will reduce over all dimensions + present in the grouped variable. + skipna : bool, optional + If True, skip missing values (as marked by NaN). By default, only + skips missing values for float dtypes; other dtypes either do not + have a sentinel missing value (int) or skipna=True has not been + implemented (object, datetime64 or timedelta64). + keep_attrs : bool, optional + If True, ``attrs`` will be copied from the original + object to the new one. If False (default), the new object will be + returned without attributes. + **kwargs : dict + Additional keyword arguments passed on to the appropriate array + function for calculating ``var`` on this object's data. + + Returns + ------- + reduced : DataArray + New DataArray with ``var`` applied to its data and the + indicated dimension(s) removed + + Examples + -------- + >>> da = xr.DataArray( + ... np.array([1, 2, 3, 1, 2, np.nan]), + ... dims="time", + ... coords=dict( + ... time=("time", pd.date_range("01-01-2001", freq="M", periods=6)), + ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), + ... ), + ... 
) + >>> da + + array([ 1., 2., 3., 1., 2., nan]) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> da.groupby("labels").var() + + array([0., 0., 1.]) + Coordinates: + * labels (labels) object 'a' 'b' 'c' + + Use ``skipna`` to control whether NaNs are ignored. + + >>> da.groupby("labels").var(skipna=False) + + array([nan, 0., 1.]) + Coordinates: + * labels (labels) object 'a' 'b' 'c' + + See Also + -------- + numpy.var + DataArray.var + :ref:`groupby` + User guide on groupby operations. + """ + return self.reduce( + duck_array_ops.var, + dim=dim, + skipna=skipna, + keep_attrs=keep_attrs, + **kwargs, + ) + + def median( + self: DataArrayReduce, + dim: Union[None, Hashable, Sequence[Hashable]] = None, + skipna: bool = True, + keep_attrs: bool = None, + **kwargs, + ) -> T_DataArray: + """ + Reduce this DataArray's data by applying ``median`` along some dimension(s). + + Parameters + ---------- + dim : hashable or iterable of hashable, optional + Name of dimension[s] along which to apply ``median``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If ``None``, will reduce over all dimensions + present in the grouped variable. + skipna : bool, optional + If True, skip missing values (as marked by NaN). By default, only + skips missing values for float dtypes; other dtypes either do not + have a sentinel missing value (int) or skipna=True has not been + implemented (object, datetime64 or timedelta64). + keep_attrs : bool, optional + If True, ``attrs`` will be copied from the original + object to the new one. If False (default), the new object will be + returned without attributes. + **kwargs : dict + Additional keyword arguments passed on to the appropriate array + function for calculating ``median`` on this object's data. + + Returns + ------- + reduced : DataArray + New DataArray with ``median`` applied to its data and the + indicated dimension(s) removed + + Examples + -------- + >>> da = xr.DataArray( + ... 
np.array([1, 2, 3, 1, 2, np.nan]), + ... dims="time", + ... coords=dict( + ... time=("time", pd.date_range("01-01-2001", freq="M", periods=6)), + ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), + ... ), + ... ) + >>> da + + array([ 1., 2., 3., 1., 2., nan]) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> da.groupby("labels").median() + + array([1., 2., 2.]) + Coordinates: + * labels (labels) object 'a' 'b' 'c' + + Use ``skipna`` to control whether NaNs are ignored. + + >>> da.groupby("labels").median(skipna=False) + + array([nan, 2., 2.]) + Coordinates: + * labels (labels) object 'a' 'b' 'c' + + See Also + -------- + numpy.median + DataArray.median + :ref:`groupby` + User guide on groupby operations. + """ + return self.reduce( + duck_array_ops.median, + dim=dim, + skipna=skipna, + keep_attrs=keep_attrs, + **kwargs, + ) + + +class DataArrayResampleReductions: + __slots__ = () + + def count( + self: DataArrayReduce, + dim: Union[None, Hashable, Sequence[Hashable]] = None, + keep_attrs: bool = None, + **kwargs, + ) -> T_DataArray: + """ + Reduce this DataArray's data by applying ``count`` along some dimension(s). + + Parameters + ---------- + dim : hashable or iterable of hashable, optional + Name of dimension[s] along which to apply ``count``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If ``None``, will reduce over all dimensions + present in the grouped variable. + keep_attrs : bool, optional + If True, ``attrs`` will be copied from the original + object to the new one. If False (default), the new object will be + returned without attributes. + **kwargs : dict + Additional keyword arguments passed on to the appropriate array + function for calculating ``count`` on this object's data. + + Returns + ------- + reduced : DataArray + New DataArray with ``count`` applied to its data and the + indicated dimension(s) removed + + Examples + -------- + >>> da = xr.DataArray( + ... 
np.array([1, 2, 3, 1, 2, np.nan]), + ... dims="time", + ... coords=dict( + ... time=("time", pd.date_range("01-01-2001", freq="M", periods=6)), + ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), + ... ), + ... ) + >>> da + + array([ 1., 2., 3., 1., 2., nan]) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> da.resample(time="3M").count() + + array([1, 3, 1]) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 + + See Also + -------- + numpy.count + DataArray.count + :ref:`resampling` + User guide on resampling operations. + """ + return self.reduce( + duck_array_ops.count, + dim=dim, + keep_attrs=keep_attrs, + **kwargs, + ) + + def all( + self: DataArrayReduce, + dim: Union[None, Hashable, Sequence[Hashable]] = None, + keep_attrs: bool = None, + **kwargs, + ) -> T_DataArray: + """ + Reduce this DataArray's data by applying ``all`` along some dimension(s). + + Parameters + ---------- + dim : hashable or iterable of hashable, optional + Name of dimension[s] along which to apply ``all``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If ``None``, will reduce over all dimensions + present in the grouped variable. + keep_attrs : bool, optional + If True, ``attrs`` will be copied from the original + object to the new one. If False (default), the new object will be + returned without attributes. + **kwargs : dict + Additional keyword arguments passed on to the appropriate array + function for calculating ``all`` on this object's data. + + Returns + ------- + reduced : DataArray + New DataArray with ``all`` applied to its data and the + indicated dimension(s) removed + + Examples + -------- + >>> da = xr.DataArray( + ... np.array([True, True, True, True, True, False], dtype=bool), + ... dims="time", + ... coords=dict( + ... time=("time", pd.date_range("01-01-2001", freq="M", periods=6)), + ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), + ... ), + ... 
)
+        >>> da
+        <xarray.DataArray (time: 6)>
+        array([ True,  True,  True,  True,  True, False])
+        Coordinates:
+          * time     (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30
+            labels   (time) <U1 'a' 'b' 'c' 'c' 'b' 'a'
+
+        >>> da.resample(time="3M").all()
+        <xarray.DataArray (time: 3)>
+        array([ True,  True, False])
+        Coordinates:
+          * time     (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31
+
+        See Also
+        --------
+        numpy.all
+        DataArray.all
+        :ref:`resampling`
+            User guide on resampling operations.
+        """
+        return self.reduce(
+            duck_array_ops.array_all,
+            dim=dim,
+            keep_attrs=keep_attrs,
+            **kwargs,
+        )
+
+    def any(
+        self: DataArrayReduce,
+        dim: Union[None, Hashable, Sequence[Hashable]] = None,
+        keep_attrs: bool = None,
+        **kwargs,
+    ) -> T_DataArray:
+        """
+        Reduce this DataArray's data by applying ``any`` along some dimension(s).
+
+        Parameters
+        ----------
+        dim : hashable or iterable of hashable, optional
+            Name of dimension[s] along which to apply ``any``. For e.g. ``dim="x"``
+            or ``dim=["x", "y"]``. If ``None``, will reduce over all dimensions
+            present in the grouped variable.
+        keep_attrs : bool, optional
+            If True, ``attrs`` will be copied from the original
+            object to the new one. If False (default), the new object will be
+            returned without attributes.
+        **kwargs : dict
+            Additional keyword arguments passed on to the appropriate array
+            function for calculating ``any`` on this object's data.
+
+        Returns
+        -------
+        reduced : DataArray
+            New DataArray with ``any`` applied to its data and the
+            indicated dimension(s) removed
+
+        Examples
+        --------
+        >>> da = xr.DataArray(
+        ...     np.array([True, True, True, True, True, False], dtype=bool),
+        ...     dims="time",
+        ...     coords=dict(
+        ...         time=("time", pd.date_range("01-01-2001", freq="M", periods=6)),
+        ...         labels=("time", np.array(["a", "b", "c", "c", "b", "a"])),
+        ...     ),
+        ... )
+        >>> da
+        <xarray.DataArray (time: 6)>
+        array([ True,  True,  True,  True,  True, False])
+        Coordinates:
+          * time     (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30
+            labels   (time) <U1 'a' 'b' 'c' 'c' 'b' 'a'
+
+        >>> da.resample(time="3M").any()
+        <xarray.DataArray (time: 3)>
+        array([ True,  True,  True])
+        Coordinates:
+          * time     (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31
+
+        See Also
+        --------
+        numpy.any
+        DataArray.any
+        :ref:`resampling`
+            User guide on resampling operations.
+        """
+        return self.reduce(
+            duck_array_ops.array_any,
+            dim=dim,
+            keep_attrs=keep_attrs,
+            **kwargs,
+        )
+
+    def max(
+        self: DataArrayReduce,
+        dim: Union[None, Hashable, Sequence[Hashable]] = None,
+        skipna: bool = True,
+        keep_attrs: bool = None,
+        **kwargs,
+    ) -> T_DataArray:
+        """
+        Reduce this DataArray's data by applying ``max`` along some dimension(s).
+
+        Parameters
+        ----------
+        dim : hashable or iterable of hashable, optional
+            Name of dimension[s] along which to apply ``max``. For e.g. ``dim="x"``
+            or ``dim=["x", "y"]``. If ``None``, will reduce over all dimensions
+            present in the grouped variable.
+        skipna : bool, optional
+            If True, skip missing values (as marked by NaN). By default, only
+            skips missing values for float dtypes; other dtypes either do not
+            have a sentinel missing value (int) or skipna=True has not been
+            implemented (object, datetime64 or timedelta64).
+        keep_attrs : bool, optional
+            If True, ``attrs`` will be copied from the original
+            object to the new one. If False (default), the new object will be
+            returned without attributes.
+        **kwargs : dict
+            Additional keyword arguments passed on to the appropriate array
+            function for calculating ``max`` on this object's data.
+
+        Returns
+        -------
+        reduced : DataArray
+            New DataArray with ``max`` applied to its data and the
+            indicated dimension(s) removed
+
+        Examples
+        --------
+        >>> da = xr.DataArray(
+        ...     np.array([1, 2, 3, 1, 2, np.nan]),
+        ...     dims="time",
+        ...     coords=dict(
+        ...         time=("time", pd.date_range("01-01-2001", freq="M", periods=6)),
+        ...         labels=("time", np.array(["a", "b", "c", "c", "b", "a"])),
+        ...     ),
+        ...
)
+        >>> da
+        <xarray.DataArray (time: 6)>
+        array([ 1.,  2.,  3.,  1.,  2., nan])
+        Coordinates:
+          * time     (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30
+            labels   (time) <U1 'a' 'b' 'c' 'c' 'b' 'a'
+
+        >>> da.resample(time="3M").max()
+        <xarray.DataArray (time: 3)>
+        array([1., 3., 2.])
+        Coordinates:
+          * time     (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31
+
+        Use ``skipna`` to control whether NaNs are ignored.
+
+        >>> da.resample(time="3M").max(skipna=False)
+        <xarray.DataArray (time: 3)>
+        array([ 1.,  3., nan])
+        Coordinates:
+          * time     (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31
+
+        See Also
+        --------
+        numpy.max
+        DataArray.max
+        :ref:`resampling`
+            User guide on resampling operations.
+        """
+        return self.reduce(
+            duck_array_ops.max,
+            dim=dim,
+            skipna=skipna,
+            keep_attrs=keep_attrs,
+            **kwargs,
+        )
+
+    def min(
+        self: DataArrayReduce,
+        dim: Union[None, Hashable, Sequence[Hashable]] = None,
+        skipna: bool = True,
+        keep_attrs: bool = None,
+        **kwargs,
+    ) -> T_DataArray:
+        """
+        Reduce this DataArray's data by applying ``min`` along some dimension(s).
+
+        Parameters
+        ----------
+        dim : hashable or iterable of hashable, optional
+            Name of dimension[s] along which to apply ``min``. For e.g. ``dim="x"``
+            or ``dim=["x", "y"]``. If ``None``, will reduce over all dimensions
+            present in the grouped variable.
+        skipna : bool, optional
+            If True, skip missing values (as marked by NaN). By default, only
+            skips missing values for float dtypes; other dtypes either do not
+            have a sentinel missing value (int) or skipna=True has not been
+            implemented (object, datetime64 or timedelta64).
+        keep_attrs : bool, optional
+            If True, ``attrs`` will be copied from the original
+            object to the new one. If False (default), the new object will be
+            returned without attributes.
+        **kwargs : dict
+            Additional keyword arguments passed on to the appropriate array
+            function for calculating ``min`` on this object's data.
+
+        Returns
+        -------
+        reduced : DataArray
+            New DataArray with ``min`` applied to its data and the
+            indicated dimension(s) removed
+
+        Examples
+        --------
+        >>> da = xr.DataArray(
+        ...     np.array([1, 2, 3, 1, 2, np.nan]),
+        ...     dims="time",
+        ...     coords=dict(
+        ...         time=("time", pd.date_range("01-01-2001", freq="M", periods=6)),
+        ...         labels=("time", np.array(["a", "b", "c", "c", "b", "a"])),
+        ...     ),
+        ... )
+        >>> da
+        <xarray.DataArray (time: 6)>
+        array([ 1.,  2.,  3.,  1.,  2., nan])
+        Coordinates:
+          * time     (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30
+            labels   (time) <U1 'a' 'b' 'c' 'c' 'b' 'a'
+
+        >>> da.resample(time="3M").min()
+        <xarray.DataArray (time: 3)>
+        array([1., 1., 2.])
+        Coordinates:
+          * time     (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31
+
+        Use ``skipna`` to control whether NaNs are ignored.
+
+        >>> da.resample(time="3M").min(skipna=False)
+        <xarray.DataArray (time: 3)>
+        array([ 1.,  1., nan])
+        Coordinates:
+          * time     (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31
+
+        See Also
+        --------
+        numpy.min
+        DataArray.min
+        :ref:`resampling`
+            User guide on resampling operations.
+        """
+        return self.reduce(
+            duck_array_ops.min,
+            dim=dim,
+            skipna=skipna,
+            keep_attrs=keep_attrs,
+            **kwargs,
+        )
+
+    def mean(
+        self: DataArrayReduce,
+        dim: Union[None, Hashable, Sequence[Hashable]] = None,
+        skipna: bool = True,
+        keep_attrs: bool = None,
+        **kwargs,
+    ) -> T_DataArray:
+        """
+        Reduce this DataArray's data by applying ``mean`` along some dimension(s).
+
+        Parameters
+        ----------
+        dim : hashable or iterable of hashable, optional
+            Name of dimension[s] along which to apply ``mean``. For e.g. ``dim="x"``
+            or ``dim=["x", "y"]``. If ``None``, will reduce over all dimensions
+            present in the grouped variable.
+        skipna : bool, optional
+            If True, skip missing values (as marked by NaN). By default, only
+            skips missing values for float dtypes; other dtypes either do not
+            have a sentinel missing value (int) or skipna=True has not been
+            implemented (object, datetime64 or timedelta64).
+ keep_attrs : bool, optional + If True, ``attrs`` will be copied from the original + object to the new one. If False (default), the new object will be + returned without attributes. + **kwargs : dict + Additional keyword arguments passed on to the appropriate array + function for calculating ``mean`` on this object's data. + + Returns + ------- + reduced : DataArray + New DataArray with ``mean`` applied to its data and the + indicated dimension(s) removed + + Examples + -------- + >>> da = xr.DataArray( + ... np.array([1, 2, 3, 1, 2, np.nan]), + ... dims="time", + ... coords=dict( + ... time=("time", pd.date_range("01-01-2001", freq="M", periods=6)), + ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), + ... ), + ... ) + >>> da + + array([ 1., 2., 3., 1., 2., nan]) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> da.resample(time="3M").mean() + + array([1., 2., 2.]) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 + + Use ``skipna`` to control whether NaNs are ignored. + + >>> da.resample(time="3M").mean(skipna=False) + + array([ 1., 2., nan]) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 + + See Also + -------- + numpy.mean + DataArray.mean + :ref:`resampling` + User guide on resampling operations. + """ + return self.reduce( + duck_array_ops.mean, + dim=dim, + skipna=skipna, + keep_attrs=keep_attrs, + **kwargs, + ) + + def prod( + self: DataArrayReduce, + dim: Union[None, Hashable, Sequence[Hashable]] = None, + skipna: bool = True, + min_count: Optional[int] = None, + keep_attrs: bool = None, + **kwargs, + ) -> T_DataArray: + """ + Reduce this DataArray's data by applying ``prod`` along some dimension(s). + + Parameters + ---------- + dim : hashable or iterable of hashable, optional + Name of dimension[s] along which to apply ``prod``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. 
If ``None``, will reduce over all dimensions + present in the grouped variable. + skipna : bool, optional + If True, skip missing values (as marked by NaN). By default, only + skips missing values for float dtypes; other dtypes either do not + have a sentinel missing value (int) or skipna=True has not been + implemented (object, datetime64 or timedelta64). + min_count : int, default: None + The required number of valid values to perform the operation. If + fewer than min_count non-NA values are present the result will be + NA. Only used if skipna is set to True or defaults to True for the + array's dtype. Changed in version 0.17.0: if specified on an integer + array and skipna=True, the result will be a float array. + keep_attrs : bool, optional + If True, ``attrs`` will be copied from the original + object to the new one. If False (default), the new object will be + returned without attributes. + **kwargs : dict + Additional keyword arguments passed on to the appropriate array + function for calculating ``prod`` on this object's data. + + Returns + ------- + reduced : DataArray + New DataArray with ``prod`` applied to its data and the + indicated dimension(s) removed + + Examples + -------- + >>> da = xr.DataArray( + ... np.array([1, 2, 3, 1, 2, np.nan]), + ... dims="time", + ... coords=dict( + ... time=("time", pd.date_range("01-01-2001", freq="M", periods=6)), + ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), + ... ), + ... ) + >>> da + + array([ 1., 2., 3., 1., 2., nan]) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> da.resample(time="3M").prod() + + array([1., 6., 2.]) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 + + Use ``skipna`` to control whether NaNs are ignored. 
+ + >>> da.resample(time="3M").prod(skipna=False) + + array([ 1., 6., nan]) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 + + Specify ``min_count`` for finer control over when NaNs are ignored. + + >>> da.resample(time="3M").prod(skipna=True, min_count=2) + + array([nan, 6., nan]) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 + + See Also + -------- + numpy.prod + DataArray.prod + :ref:`resampling` + User guide on resampling operations. + """ + return self.reduce( + duck_array_ops.prod, + dim=dim, + skipna=skipna, + min_count=min_count, + keep_attrs=keep_attrs, + **kwargs, + ) + + def sum( + self: DataArrayReduce, + dim: Union[None, Hashable, Sequence[Hashable]] = None, + skipna: bool = True, + min_count: Optional[int] = None, + keep_attrs: bool = None, + **kwargs, + ) -> T_DataArray: + """ + Reduce this DataArray's data by applying ``sum`` along some dimension(s). + + Parameters + ---------- + dim : hashable or iterable of hashable, optional + Name of dimension[s] along which to apply ``sum``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If ``None``, will reduce over all dimensions + present in the grouped variable. + skipna : bool, optional + If True, skip missing values (as marked by NaN). By default, only + skips missing values for float dtypes; other dtypes either do not + have a sentinel missing value (int) or skipna=True has not been + implemented (object, datetime64 or timedelta64). + min_count : int, default: None + The required number of valid values to perform the operation. If + fewer than min_count non-NA values are present the result will be + NA. Only used if skipna is set to True or defaults to True for the + array's dtype. Changed in version 0.17.0: if specified on an integer + array and skipna=True, the result will be a float array. + keep_attrs : bool, optional + If True, ``attrs`` will be copied from the original + object to the new one. 
If False (default), the new object will be + returned without attributes. + **kwargs : dict + Additional keyword arguments passed on to the appropriate array + function for calculating ``sum`` on this object's data. + + Returns + ------- + reduced : DataArray + New DataArray with ``sum`` applied to its data and the + indicated dimension(s) removed + + Examples + -------- + >>> da = xr.DataArray( + ... np.array([1, 2, 3, 1, 2, np.nan]), + ... dims="time", + ... coords=dict( + ... time=("time", pd.date_range("01-01-2001", freq="M", periods=6)), + ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), + ... ), + ... ) + >>> da + + array([ 1., 2., 3., 1., 2., nan]) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> da.resample(time="3M").sum() + + array([1., 6., 2.]) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 + + Use ``skipna`` to control whether NaNs are ignored. + + >>> da.resample(time="3M").sum(skipna=False) + + array([ 1., 6., nan]) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 + + Specify ``min_count`` for finer control over when NaNs are ignored. + + >>> da.resample(time="3M").sum(skipna=True, min_count=2) + + array([nan, 6., nan]) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 + + See Also + -------- + numpy.sum + DataArray.sum + :ref:`resampling` + User guide on resampling operations. + """ + return self.reduce( + duck_array_ops.sum, + dim=dim, + skipna=skipna, + min_count=min_count, + keep_attrs=keep_attrs, + **kwargs, + ) + + def std( + self: DataArrayReduce, + dim: Union[None, Hashable, Sequence[Hashable]] = None, + skipna: bool = True, + keep_attrs: bool = None, + **kwargs, + ) -> T_DataArray: + """ + Reduce this DataArray's data by applying ``std`` along some dimension(s). 
+ + Parameters + ---------- + dim : hashable or iterable of hashable, optional + Name of dimension[s] along which to apply ``std``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If ``None``, will reduce over all dimensions + present in the grouped variable. + skipna : bool, optional + If True, skip missing values (as marked by NaN). By default, only + skips missing values for float dtypes; other dtypes either do not + have a sentinel missing value (int) or skipna=True has not been + implemented (object, datetime64 or timedelta64). + keep_attrs : bool, optional + If True, ``attrs`` will be copied from the original + object to the new one. If False (default), the new object will be + returned without attributes. + **kwargs : dict + Additional keyword arguments passed on to the appropriate array + function for calculating ``std`` on this object's data. + + Returns + ------- + reduced : DataArray + New DataArray with ``std`` applied to its data and the + indicated dimension(s) removed + + Examples + -------- + >>> da = xr.DataArray( + ... np.array([1, 2, 3, 1, 2, np.nan]), + ... dims="time", + ... coords=dict( + ... time=("time", pd.date_range("01-01-2001", freq="M", periods=6)), + ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), + ... ), + ... ) + >>> da + + array([ 1., 2., 3., 1., 2., nan]) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> da.resample(time="3M").std() + + array([0. , 0.81649658, 0. ]) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 + + Use ``skipna`` to control whether NaNs are ignored. + + >>> da.resample(time="3M").std(skipna=False) + + array([0. , 0.81649658, nan]) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 + + See Also + -------- + numpy.std + DataArray.std + :ref:`resampling` + User guide on resampling operations. 
+ """ + return self.reduce( + duck_array_ops.std, + dim=dim, + skipna=skipna, + keep_attrs=keep_attrs, + **kwargs, + ) + + def var( + self: DataArrayReduce, + dim: Union[None, Hashable, Sequence[Hashable]] = None, + skipna: bool = True, + keep_attrs: bool = None, + **kwargs, + ) -> T_DataArray: + """ + Reduce this DataArray's data by applying ``var`` along some dimension(s). + + Parameters + ---------- + dim : hashable or iterable of hashable, optional + Name of dimension[s] along which to apply ``var``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If ``None``, will reduce over all dimensions + present in the grouped variable. + skipna : bool, optional + If True, skip missing values (as marked by NaN). By default, only + skips missing values for float dtypes; other dtypes either do not + have a sentinel missing value (int) or skipna=True has not been + implemented (object, datetime64 or timedelta64). + keep_attrs : bool, optional + If True, ``attrs`` will be copied from the original + object to the new one. If False (default), the new object will be + returned without attributes. + **kwargs : dict + Additional keyword arguments passed on to the appropriate array + function for calculating ``var`` on this object's data. + + Returns + ------- + reduced : DataArray + New DataArray with ``var`` applied to its data and the + indicated dimension(s) removed + + Examples + -------- + >>> da = xr.DataArray( + ... np.array([1, 2, 3, 1, 2, np.nan]), + ... dims="time", + ... coords=dict( + ... time=("time", pd.date_range("01-01-2001", freq="M", periods=6)), + ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), + ... ), + ... ) + >>> da + + array([ 1., 2., 3., 1., 2., nan]) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> da.resample(time="3M").var() + + array([0. , 0.66666667, 0. 
]) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 + + Use ``skipna`` to control whether NaNs are ignored. + + >>> da.resample(time="3M").var(skipna=False) + + array([0. , 0.66666667, nan]) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 + + See Also + -------- + numpy.var + DataArray.var + :ref:`resampling` + User guide on resampling operations. + """ + return self.reduce( + duck_array_ops.var, + dim=dim, + skipna=skipna, + keep_attrs=keep_attrs, + **kwargs, + ) + + def median( + self: DataArrayReduce, + dim: Union[None, Hashable, Sequence[Hashable]] = None, + skipna: bool = True, + keep_attrs: bool = None, + **kwargs, + ) -> T_DataArray: + """ + Reduce this DataArray's data by applying ``median`` along some dimension(s). + + Parameters + ---------- + dim : hashable or iterable of hashable, optional + Name of dimension[s] along which to apply ``median``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If ``None``, will reduce over all dimensions + present in the grouped variable. + skipna : bool, optional + If True, skip missing values (as marked by NaN). By default, only + skips missing values for float dtypes; other dtypes either do not + have a sentinel missing value (int) or skipna=True has not been + implemented (object, datetime64 or timedelta64). + keep_attrs : bool, optional + If True, ``attrs`` will be copied from the original + object to the new one. If False (default), the new object will be + returned without attributes. + **kwargs : dict + Additional keyword arguments passed on to the appropriate array + function for calculating ``median`` on this object's data. + + Returns + ------- + reduced : DataArray + New DataArray with ``median`` applied to its data and the + indicated dimension(s) removed + + Examples + -------- + >>> da = xr.DataArray( + ... np.array([1, 2, 3, 1, 2, np.nan]), + ... dims="time", + ... coords=dict( + ... 
time=("time", pd.date_range("01-01-2001", freq="M", periods=6)), + ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), + ... ), + ... ) + >>> da + + array([ 1., 2., 3., 1., 2., nan]) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> da.resample(time="3M").median() + + array([1., 2., 2.]) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 + + Use ``skipna`` to control whether NaNs are ignored. + + >>> da.resample(time="3M").median(skipna=False) + + array([ 1., 2., nan]) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 + + See Also + -------- + numpy.median + DataArray.median + :ref:`resampling` + User guide on resampling operations. + """ + return self.reduce( + duck_array_ops.median, + dim=dim, + skipna=skipna, + keep_attrs=keep_attrs, + **kwargs, + ) diff --git a/xarray/core/arithmetic.py b/xarray/core/arithmetic.py index 27ec5ab8dd9..814e9a59877 100644 --- a/xarray/core/arithmetic.py +++ b/xarray/core/arithmetic.py @@ -128,8 +128,6 @@ class DataArrayArithmetic( class DataArrayGroupbyArithmetic( - ImplementsArrayReduce, - IncludeReduceMethods, SupportsArithmetic, DataArrayGroupByOpsMixin, ): @@ -137,8 +135,6 @@ class DataArrayGroupbyArithmetic( class DatasetGroupbyArithmetic( - ImplementsDatasetReduce, - IncludeReduceMethods, SupportsArithmetic, DatasetGroupByOpsMixin, ): diff --git a/xarray/core/groupby.py b/xarray/core/groupby.py index 1ca5de965d0..185b4ae5bec 100644 --- a/xarray/core/groupby.py +++ b/xarray/core/groupby.py @@ -5,6 +5,7 @@ import pandas as pd from . 
import dtypes, duck_array_ops, nputils, ops +from ._reductions import DataArrayGroupByReductions, DatasetGroupByReductions from .arithmetic import DataArrayGroupbyArithmetic, DatasetGroupbyArithmetic from .concat import concat from .formatting import format_array_flat @@ -712,7 +713,7 @@ def _maybe_reorder(xarray_obj, dim, positions): return xarray_obj[{dim: order}] -class DataArrayGroupBy(GroupBy, DataArrayGroupbyArithmetic): +class DataArrayGroupByBase(GroupBy, DataArrayGroupbyArithmetic): """GroupBy object specialized to grouping DataArray objects""" __slots__ = () @@ -877,7 +878,11 @@ def reduce_array(ar): return self.map(reduce_array, shortcut=shortcut) -class DatasetGroupBy(GroupBy, DatasetGroupbyArithmetic): +class DataArrayGroupBy(DataArrayGroupByBase, DataArrayGroupByReductions): + __slots__ = () + + +class DatasetGroupByBase(GroupBy, DatasetGroupbyArithmetic): __slots__ = () @@ -994,3 +999,7 @@ def assign(self, **kwargs): Dataset.assign """ return self.map(lambda ds: ds.assign(**kwargs)) + + +class DatasetGroupBy(DatasetGroupByBase, DatasetGroupByReductions): + __slots__ = () diff --git a/xarray/core/resample.py b/xarray/core/resample.py index c7749a7e5ca..e2f599e8b4e 100644 --- a/xarray/core/resample.py +++ b/xarray/core/resample.py @@ -1,6 +1,7 @@ import warnings -from .groupby import DataArrayGroupBy, DatasetGroupBy +from ._reductions import DataArrayResampleReductions, DatasetResampleReductions +from .groupby import DataArrayGroupByBase, DatasetGroupByBase RESAMPLE_DIM = "__resample_dim__" @@ -156,7 +157,7 @@ def _interpolate(self, kind="linear"): ) -class DataArrayResample(DataArrayGroupBy, Resample): +class DataArrayResample(DataArrayResampleReductions, DataArrayGroupByBase, Resample): """DataArrayGroupBy object specialized to time resampling operations over a specified dimension """ @@ -247,7 +248,7 @@ def apply(self, func, args=(), shortcut=None, **kwargs): return self.map(func=func, shortcut=shortcut, args=args, **kwargs) -class 
DatasetResample(DatasetGroupBy, Resample): +class DatasetResample(DatasetResampleReductions, DatasetGroupByBase, Resample): """DatasetGroupBy object specialized to resampling a specified dimension""" def __init__(self, *args, dim=None, resample_dim=None, **kwargs): diff --git a/xarray/util/generate_reductions.py b/xarray/util/generate_reductions.py new file mode 100644 index 00000000000..72449195d1e --- /dev/null +++ b/xarray/util/generate_reductions.py @@ -0,0 +1,296 @@ +"""Generate module and stub file for arithmetic operators of various xarray classes. + +For internal xarray development use only. + +Usage: + python xarray/util/generate_reductions.py > xarray/core/_reductions.py + pytest --doctest-modules xarray/core/_reductions.py --accept || true + pytest --doctest-modules xarray/core/_reductions.py --accept + +This requires [pytest-accept](https://github.com/max-sixty/pytest-accept). +The second run of pytest is deliberate, since the first will return an error +while replacing the doctests. + +""" + +import collections +import textwrap +from functools import partial +from typing import Callable, Optional + +MODULE_PREAMBLE = '''\ +"""Mixin classes with reduction operations.""" +# This file was generated using xarray.util.generate_reductions. Do not edit manually. + +import sys +from typing import Any, Callable, Hashable, Optional, Sequence, Union + +from . 
import duck_array_ops +from .types import T_DataArray, T_Dataset + +if sys.version_info >= (3, 8): + from typing import Protocol +else: + from typing_extensions import Protocol''' + +OBJ_PREAMBLE = """ + +class {obj}Reduce(Protocol): + def reduce( + self, + func: Callable[..., Any], + dim: Union[None, Hashable, Sequence[Hashable]] = None, + axis: Union[None, int, Sequence[int]] = None, + keep_attrs: bool = None, + keepdims: bool = False, + **kwargs: Any, + ) -> T_{obj}: + ...""" + + +CLASS_PREAMBLE = """ + +class {obj}{cls}Reductions: + __slots__ = ()""" + +_SKIPNA_DOCSTRING = """ +skipna : bool, optional + If True, skip missing values (as marked by NaN). By default, only + skips missing values for float dtypes; other dtypes either do not + have a sentinel missing value (int) or skipna=True has not been + implemented (object, datetime64 or timedelta64).""" + +_MINCOUNT_DOCSTRING = """ +min_count : int, default: None + The required number of valid values to perform the operation. If + fewer than min_count non-NA values are present the result will be + NA. Only used if skipna is set to True or defaults to True for the + array's dtype. Changed in version 0.17.0: if specified on an integer + array and skipna=True, the result will be a float array.""" + + +BOOL_REDUCE_METHODS = ["all", "any"] +NAN_REDUCE_METHODS = [ + "max", + "min", + "mean", + "prod", + "sum", + "std", + "var", + "median", +] +NAN_CUM_METHODS = ["cumsum", "cumprod"] +MIN_COUNT_METHODS = ["prod", "sum"] +NUMERIC_ONLY_METHODS = [ + "mean", + "std", + "var", + "sum", + "prod", + "median", + "cumsum", + "cumprod", +] + +TEMPLATE_REDUCTION = ''' + def {method}( + self: {obj}Reduce, + dim: Union[None, Hashable, Sequence[Hashable]] = None,{skip_na.kwarg}{min_count.kwarg} + keep_attrs: bool = None, + **kwargs, + ) -> T_{obj}: + """ + Reduce this {obj}'s data by applying ``{method}`` along some dimension(s). 
+ + Parameters + ---------- + dim : hashable or iterable of hashable, optional + Name of dimension[s] along which to apply ``{method}``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. {extra_dim}{extra_args}{skip_na.docs}{min_count.docs} + keep_attrs : bool, optional + If True, ``attrs`` will be copied from the original + object to the new one. If False (default), the new object will be + returned without attributes. + **kwargs : dict + Additional keyword arguments passed on to the appropriate array + function for calculating ``{method}`` on this object's data. + + Returns + ------- + reduced : {obj} + New {obj} with ``{method}`` applied to its data and the + indicated dimension(s) removed + + Examples + --------{example} + + See Also + -------- + numpy.{method} + {obj}.{method} + :ref:`{docref}` + User guide on {docref} operations. + """ + return self.reduce( + duck_array_ops.{array_method}, + dim=dim,{skip_na.call}{min_count.call}{numeric_only_call} + keep_attrs=keep_attrs, + **kwargs, + )''' + + +def generate_groupby_example(obj: str, cls: str, method: str): + """Generate examples for method.""" + dx = "ds" if obj == "Dataset" else "da" + if cls == "Resample": + calculation = f'{dx}.resample(time="3M").{method}' + elif cls == "GroupBy": + calculation = f'{dx}.groupby("labels").{method}' + else: + raise ValueError + + if method in BOOL_REDUCE_METHODS: + np_array = """ + ... np.array([True, True, True, True, True, False], dtype=bool),""" + + else: + np_array = """ + ... np.array([1, 2, 3, 1, 2, np.nan]),""" + + create_da = f""" + >>> da = xr.DataArray({np_array} + ... dims="time", + ... coords=dict( + ... time=("time", pd.date_range("01-01-2001", freq="M", periods=6)), + ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), + ... ), + ... 
)""" + + if obj == "Dataset": + maybe_dataset = """ + >>> ds = xr.Dataset(dict(da=da)) + >>> ds""" + else: + maybe_dataset = """ + >>> da""" + + if method in NAN_REDUCE_METHODS: + maybe_skipna = f""" + + Use ``skipna`` to control whether NaNs are ignored. + + >>> {calculation}(skipna=False)""" + else: + maybe_skipna = "" + + if method in MIN_COUNT_METHODS: + maybe_mincount = f""" + + Specify ``min_count`` for finer control over when NaNs are ignored. + + >>> {calculation}(skipna=True, min_count=2)""" + else: + maybe_mincount = "" + + return f"""{create_da}{maybe_dataset} + + >>> {calculation}(){maybe_skipna}{maybe_mincount}""" + + +def generate_method( + obj: str, + docref: str, + method: str, + skipna: bool, + example_generator: Callable, + array_method: Optional[str] = None, +): + if not array_method: + array_method = method + + if obj == "Dataset": + if method in NUMERIC_ONLY_METHODS: + numeric_only_call = "\n numeric_only=True," + else: + numeric_only_call = "\n numeric_only=False," + else: + numeric_only_call = "" + + kwarg = collections.namedtuple("kwarg", "docs kwarg call") + if skipna: + skip_na = kwarg( + docs=textwrap.indent(_SKIPNA_DOCSTRING, " "), + kwarg="\n skipna: bool = True,", + call="\n skipna=skipna,", + ) + else: + skip_na = kwarg(docs="", kwarg="", call="") + + if method in MIN_COUNT_METHODS: + min_count = kwarg( + docs=textwrap.indent(_MINCOUNT_DOCSTRING, " "), + kwarg="\n min_count: Optional[int] = None,", + call="\n min_count=min_count,", + ) + else: + min_count = kwarg(docs="", kwarg="", call="") + + return TEMPLATE_REDUCTION.format( + obj=obj, + docref=docref, + method=method, + array_method=array_method, + extra_dim="""If ``None``, will reduce over all dimensions + present in the grouped variable.""", + extra_args="", + skip_na=skip_na, + min_count=min_count, + numeric_only_call=numeric_only_call, + example=example_generator(obj=obj, method=method), + ) + + +def render(obj: str, cls: str, docref: str, example_generator: Callable): + yield 
CLASS_PREAMBLE.format(obj=obj, cls=cls) + yield generate_method( + obj, + method="count", + docref=docref, + skipna=False, + example_generator=example_generator, + ) + for method in BOOL_REDUCE_METHODS: + yield generate_method( + obj, + method=method, + docref=docref, + skipna=False, + array_method=f"array_{method}", + example_generator=example_generator, + ) + for method in NAN_REDUCE_METHODS: + yield generate_method( + obj, + method=method, + docref=docref, + skipna=True, + example_generator=example_generator, + ) + + +if __name__ == "__main__": + print(MODULE_PREAMBLE) + for obj in ["Dataset", "DataArray"]: + print(OBJ_PREAMBLE.format(obj=obj)) + for cls, docref in ( + ("GroupBy", "groupby"), + ("Resample", "resampling"), + ): + for line in render( + obj=obj, + cls=cls, + docref=docref, + example_generator=partial(generate_groupby_example, cls=cls), + ): + print(line)