diff --git a/.deepsource.toml b/.deepsource.toml new file mode 100644 index 00000000000..e37b41de303 --- /dev/null +++ b/.deepsource.toml @@ -0,0 +1,18 @@ +version = 1 + +test_patterns = [ + "*/tests/**", + "*/test_*.py" +] + +exclude_patterns = [ + "doc/**", + "ci/**" +] + +[[analyzers]] +name = "python" +enabled = true + + [analyzers.meta] + runtime_version = "3.x.x" \ No newline at end of file diff --git a/.github/ISSUE_TEMPLATE/bug-report.md b/.github/ISSUE_TEMPLATE/bug-report.md new file mode 100644 index 00000000000..02bc5d0f7b0 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/bug-report.md @@ -0,0 +1,39 @@ +--- +name: Bug report +about: Create a report to help us improve +title: '' +labels: '' +assignees: '' + +--- + + + +**What happened**: + +**What you expected to happen**: + +**Minimal Complete Verifiable Example**: + +```python +# Put your MCVE code here +``` + +**Anything else we need to know?**: + +**Environment**: + +
<details><summary>Output of xr.show_versions()</summary> + + + +</details>
diff --git a/.github/ISSUE_TEMPLATE/bug_report.md b/.github/ISSUE_TEMPLATE/bug_report.md deleted file mode 100644 index c712cf27979..00000000000 --- a/.github/ISSUE_TEMPLATE/bug_report.md +++ /dev/null @@ -1,35 +0,0 @@ ---- -name: Bug report / Feature request -about: 'Post a problem or idea' -title: '' -labels: '' -assignees: '' - ---- - - - - -#### MCVE Code Sample - - -```python -# Your code here - -``` - -#### Expected Output - - -#### Problem Description - - - -#### Versions - -
<details><summary>Output of xr.show_versions()</summary> - - - - -</details>
diff --git a/.github/ISSUE_TEMPLATE/config.yml b/.github/ISSUE_TEMPLATE/config.yml new file mode 100644 index 00000000000..3389fbfe071 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/config.yml @@ -0,0 +1,5 @@ +blank_issues_enabled: true +contact_links: + - name: General Question + url: https://stackoverflow.com/questions/tagged/python-xarray + about: "If you have a question like *How do I append to an xarray.Dataset?* then please ask on Stack Overflow using the #python-xarray tag." diff --git a/.github/ISSUE_TEMPLATE/feature-request.md b/.github/ISSUE_TEMPLATE/feature-request.md new file mode 100644 index 00000000000..7021fe490aa --- /dev/null +++ b/.github/ISSUE_TEMPLATE/feature-request.md @@ -0,0 +1,22 @@ +--- +name: Feature request +about: Suggest an idea for this project +title: '' +labels: '' +assignees: '' + +--- + + + +**Is your feature request related to a problem? Please describe.** +A clear and concise description of what the problem is. Ex. I'm always frustrated when [...] + +**Describe the solution you'd like** +A clear and concise description of what you want to happen. + +**Describe alternatives you've considered** +A clear and concise description of any alternative solutions or features you've considered. + +**Additional context** +Add any other context about the feature request here. diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md index a921bddaa23..c9c0b720c35 100644 --- a/.github/PULL_REQUEST_TEMPLATE.md +++ b/.github/PULL_REQUEST_TEMPLATE.md @@ -3,4 +3,5 @@ - [ ] Closes #xxxx - [ ] Tests added - [ ] Passes `isort -rc . && black . && mypy . && flake8` - - [ ] Fully documented, including `whats-new.rst` for all changes and `api.rst` for new API + - [ ] User visible changes (including notable bug fixes) are documented in `whats-new.rst` + - [ ] New functions/methods are listed in `api.rst` diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 26bf4803ef6..447f0007fc2 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -11,12 +11,16 @@ repos: rev: stable hooks: - id: black + - repo: https://github.com/keewis/blackdoc + rev: stable + hooks: + - id: blackdoc - repo: https://gitlab.com/pycqa/flake8 rev: 3.7.9 hooks: - id: flake8 - repo: https://github.com/pre-commit/mirrors-mypy - rev: v0.761 # Must match ci/requirements/*.yml + rev: v0.780 # Must match ci/requirements/*.yml hooks: - id: mypy # run this occasionally, ref discussion https://github.com/pydata/xarray/pull/3194 diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md new file mode 100644 index 00000000000..7a909aefd08 --- /dev/null +++ b/CONTRIBUTING.md @@ -0,0 +1 @@ +Xarray's contributor guidelines [can be found in our online documentation](http://xarray.pydata.org/en/stable/contributing.html) diff --git a/HOW_TO_RELEASE.md b/HOW_TO_RELEASE.md index 3fdd1d7236d..c890d61d966 100644 --- a/HOW_TO_RELEASE.md +++ b/HOW_TO_RELEASE.md @@ -1,4 +1,4 @@ -How to issue an xarray release in 16 easy steps +# How to issue an xarray release in 17 easy steps Time required: about an hour. @@ -6,7 +6,16 @@ Time required: about an hour. ``` git pull upstream master ``` - 2. Look over whats-new.rst and the docs. Make sure "What's New" is complete + 2. Get a list of contributors with: + ``` + git log "$(git tag --sort="v:refname" | sed -n 'x;$p').." --format=%aN | sort -u | perl -pe 's/\n/$1, /' + ``` + or by substituting the _previous_ release in: + ``` + git log v0.X.Y-1.. --format=%aN | sort -u | perl -pe 's/\n/$1, /' + ``` + Add these into `whats-new.rst` somewhere :) + 3. 
Look over whats-new.rst and the docs. Make sure "What's New" is complete (check the date!) and consider adding a brief summary note describing the release at the top. Things to watch out for: @@ -16,41 +25,41 @@ Time required: about an hour. due to a bad merge. Check for these before a release by using git diff, e.g., `git diff v0.X.Y whats-new.rst` where 0.X.Y is the previous release. - 3. If you have any doubts, run the full test suite one final time! + 4. If you have any doubts, run the full test suite one final time! ``` pytest ``` - 4. Check that the ReadTheDocs build is passing. - 5. On the master branch, commit the release in git: + 5. Check that the ReadTheDocs build is passing. + 6. On the master branch, commit the release in git: ``` git commit -am 'Release v0.X.Y' ``` - 6. Tag the release: + 7. Tag the release: ``` git tag -a v0.X.Y -m 'v0.X.Y' ``` - 7. Build source and binary wheels for pypi: + 8. Build source and binary wheels for pypi: ``` git clean -xdf # this deletes all uncommitted changes! python setup.py bdist_wheel sdist ``` - 8. Use twine to check the package build: + 9. Use twine to check the package build: ``` twine check dist/xarray-0.X.Y* ``` - 9. Use twine to register and upload the release on pypi. Be careful, you can't +10. Use twine to register and upload the release on pypi. Be careful, you can't take this back! ``` twine upload dist/xarray-0.X.Y* ``` You will need to be listed as a package owner at https://pypi.python.org/pypi/xarray for this to work. -10. Push your changes to master: +11. Push your changes to master: ``` git push upstream master git push upstream --tags ``` -11. Update the stable branch (used by ReadTheDocs) and switch back to master: +12. Update the stable branch (used by ReadTheDocs) and switch back to master: ``` git checkout stable git rebase master @@ -60,7 +69,7 @@ Time required: about an hour. It's OK to force push to 'stable' if necessary. (We also update the stable branch with `git cherry-pick` for documentation-only fixes that apply to the current released version.) -12. Add a section for the next release (v.X.Y+1) to doc/whats-new.rst: +13. Add a section for the next release (v.X.Y+1) to doc/whats-new.rst: ``` .. _whats-new.0.X.Y+1: @@ -86,19 +95,19 @@ Time required: about an hour. Internal Changes ~~~~~~~~~~~~~~~~ ``` -13. Commit your changes and push to master again: +14. Commit your changes and push to master again: ``` git commit -am 'New whatsnew section' git push upstream master ``` You're done pushing to master! -14. Issue the release on GitHub. Click on "Draft a new release" at +15. Issue the release on GitHub. Click on "Draft a new release" at https://github.com/pydata/xarray/releases. Type in the version number, but don't bother to describe it -- we maintain that on the docs instead. -15. Update the docs. Login to https://readthedocs.org/projects/xray/versions/ +16. Update the docs. Login to https://readthedocs.org/projects/xray/versions/ and switch your new release tag (at the bottom) from "Inactive" to "Active". It should now build automatically. -16. Issue the release announcement! For bug fix releases, I usually only email +17. Issue the release announcement! For bug fix releases, I usually only email xarray@googlegroups.com. For major/feature releases, I will email a broader list (no more than once every 3-6 months): - pydata@googlegroups.com @@ -109,18 +118,8 @@ Time required: about an hour. Google search will turn up examples of prior release announcements (look for "ANN xarray").
- You can get a list of contributors with: - ``` - git log "$(git tag --sort="v:refname" | sed -n 'x;$p').." --format="%aN" | sort -u - ``` - or by substituting the _previous_ release in: - ``` - git log v0.X.Y-1.. --format="%aN" | sort -u - ``` - NB: copying this output into a Google Groups form can cause - [issues](https://groups.google.com/forum/#!topic/xarray/hK158wAviPs) with line breaks, so take care -Note on version numbering: +## Note on version numbering We follow a rough approximation of semantic version. Only major releases (0.X.0) should include breaking changes. Minor releases (0.X.Y) are for bug fixes and diff --git a/azure-pipelines.yml b/azure-pipelines.yml index ff85501c555..e04c8f74f68 100644 --- a/azure-pipelines.yml +++ b/azure-pipelines.yml @@ -108,21 +108,3 @@ jobs: python ci/min_deps_check.py ci/requirements/py36-bare-minimum.yml python ci/min_deps_check.py ci/requirements/py36-min-all-deps.yml displayName: minimum versions policy - -- job: Docs - pool: - vmImage: 'ubuntu-16.04' - steps: - - template: ci/azure/install.yml - parameters: - env_file: ci/requirements/doc.yml - - bash: | - source activate xarray-tests - # Replicate the exact environment created by the readthedocs CI - conda install --yes --quiet -c pkgs/main mock pillow sphinx sphinx_rtd_theme - displayName: Replicate readthedocs CI environment - - bash: | - source activate xarray-tests - cd doc - sphinx-build -W --keep-going -j auto -b html -d _build/doctrees . _build/html - displayName: Build HTML docs diff --git a/ci/azure/install.yml b/ci/azure/install.yml index 60559dd2064..83895eebe01 100644 --- a/ci/azure/install.yml +++ b/ci/azure/install.yml @@ -10,16 +10,37 @@ steps: conda env create -n xarray-tests --file ${{ parameters.env_file }} displayName: Install conda dependencies +# TODO: add sparse back in, once Numba works with the development version of +# NumPy again: https://github.com/pydata/xarray/issues/4146 - bash: | source activate xarray-tests + conda uninstall -y --force \ + numpy \ + scipy \ + pandas \ + matplotlib \ + dask \ + distributed \ + zarr \ + cftime \ + rasterio \ + pint \ + bottleneck \ + sparse python -m pip install \ - -f https://7933911d6844c6c53a7d-47bd50c35cd79bd838daf386af554a83.ssl.cf2.rackcdn.com \ + -i https://pypi.anaconda.org/scipy-wheels-nightly/simple \ --no-deps \ --pre \ --upgrade \ - matplotlib \ numpy \ - scipy + scipy \ + pandas + python -m pip install \ + -f https://7933911d6844c6c53a7d-47bd50c35cd79bd838daf386af554a83.ssl.cf2.rackcdn.com \ + --no-deps \ + --pre \ + --upgrade \ + matplotlib python -m pip install \ --no-deps \ --upgrade \ @@ -29,8 +50,7 @@ steps: git+https://github.com/Unidata/cftime \ git+https://github.com/mapbox/rasterio \ git+https://github.com/hgrecco/pint \ - git+https://github.com/pydata/bottleneck \ - git+https://github.com/pandas-dev/pandas + git+https://github.com/pydata/bottleneck condition: eq(variables['UPSTREAM_DEV'], 'true') displayName: Install upstream dev dependencies diff --git a/ci/requirements/doc.yml b/ci/requirements/doc.yml index 2987303c92a..6caebc46cdf 100644 --- a/ci/requirements/doc.yml +++ b/ci/requirements/doc.yml @@ -17,11 +17,10 @@ dependencies: - netcdf4>=1.5 - numba - numpy>=1.17 - - numpydoc - pandas>=1.0 - rasterio>=1.1 - seaborn - setuptools - sphinx>=2.3 - sphinx_rtd_theme>=0.4 - - zarr>=2.4 \ No newline at end of file + - zarr>=2.4 diff --git a/ci/requirements/py36-min-all-deps.yml b/ci/requirements/py36-min-all-deps.yml index 86540197dcc..a72cd000680 100644 --- a/ci/requirements/py36-min-all-deps.yml +++ 
b/ci/requirements/py36-min-all-deps.yml @@ -15,8 +15,8 @@ dependencies: - cfgrib=0.9 - cftime=1.0 - coveralls - - dask=2.2 - - distributed=2.2 + - dask=2.5 + - distributed=2.5 - flake8 - h5netcdf=0.7 - h5py=2.9 # Policy allows for 2.10, but it's a conflict-fest diff --git a/ci/requirements/py36-min-nep18.yml b/ci/requirements/py36-min-nep18.yml index a5eded49cd4..cd2b1a18c77 100644 --- a/ci/requirements/py36-min-nep18.yml +++ b/ci/requirements/py36-min-nep18.yml @@ -6,12 +6,11 @@ dependencies: # require drastically newer packages than everything else - python=3.6 - coveralls - - dask=2.4 - - distributed=2.4 + - dask=2.5 + - distributed=2.5 - msgpack-python=0.6 # remove once distributed is bumped. distributed GH3491 - numpy=1.17 - pandas=0.25 - - pint=0.11 - pip - pytest - pytest-cov @@ -19,3 +18,5 @@ dependencies: - scipy=1.2 - setuptools=41.2 - sparse=0.8 + - pip: + - pint==0.13 diff --git a/ci/requirements/py36.yml b/ci/requirements/py36.yml index a500173f277..aa2baf9dcce 100644 --- a/ci/requirements/py36.yml +++ b/ci/requirements/py36.yml @@ -28,7 +28,6 @@ dependencies: - numba - numpy - pandas - - pint - pip - pseudonetcdf - pydap @@ -45,3 +44,4 @@ dependencies: - zarr - pip: - numbagg + - pint diff --git a/ci/requirements/py37-windows.yml b/ci/requirements/py37-windows.yml index e9e5c7a900a..8b12704d644 100644 --- a/ci/requirements/py37-windows.yml +++ b/ci/requirements/py37-windows.yml @@ -28,7 +28,6 @@ dependencies: - numba - numpy - pandas - - pint - pip - pseudonetcdf - pydap @@ -45,3 +44,4 @@ dependencies: - zarr - pip: - numbagg + - pint diff --git a/ci/requirements/py37.yml b/ci/requirements/py37.yml index dba3926596e..70c453e8776 100644 --- a/ci/requirements/py37.yml +++ b/ci/requirements/py37.yml @@ -28,7 +28,6 @@ dependencies: - numba - numpy - pandas - - pint - pip - pseudonetcdf - pydap @@ -45,3 +44,4 @@ dependencies: - zarr - pip: - numbagg + - pint diff --git a/ci/requirements/py38-all-but-dask.yml b/ci/requirements/py38-all-but-dask.yml index a375d9e1e5a..6d76eecbd6a 100644 --- a/ci/requirements/py38-all-but-dask.yml +++ b/ci/requirements/py38-all-but-dask.yml @@ -25,7 +25,6 @@ dependencies: - numba - numpy - pandas - - pint - pip - pseudonetcdf - pydap @@ -42,3 +41,4 @@ dependencies: - zarr - pip: - numbagg + - pint diff --git a/ci/requirements/py38.yml b/ci/requirements/py38.yml index 24602f884e9..6f35138978c 100644 --- a/ci/requirements/py38.yml +++ b/ci/requirements/py38.yml @@ -22,13 +22,12 @@ dependencies: - isort - lxml # Optional dep of pydap - matplotlib - - mypy=0.761 # Must match .pre-commit-config.yaml + - mypy=0.780 # Must match .pre-commit-config.yaml - nc-time-axis - netcdf4 - numba - numpy - pandas - - pint - pip - pseudonetcdf - pydap @@ -45,3 +44,4 @@ dependencies: - zarr - pip: - numbagg + - pint diff --git a/doc/_templates/autosummary/accessor.rst b/doc/_templates/autosummary/accessor.rst new file mode 100644 index 00000000000..4ba745cd6fd --- /dev/null +++ b/doc/_templates/autosummary/accessor.rst @@ -0,0 +1,6 @@ +{{ fullname }} +{{ underline }} + +.. currentmodule:: {{ module.split('.')[0] }} + +.. autoaccessor:: {{ (module.split('.')[1:] + [objname]) | join('.') }} diff --git a/doc/_templates/autosummary/accessor_attribute.rst b/doc/_templates/autosummary/accessor_attribute.rst new file mode 100644 index 00000000000..b5ad65d6a73 --- /dev/null +++ b/doc/_templates/autosummary/accessor_attribute.rst @@ -0,0 +1,6 @@ +{{ fullname }} +{{ underline }} + +.. currentmodule:: {{ module.split('.')[0] }} + +.. 
autoaccessorattribute:: {{ (module.split('.')[1:] + [objname]) | join('.') }} diff --git a/doc/_templates/autosummary/accessor_callable.rst b/doc/_templates/autosummary/accessor_callable.rst new file mode 100644 index 00000000000..7a3301814f5 --- /dev/null +++ b/doc/_templates/autosummary/accessor_callable.rst @@ -0,0 +1,6 @@ +{{ fullname }} +{{ underline }} + +.. currentmodule:: {{ module.split('.')[0] }} + +.. autoaccessorcallable:: {{ (module.split('.')[1:] + [objname]) | join('.') }}.__call__ diff --git a/doc/_templates/autosummary/accessor_method.rst b/doc/_templates/autosummary/accessor_method.rst new file mode 100644 index 00000000000..aefbba6ef1b --- /dev/null +++ b/doc/_templates/autosummary/accessor_method.rst @@ -0,0 +1,6 @@ +{{ fullname }} +{{ underline }} + +.. currentmodule:: {{ module.split('.')[0] }} + +.. autoaccessormethod:: {{ (module.split('.')[1:] + [objname]) | join('.') }} diff --git a/doc/api-hidden.rst b/doc/api-hidden.rst index 313428c29d2..efef4259b74 100644 --- a/doc/api-hidden.rst +++ b/doc/api-hidden.rst @@ -9,8 +9,6 @@ .. autosummary:: :toctree: generated/ - auto_combine - Dataset.nbytes Dataset.chunks @@ -43,8 +41,6 @@ core.rolling.DatasetCoarsen.all core.rolling.DatasetCoarsen.any - core.rolling.DatasetCoarsen.argmax - core.rolling.DatasetCoarsen.argmin core.rolling.DatasetCoarsen.count core.rolling.DatasetCoarsen.max core.rolling.DatasetCoarsen.mean @@ -70,8 +66,6 @@ core.groupby.DatasetGroupBy.where core.groupby.DatasetGroupBy.all core.groupby.DatasetGroupBy.any - core.groupby.DatasetGroupBy.argmax - core.groupby.DatasetGroupBy.argmin core.groupby.DatasetGroupBy.count core.groupby.DatasetGroupBy.max core.groupby.DatasetGroupBy.mean @@ -87,8 +81,6 @@ core.resample.DatasetResample.all core.resample.DatasetResample.any core.resample.DatasetResample.apply - core.resample.DatasetResample.argmax - core.resample.DatasetResample.argmin core.resample.DatasetResample.assign core.resample.DatasetResample.assign_coords core.resample.DatasetResample.bfill @@ -112,8 +104,6 @@ core.resample.DatasetResample.dims core.resample.DatasetResample.groups - core.rolling.DatasetRolling.argmax - core.rolling.DatasetRolling.argmin core.rolling.DatasetRolling.count core.rolling.DatasetRolling.max core.rolling.DatasetRolling.mean @@ -187,8 +177,6 @@ core.rolling.DataArrayCoarsen.all core.rolling.DataArrayCoarsen.any - core.rolling.DataArrayCoarsen.argmax - core.rolling.DataArrayCoarsen.argmin core.rolling.DataArrayCoarsen.count core.rolling.DataArrayCoarsen.max core.rolling.DataArrayCoarsen.mean @@ -213,8 +201,6 @@ core.groupby.DataArrayGroupBy.where core.groupby.DataArrayGroupBy.all core.groupby.DataArrayGroupBy.any - core.groupby.DataArrayGroupBy.argmax - core.groupby.DataArrayGroupBy.argmin core.groupby.DataArrayGroupBy.count core.groupby.DataArrayGroupBy.max core.groupby.DataArrayGroupBy.mean @@ -230,8 +216,6 @@ core.resample.DataArrayResample.all core.resample.DataArrayResample.any core.resample.DataArrayResample.apply - core.resample.DataArrayResample.argmax - core.resample.DataArrayResample.argmin core.resample.DataArrayResample.assign_coords core.resample.DataArrayResample.bfill core.resample.DataArrayResample.count @@ -254,8 +238,6 @@ core.resample.DataArrayResample.dims core.resample.DataArrayResample.groups - core.rolling.DataArrayRolling.argmax - core.rolling.DataArrayRolling.argmin core.rolling.DataArrayRolling.count core.rolling.DataArrayRolling.max core.rolling.DataArrayRolling.mean @@ -425,8 +407,6 @@ IndexVariable.all IndexVariable.any - IndexVariable.argmax - 
IndexVariable.argmin IndexVariable.argsort IndexVariable.astype IndexVariable.broadcast_equals @@ -566,8 +546,6 @@ CFTimeIndex.all CFTimeIndex.any CFTimeIndex.append - CFTimeIndex.argmax - CFTimeIndex.argmin CFTimeIndex.argsort CFTimeIndex.asof CFTimeIndex.asof_locs diff --git a/doc/api.rst b/doc/api.rst index b37c84e7a81..603e3e8f6cf 100644 --- a/doc/api.rst +++ b/doc/api.rst @@ -21,14 +21,16 @@ Top-level functions broadcast concat merge - auto_combine combine_by_coords combine_nested where set_options + infer_freq full_like zeros_like ones_like + cov + corr dot polyval map_blocks @@ -173,6 +175,7 @@ Computation Dataset.quantile Dataset.differentiate Dataset.integrate + Dataset.map_blocks Dataset.polyfit **Aggregation**: @@ -229,6 +232,15 @@ Reshaping and reorganizing Dataset.sortby Dataset.broadcast_like +Plotting +-------- + +.. autosummary:: + :toctree: generated/ + :template: autosummary/accessor_method.rst + + Dataset.plot.scatter + DataArray ========= @@ -358,6 +370,8 @@ Computation DataArray.integrate DataArray.polyfit DataArray.str + DataArray.map_blocks + **Aggregation**: :py:attr:`~DataArray.all` @@ -397,6 +411,122 @@ Computation :py:attr:`~core.groupby.DataArrayGroupBy.where` :py:attr:`~core.groupby.DataArrayGroupBy.quantile` + +String manipulation +------------------- + +.. autosummary:: + :toctree: generated/ + :template: autosummary/accessor_method.rst + + DataArray.str.capitalize + DataArray.str.center + DataArray.str.contains + DataArray.str.count + DataArray.str.decode + DataArray.str.encode + DataArray.str.endswith + DataArray.str.find + DataArray.str.get + DataArray.str.index + DataArray.str.isalnum + DataArray.str.isalpha + DataArray.str.isdecimal + DataArray.str.isdigit + DataArray.str.isnumeric + DataArray.str.isspace + DataArray.str.istitle + DataArray.str.isupper + DataArray.str.len + DataArray.str.ljust + DataArray.str.lower + DataArray.str.lstrip + DataArray.str.match + DataArray.str.pad + DataArray.str.repeat + DataArray.str.replace + DataArray.str.rfind + DataArray.str.rindex + DataArray.str.rjust + DataArray.str.rstrip + DataArray.str.slice + DataArray.str.slice_replace + DataArray.str.startswith + DataArray.str.strip + DataArray.str.swapcase + DataArray.str.title + DataArray.str.translate + DataArray.str.upper + DataArray.str.wrap + DataArray.str.zfill + +Datetimelike properties +----------------------- + +**Datetime properties**: + +.. autosummary:: + :toctree: generated/ + :template: autosummary/accessor_attribute.rst + + DataArray.dt.year + DataArray.dt.month + DataArray.dt.day + DataArray.dt.hour + DataArray.dt.minute + DataArray.dt.second + DataArray.dt.microsecond + DataArray.dt.nanosecond + DataArray.dt.weekofyear + DataArray.dt.week + DataArray.dt.dayofweek + DataArray.dt.weekday + DataArray.dt.weekday_name + DataArray.dt.dayofyear + DataArray.dt.quarter + DataArray.dt.days_in_month + DataArray.dt.daysinmonth + DataArray.dt.season + DataArray.dt.time + DataArray.dt.is_month_start + DataArray.dt.is_month_end + DataArray.dt.is_quarter_end + DataArray.dt.is_year_start + DataArray.dt.is_leap_year + +**Datetime methods**: + +.. autosummary:: + :toctree: generated/ + :template: autosummary/accessor_method.rst + + DataArray.dt.floor + DataArray.dt.ceil + DataArray.dt.round + DataArray.dt.strftime + +**Timedelta properties**: + +.. autosummary:: + :toctree: generated/ + :template: autosummary/accessor_attribute.rst + + DataArray.dt.days + DataArray.dt.seconds + DataArray.dt.microseconds + DataArray.dt.nanoseconds + +**Timedelta methods**: + +.. 
autosummary:: + :toctree: generated/ + :template: autosummary/accessor_method.rst + + DataArray.dt.floor + DataArray.dt.ceil + DataArray.dt.round + + Reshaping and reorganizing -------------------------- @@ -413,6 +543,27 @@ Reshaping and reorganizing DataArray.sortby DataArray.broadcast_like +Plotting +-------- + +.. autosummary:: + :toctree: generated/ + :template: autosummary/accessor_callable.rst + + DataArray.plot + +.. autosummary:: + :toctree: generated/ + :template: autosummary/accessor_method.rst + + DataArray.plot.contourf + DataArray.plot.contour + DataArray.plot.hist + DataArray.plot.imshow + DataArray.plot.line + DataArray.plot.pcolormesh + DataArray.plot.step + .. _api.ufuncs: Universal functions @@ -518,7 +669,6 @@ Dataset methods Dataset.load Dataset.chunk Dataset.unify_chunks - Dataset.map_blocks Dataset.filter_by_attrs Dataset.info @@ -550,7 +700,6 @@ DataArray methods DataArray.load DataArray.chunk DataArray.unify_chunks - DataArray.map_blocks Coordinates objects =================== @@ -660,25 +809,6 @@ Creating custom indexes cftime_range -Plotting -======== - -.. autosummary:: - :toctree: generated/ - - Dataset.plot - plot.scatter - DataArray.plot - plot.plot - plot.contourf - plot.contour - plot.hist - plot.imshow - plot.line - plot.pcolormesh - plot.step - plot.FacetGrid - Faceting -------- .. autosummary:: diff --git a/doc/combining.rst b/doc/combining.rst index 05b7f2efc50..ffc6575c579 100644 --- a/doc/combining.rst +++ b/doc/combining.rst @@ -4,11 +4,12 @@ Combining data -------------- .. ipython:: python - :suppress: + :suppress: import numpy as np import pandas as pd import xarray as xr + np.random.seed(123456) * For combining datasets or data arrays along a single dimension, see concatenate_. @@ -28,11 +29,10 @@ that dimension: .. ipython:: python - arr = xr.DataArray(np.random.randn(2, 3), - [('x', ['a', 'b']), ('y', [10, 20, 30])]) + arr = xr.DataArray(np.random.randn(2, 3), [("x", ["a", "b"]), ("y", [10, 20, 30])]) arr[:, :1] # this resembles how you would use np.concatenate - xr.concat([arr[:, :1], arr[:, 1:]], dim='y') + xr.concat([arr[:, :1], arr[:, 1:]], dim="y") In addition to combining along an existing dimension, ``concat`` can create a new dimension by stacking lower dimensional arrays together: @@ -41,7 +41,7 @@ new dimension by stacking lower dimensional arrays together: arr[0] # to combine these 1d arrays into a 2d array in numpy, you would use np.array - xr.concat([arr[0], arr[1]], 'x') + xr.concat([arr[0], arr[1]], "x") If the second argument to ``concat`` is a new dimension name, the arrays will be concatenated along that new dimension, which is always inserted as the first @@ -49,7 +49,7 @@ dimension: .. ipython:: python - xr.concat([arr[0], arr[1]], 'new_dim') + xr.concat([arr[0], arr[1]], "new_dim") The second argument to ``concat`` can also be an :py:class:`~pandas.Index` or :py:class:`~xarray.DataArray` object as well as a string, in which case it is @@ -57,14 +57,14 @@ used to label the values along the new dimension: .. ipython:: python - xr.concat([arr[0], arr[1]], pd.Index([-90, -100], name='new_dim')) + xr.concat([arr[0], arr[1]], pd.Index([-90, -100], name="new_dim")) Of course, ``concat`` also works on ``Dataset`` objects: .. 
ipython:: python - ds = arr.to_dataset(name='foo') - xr.concat([ds.sel(x='a'), ds.sel(x='b')], 'x') + ds = arr.to_dataset(name="foo") + xr.concat([ds.sel(x="a"), ds.sel(x="b")], "x") :py:func:`~xarray.concat` has a number of options which provide deeper control over which variables are concatenated and how it handles conflicting variables @@ -84,8 +84,8 @@ To combine variables and coordinates between multiple ``DataArray`` and/or .. ipython:: python - xr.merge([ds, ds.rename({'foo': 'bar'})]) - xr.merge([xr.DataArray(n, name='var%d' % n) for n in range(5)]) + xr.merge([ds, ds.rename({"foo": "bar"})]) + xr.merge([xr.DataArray(n, name="var%d" % n) for n in range(5)]) If you merge another dataset (or a dictionary including data array objects), by default the resulting dataset will be aligned on the **union** of all index @@ -93,7 +93,7 @@ coordinates: .. ipython:: python - other = xr.Dataset({'bar': ('x', [1, 2, 3, 4]), 'x': list('abcd')}) + other = xr.Dataset({"bar": ("x", [1, 2, 3, 4]), "x": list("abcd")}) xr.merge([ds, other]) This ensures that ``merge`` is non-destructive. ``xarray.MergeError`` is raised @@ -116,7 +116,7 @@ used in the :py:class:`~xarray.Dataset` constructor: .. ipython:: python - xr.Dataset({'a': arr[:-1], 'b': arr[1:]}) + xr.Dataset({"a": arr[:-1], "b": arr[1:]}) .. _combine: @@ -131,8 +131,8 @@ are filled with ``NaN``. For example: .. ipython:: python - ar0 = xr.DataArray([[0, 0], [0, 0]], [('x', ['a', 'b']), ('y', [-1, 0])]) - ar1 = xr.DataArray([[1, 1], [1, 1]], [('x', ['b', 'c']), ('y', [0, 1])]) + ar0 = xr.DataArray([[0, 0], [0, 0]], [("x", ["a", "b"]), ("y", [-1, 0])]) + ar1 = xr.DataArray([[1, 1], [1, 1]], [("x", ["b", "c"]), ("y", [0, 1])]) ar0.combine_first(ar1) ar1.combine_first(ar0) @@ -152,7 +152,7 @@ variables with new values: .. ipython:: python - ds.update({'space': ('space', [10.2, 9.4, 3.9])}) + ds.update({"space": ("space", [10.2, 9.4, 3.9])}) However, dimensions are still required to be consistent between different Dataset variables, so you cannot change the size of a dimension unless you @@ -170,7 +170,7 @@ syntax: .. ipython:: python - ds['baz'] = xr.DataArray([9, 9, 9, 9, 9], coords=[('x', list('abcde'))]) + ds["baz"] = xr.DataArray([9, 9, 9, 9, 9], coords=[("x", list("abcde"))]) ds.baz Equals and identical @@ -193,7 +193,7 @@ object: .. ipython:: python - arr.identical(arr.rename('bar')) + arr.identical(arr.rename("bar")) :py:attr:`~xarray.Dataset.broadcast_equals` does a more relaxed form of equality check that allows variables to have different dimensions, as long as values @@ -201,8 +201,8 @@ are constant along those new dimensions: .. ipython:: python - left = xr.Dataset(coords={'x': 0}) - right = xr.Dataset({'x': [0, 0, 0]}) + left = xr.Dataset(coords={"x": 0}) + right = xr.Dataset({"x": [0, 0, 0]}) left.broadcast_equals(right) Like pandas objects, two xarray objects are still equal or identical if they have @@ -231,9 +231,9 @@ coordinates as long as any non-missing values agree or are disjoint: .. 
ipython:: python - ds1 = xr.Dataset({'a': ('x', [10, 20, 30, np.nan])}, {'x': [1, 2, 3, 4]}) - ds2 = xr.Dataset({'a': ('x', [np.nan, 30, 40, 50])}, {'x': [2, 3, 4, 5]}) - xr.merge([ds1, ds2], compat='no_conflicts') + ds1 = xr.Dataset({"a": ("x", [10, 20, 30, np.nan])}, {"x": [1, 2, 3, 4]}) + ds2 = xr.Dataset({"a": ("x", [np.nan, 30, 40, 50])}, {"x": [2, 3, 4, 5]}) + xr.merge([ds1, ds2], compat="no_conflicts") Note that due to the underlying representation of missing values as floating point numbers (``NaN``), variable data type is not always preserved when merging @@ -273,10 +273,12 @@ datasets into a doubly-nested list, e.g: .. ipython:: python - arr = xr.DataArray(name='temperature', data=np.random.randint(5, size=(2, 2)), dims=['x', 'y']) + arr = xr.DataArray( + name="temperature", data=np.random.randint(5, size=(2, 2)), dims=["x", "y"] + ) arr ds_grid = [[arr, arr], [arr, arr]] - xr.combine_nested(ds_grid, concat_dim=['x', 'y']) + xr.combine_nested(ds_grid, concat_dim=["x", "y"]) :py:func:`~xarray.combine_nested` can also be used to explicitly merge datasets with different variables. For example if we have 4 datasets, which are divided @@ -286,10 +288,10 @@ we wish to use ``merge`` instead of ``concat``: .. ipython:: python - temp = xr.DataArray(name='temperature', data=np.random.randn(2), dims=['t']) - precip = xr.DataArray(name='precipitation', data=np.random.randn(2), dims=['t']) + temp = xr.DataArray(name="temperature", data=np.random.randn(2), dims=["t"]) + precip = xr.DataArray(name="precipitation", data=np.random.randn(2), dims=["t"]) ds_grid = [[temp, precip], [temp, precip]] - xr.combine_nested(ds_grid, concat_dim=['t', None]) + xr.combine_nested(ds_grid, concat_dim=["t", None]) :py:func:`~xarray.combine_by_coords` is for combining objects which have dimension coordinates which specify their relationship to and order relative to one @@ -302,8 +304,8 @@ coordinates, not on their position in the list passed to ``combine_by_coords``. .. ipython:: python :okwarning: - x1 = xr.DataArray(name='foo', data=np.random.randn(3), coords=[('x', [0, 1, 2])]) - x2 = xr.DataArray(name='foo', data=np.random.randn(3), coords=[('x', [3, 4, 5])]) + x1 = xr.DataArray(name="foo", data=np.random.randn(3), coords=[("x", [0, 1, 2])]) + x2 = xr.DataArray(name="foo", data=np.random.randn(3), coords=[("x", [3, 4, 5])]) xr.combine_by_coords([x2, x1]) These functions can be used by :py:func:`~xarray.open_mfdataset` to open many diff --git a/doc/computation.rst b/doc/computation.rst index 4b8014c4782..3660aed93ed 100644 --- a/doc/computation.rst +++ b/doc/computation.rst @@ -18,17 +18,19 @@ Arithmetic operations with a single DataArray automatically vectorize (like numpy) over all array values: .. ipython:: python - :suppress: + :suppress: import numpy as np import pandas as pd import xarray as xr + np.random.seed(123456) .. ipython:: python - arr = xr.DataArray(np.random.RandomState(0).randn(2, 3), - [('x', ['a', 'b']), ('y', [10, 20, 30])]) + arr = xr.DataArray( + np.random.RandomState(0).randn(2, 3), [("x", ["a", "b"]), ("y", [10, 20, 30])] + ) arr - 3 abs(arr) @@ -45,7 +47,7 @@ Use :py:func:`~xarray.where` to conditionally switch between values: .. ipython:: python - xr.where(arr > 0, 'positive', 'negative') + xr.where(arr > 0, "positive", "negative") Use `@` to perform matrix multiplication: @@ -73,14 +75,14 @@ methods for working with missing data from pandas: .. 
ipython:: python - x = xr.DataArray([0, 1, np.nan, np.nan, 2], dims=['x']) + x = xr.DataArray([0, 1, np.nan, np.nan, 2], dims=["x"]) x.isnull() x.notnull() x.count() - x.dropna(dim='x') + x.dropna(dim="x") x.fillna(-1) - x.ffill('x') - x.bfill('x') + x.ffill("x") + x.bfill("x") Like pandas, xarray uses the float value ``np.nan`` (not-a-number) to represent missing values. @@ -90,9 +92,12 @@ for filling missing values via 1D interpolation. .. ipython:: python - x = xr.DataArray([0, 1, np.nan, np.nan, 2], dims=['x'], - coords={'xx': xr.Variable('x', [0, 1, 1.1, 1.9, 3])}) - x.interpolate_na(dim='x', method='linear', use_coordinate='xx') + x = xr.DataArray( + [0, 1, np.nan, np.nan, 2], + dims=["x"], + coords={"xx": xr.Variable("x", [0, 1, 1.1, 1.9, 3])}, + ) + x.interpolate_na(dim="x", method="linear", use_coordinate="xx") Note that xarray slightly diverges from the pandas ``interpolate`` syntax by providing the ``use_coordinate`` keyword which facilitates a clear specification @@ -110,8 +115,8 @@ applied along particular dimension(s): .. ipython:: python - arr.sum(dim='x') - arr.std(['x', 'y']) + arr.sum(dim="x") + arr.std(["x", "y"]) arr.min() @@ -121,7 +126,7 @@ for wrapping code designed to work with numpy arrays), you can use the .. ipython:: python - arr.get_axis_num('y') + arr.get_axis_num("y") These operations automatically skip missing values, like in pandas: @@ -142,8 +147,7 @@ method supports rolling window aggregation: .. ipython:: python - arr = xr.DataArray(np.arange(0, 7.5, 0.5).reshape(3, 5), - dims=('x', 'y')) + arr = xr.DataArray(np.arange(0, 7.5, 0.5).reshape(3, 5), dims=("x", "y")) arr :py:meth:`~xarray.DataArray.rolling` is applied along one dimension using the @@ -194,8 +198,9 @@ We can also manually iterate through ``Rolling`` objects: .. code:: python - for label, arr_window in r: - # arr_window is a view of x + for label, arr_window in r: + # arr_window is a view of x + ... .. _comput.rolling_exp: @@ -222,9 +227,9 @@ windowed rolling, convolution, short-time FFT etc. .. ipython:: python # rolling with 2-point stride - rolling_da = r.construct('window_dim', stride=2) + rolling_da = r.construct("window_dim", stride=2) rolling_da - rolling_da.mean('window_dim', skipna=False) + rolling_da.mean("window_dim", skipna=False) Because the ``DataArray`` given by ``r.construct('window_dim')`` is a view of the original array, it is memory efficient. @@ -232,8 +237,8 @@ You can also use ``construct`` to compute a weighted rolling sum: .. ipython:: python - weight = xr.DataArray([0.25, 0.5, 0.25], dims=['window']) - arr.rolling(y=3).construct('window').dot(weight) + weight = xr.DataArray([0.25, 0.5, 0.25], dims=["window"]) + arr.rolling(y=3).construct("window").dot(weight) .. note:: numpy's Nan-aggregation functions such as ``nansum`` copy the original array. @@ -254,52 +259,52 @@ support weighted ``sum`` and weighted ``mean``. .. ipython:: python - coords = dict(month=('month', [1, 2, 3])) + coords = dict(month=("month", [1, 2, 3])) - prec = xr.DataArray([1.1, 1.0, 0.9], dims=('month', ), coords=coords) - weights = xr.DataArray([31, 28, 31], dims=('month', ), coords=coords) + prec = xr.DataArray([1.1, 1.0, 0.9], dims=("month",), coords=coords) + weights = xr.DataArray([31, 28, 31], dims=("month",), coords=coords) Create a weighted object: .. ipython:: python - weighted_prec = prec.weighted(weights) - weighted_prec + weighted_prec = prec.weighted(weights) + weighted_prec Calculate the weighted sum: .. 
ipython:: python - weighted_prec.sum() + weighted_prec.sum() Calculate the weighted mean: .. ipython:: python - weighted_prec.mean(dim="month") + weighted_prec.mean(dim="month") The weighted sum corresponds to: .. ipython:: python - weighted_sum = (prec * weights).sum() - weighted_sum + weighted_sum = (prec * weights).sum() + weighted_sum and the weighted mean to: .. ipython:: python - weighted_mean = weighted_sum / weights.sum() - weighted_mean + weighted_mean = weighted_sum / weights.sum() + weighted_mean However, the functions also take missing values in the data into account: .. ipython:: python - data = xr.DataArray([np.NaN, 2, 4]) - weights = xr.DataArray([8, 1, 1]) + data = xr.DataArray([np.NaN, 2, 4]) + weights = xr.DataArray([8, 1, 1]) - data.weighted(weights).mean() + data.weighted(weights).mean() Using ``(data * weights).sum() / weights.sum()`` would (incorrectly) result in 0.6. @@ -309,16 +314,16 @@ If the weights add up to 0, ``sum`` returns 0: .. ipython:: python - data = xr.DataArray([1.0, 1.0]) - weights = xr.DataArray([-1.0, 1.0]) + data = xr.DataArray([1.0, 1.0]) + weights = xr.DataArray([-1.0, 1.0]) - data.weighted(weights).sum() + data.weighted(weights).sum() and ``mean`` returns ``NaN``: .. ipython:: python - data.weighted(weights).mean() + data.weighted(weights).mean() .. note:: @@ -336,18 +341,21 @@ methods. This supports the block aggregation along multiple dimensions, .. ipython:: python - x = np.linspace(0, 10, 300) - t = pd.date_range('15/12/1999', periods=364) - da = xr.DataArray(np.sin(x) * np.cos(np.linspace(0, 1, 364)[:, np.newaxis]), - dims=['time', 'x'], coords={'time': t, 'x': x}) - da + x = np.linspace(0, 10, 300) + t = pd.date_range("15/12/1999", periods=364) + da = xr.DataArray( + np.sin(x) * np.cos(np.linspace(0, 1, 364)[:, np.newaxis]), + dims=["time", "x"], + coords={"time": t, "x": x}, + ) + da In order to take a block mean for every 7 days along ``time`` dimension and every 2 points along ``x`` dimension, .. ipython:: python - da.coarsen(time=7, x=2).mean() + da.coarsen(time=7, x=2).mean() :py:meth:`~xarray.DataArray.coarsen` raises a ``ValueError`` if the data length is not a multiple of the corresponding window size. @@ -356,14 +364,14 @@ the excess entries or padding ``nan`` to insufficient entries, .. ipython:: python - da.coarsen(time=30, x=2, boundary='trim').mean() + da.coarsen(time=30, x=2, boundary="trim").mean() If you want to apply a specific function to a coordinate, you can pass the function or method name to the ``coord_func`` option, .. ipython:: python - da.coarsen(time=7, x=2, coord_func={'time': 'min'}).mean() + da.coarsen(time=7, x=2, coord_func={"time": "min"}).mean() .. _compute.using_coordinates: @@ -377,24 +385,25 @@ central finite differences using their coordinates, .. ipython:: python - a = xr.DataArray([0, 1, 2, 3], dims=['x'], coords=[[0.1, 0.11, 0.2, 0.3]]) + a = xr.DataArray([0, 1, 2, 3], dims=["x"], coords=[[0.1, 0.11, 0.2, 0.3]]) a - a.differentiate('x') + a.differentiate("x") This method can also be used for multidimensional arrays, .. ipython:: python - a = xr.DataArray(np.arange(8).reshape(4, 2), dims=['x', 'y'], - coords={'x': [0.1, 0.11, 0.2, 0.3]}) - a.differentiate('x') + a = xr.DataArray( + np.arange(8).reshape(4, 2), dims=["x", "y"], coords={"x": [0.1, 0.11, 0.2, 0.3]} + ) + a.differentiate("x") :py:meth:`~xarray.DataArray.integrate` computes integration based on the trapezoidal rule using their coordinates, .. ipython:: python - a.integrate('x') + a.integrate("x") .. 
note:: These methods are limited to simple cartesian geometry. Differentiation @@ -412,9 +421,9 @@ best fitting coefficients along a given dimension and for a given order, .. ipython:: python - x = xr.DataArray(np.arange(10), dims=['x'], name='x') - a = xr.DataArray(3 + 4 * x, dims=['x'], coords={'x': x}) - out = a.polyfit(dim='x', deg=1, full=True) + x = xr.DataArray(np.arange(10), dims=["x"], name="x") + a = xr.DataArray(3 + 4 * x, dims=["x"], coords={"x": x}) + out = a.polyfit(dim="x", deg=1, full=True) out The method outputs a dataset containing the coefficients (and more if `full=True`). @@ -443,9 +452,9 @@ arrays with different sizes aligned along different dimensions: .. ipython:: python - a = xr.DataArray([1, 2], [('x', ['a', 'b'])]) + a = xr.DataArray([1, 2], [("x", ["a", "b"])]) a - b = xr.DataArray([-1, -2, -3], [('y', [10, 20, 30])]) + b = xr.DataArray([-1, -2, -3], [("y", [10, 20, 30])]) b With xarray, we can apply binary mathematical operations to these arrays, and @@ -460,7 +469,7 @@ appeared: .. ipython:: python - c = xr.DataArray(np.arange(6).reshape(3, 2), [b['y'], a['x']]) + c = xr.DataArray(np.arange(6).reshape(3, 2), [b["y"], a["x"]]) c a + c @@ -494,7 +503,7 @@ operations. The default result of a binary operation is by the *intersection* .. ipython:: python - arr = xr.DataArray(np.arange(3), [('x', range(3))]) + arr = xr.DataArray(np.arange(3), [("x", range(3))]) arr + arr[:-1] If coordinate values for a dimension are missing on either argument, all @@ -503,7 +512,7 @@ matching dimensions must have the same size: .. ipython:: :verbatim: - In [1]: arr + xr.DataArray([1, 2], dims='x') + In [1]: arr + xr.DataArray([1, 2], dims="x") ValueError: arguments without labels along dimension 'x' cannot be aligned because they have different dimension size(s) {2} than the size of the aligned dimension labels: 3 @@ -562,16 +571,20 @@ variables: .. ipython:: python - ds = xr.Dataset({'x_and_y': (('x', 'y'), np.random.randn(3, 5)), - 'x_only': ('x', np.random.randn(3))}, - coords=arr.coords) + ds = xr.Dataset( + { + "x_and_y": (("x", "y"), np.random.randn(3, 5)), + "x_only": ("x", np.random.randn(3)), + }, + coords=arr.coords, + ) ds > 0 Datasets support most of the same methods found on data arrays: .. ipython:: python - ds.mean(dim='x') + ds.mean(dim="x") abs(ds) Datasets also support NumPy ufuncs (requires NumPy v1.13 or newer), or @@ -594,7 +607,7 @@ Arithmetic between two datasets matches data variables of the same name: .. ipython:: python - ds2 = xr.Dataset({'x_and_y': 0, 'x_only': 100}) + ds2 = xr.Dataset({"x_and_y": 0, "x_only": 100}) ds - ds2 Similarly to index based alignment, the result has the intersection of all @@ -638,7 +651,7 @@ any additional arguments: .. ipython:: python squared_error = lambda x, y: (x - y) ** 2 - arr1 = xr.DataArray([0, 1, 2, 3], dims='x') + arr1 = xr.DataArray([0, 1, 2, 3], dims="x") xr.apply_ufunc(squared_error, arr1, 1) For using more complex operations that consider some array values collectively, @@ -658,21 +671,21 @@ to set ``axis=-1``. As an example, here is how we would wrap .. code-block:: python def vector_norm(x, dim, ord=None): - return xr.apply_ufunc(np.linalg.norm, x, - input_core_dims=[[dim]], - kwargs={'ord': ord, 'axis': -1}) + return xr.apply_ufunc( + np.linalg.norm, x, input_core_dims=[[dim]], kwargs={"ord": ord, "axis": -1} + ) .. 
ipython:: python - :suppress: + :suppress: def vector_norm(x, dim, ord=None): - return xr.apply_ufunc(np.linalg.norm, x, - input_core_dims=[[dim]], - kwargs={'ord': ord, 'axis': -1}) + return xr.apply_ufunc( + np.linalg.norm, x, input_core_dims=[[dim]], kwargs={"ord": ord, "axis": -1} + ) .. ipython:: python - vector_norm(arr1, dim='x') + vector_norm(arr1, dim="x") Because ``apply_ufunc`` follows a standard convention for ufuncs, it plays nicely with tools for building vectorized functions, like diff --git a/doc/conf.py b/doc/conf.py index 578f9cf550d..d3d126cb33f 100644 --- a/doc/conf.py +++ b/doc/conf.py @@ -20,6 +20,12 @@ import sys from contextlib import suppress +# --------- autosummary templates ------------------ +# TODO: eventually replace this with a sphinx.ext.auto_accessor module +import sphinx +from sphinx.ext.autodoc import AttributeDocumenter, Documenter, MethodDocumenter +from sphinx.util import rpartition + # make sure the source version is preferred (#3567) root = pathlib.Path(__file__).absolute().parent.parent os.environ["PYTHONPATH"] = str(root) @@ -79,7 +85,6 @@ "sphinx.ext.extlinks", "sphinx.ext.mathjax", "sphinx.ext.napoleon", - "numpydoc", "IPython.sphinxext.ipython_directive", "IPython.sphinxext.ipython_console_highlighting", "nbsphinx", @@ -352,10 +357,120 @@ "python": ("https://docs.python.org/3/", None), "pandas": ("https://pandas.pydata.org/pandas-docs/stable", None), "iris": ("https://scitools.org.uk/iris/docs/latest", None), - "numpy": ("https://docs.scipy.org/doc/numpy", None), + "numpy": ("https://numpy.org/doc/stable", None), "scipy": ("https://docs.scipy.org/doc/scipy/reference", None), "numba": ("https://numba.pydata.org/numba-doc/latest", None), "matplotlib": ("https://matplotlib.org", None), "dask": ("https://docs.dask.org/en/latest", None), "cftime": ("https://unidata.github.io/cftime", None), } + + +# --------- autosummary templates ------------------ +# TODO: eventually replace this with a sphinx.ext.auto_accessor module +class AccessorDocumenter(MethodDocumenter): + """ + Specialized Documenter subclass for accessors. + """ + + objtype = "accessor" + directivetype = "method" + + # lower than MethodDocumenter so this is not chosen for normal methods + priority = 0.6 + + def format_signature(self): + # this method gives an error/warning for the accessors, therefore + # overriding it (accessor has no arguments) + return "" + + +class AccessorLevelDocumenter(Documenter): + """ + Specialized Documenter subclass for objects on accessor level (methods, + attributes). + """ + + # This is the simple straightforward version + # modname is None, base the last elements (eg 'hour') + # and path the part before (eg 'Series.dt') + # def resolve_name(self, modname, parents, path, base): + # modname = 'pandas' + # mod_cls = path.rstrip('.') + # mod_cls = mod_cls.split('.') + # + # return modname, mod_cls + [base] + + def resolve_name(self, modname, parents, path, base): + if modname is None: + if path: + mod_cls = path.rstrip(".") + else: + mod_cls = None + # if documenting a class-level object without path, + # there must be a current class, either from a parent + # auto directive ... + mod_cls = self.env.temp_data.get("autodoc:class") + # ... or from a class directive + if mod_cls is None: + mod_cls = self.env.temp_data.get("py:class") + # ... 
if still None, there's no way to know + if mod_cls is None: + return None, [] + # HACK: this is added in comparison to ClassLevelDocumenter + # mod_cls still consists of class.accessor, so an extra + # rpartition is needed + modname, accessor = rpartition(mod_cls, ".") + modname, cls = rpartition(modname, ".") + parents = [cls, accessor] + # if the module name is still missing, get it like above + if not modname: + modname = self.env.temp_data.get("autodoc:module") + if not modname: + if sphinx.__version__ > "1.3": + modname = self.env.ref_context.get("py:module") + else: + modname = self.env.temp_data.get("py:module") + # ... else, it stays None, which means invalid + return modname, parents + [base] + + +class AccessorAttributeDocumenter(AccessorLevelDocumenter, AttributeDocumenter): + + objtype = "accessorattribute" + directivetype = "attribute" + + # lower than AttributeDocumenter so this is not chosen for normal attributes + priority = 0.6 + + +class AccessorMethodDocumenter(AccessorLevelDocumenter, MethodDocumenter): + + objtype = "accessormethod" + directivetype = "method" + + # lower than MethodDocumenter so this is not chosen for normal methods + priority = 0.6 + + +class AccessorCallableDocumenter(AccessorLevelDocumenter, MethodDocumenter): + """ + This documenter lets us remove .__call__ from the method signature for + callable accessors like Series.plot + """ + + objtype = "accessorcallable" + directivetype = "method" + + # lower than MethodDocumenter; otherwise the doc build prints warnings + priority = 0.5 + + def format_name(self): + return MethodDocumenter.format_name(self).rstrip(".__call__") + + +def setup(app): + app.add_autodocumenter(AccessorDocumenter) + app.add_autodocumenter(AccessorAttributeDocumenter) + app.add_autodocumenter(AccessorMethodDocumenter) + app.add_autodocumenter(AccessorCallableDocumenter) diff --git a/doc/contributing.rst b/doc/contributing.rst index f581bcd9741..9e6a3c250e9 100644 --- a/doc/contributing.rst +++ b/doc/contributing.rst @@ -148,7 +148,7 @@ We'll now kick off a two-step process: 1. Install the build dependencies 2. Build and install xarray -.. code-block:: none +.. code-block:: sh # Create and activate the build environment # This is for Linux and MacOS. On Windows, use py37-windows.yml instead. @@ -162,7 +162,10 @@ We'll now kick off a two-step process: # Build and install xarray pip install -e . -At this point you should be able to import *xarray* from your locally built version:: +At this point you should be able to import *xarray* from your locally +built version: + +.. code-block:: sh $ python # start an interpreter >>> import xarray @@ -256,18 +259,20 @@ Some other important things to know about the docs: - The tutorials make heavy use of the `ipython directive `_ sphinx extension. This directive lets you put code in the documentation which will be run - during the doc build. For example:: + during the doc build. For example: + + .. code:: rst .. ipython:: python x = 2 - x**3 + x ** 3 will be rendered as:: In [1]: x = 2 - In [2]: x**3 + In [2]: x ** 3 Out[2]: 8 Almost all code examples in the docs are run (and the output saved) during the @@ -290,7 +295,7 @@ Requirements Make sure to follow the instructions on :ref:`creating a development environment above `, but to build the docs you need to use the environment file ``ci/requirements/doc.yml``. -.. code-block:: none +.. 
code-block:: sh # Create and activate the docs environment conda env create -f ci/requirements/doc.yml @@ -347,7 +352,10 @@ Code Formatting xarray uses several tools to ensure a consistent code format throughout the project: -- `Black `_ for standardized code formatting +- `Black `_ for standardized + code formatting +- `blackdoc `_ for + standardized code formatting in documentation - `Flake8 `_ for general code quality - `isort `_ for standardized order in imports. See also `flake8-isort `_. @@ -356,12 +364,13 @@ xarray uses several tools to ensure a consistent code format throughout the proj ``pip``:: - pip install black flake8 isort mypy + pip install black flake8 isort mypy blackdoc and then run from the root of the Xarray repository:: isort -rc . black -t py36 . + blackdoc -t py36 . flake8 mypy . @@ -467,7 +476,7 @@ typically find tests wrapped in a class. .. code-block:: python class TestReallyCoolFeature: - .... + ... Going forward, we are moving to a more *functional* style using the `pytest `__ framework, which offers a richer @@ -477,7 +486,7 @@ writing test classes, we will write test functions like this: .. code-block:: python def test_really_cool_feature(): - .... + ... Using ``pytest`` ~~~~~~~~~~~~~~~~ @@ -508,17 +517,23 @@ We would name this file ``test_cool_feature.py`` and put in an appropriate place from xarray.testing import assert_equal - @pytest.mark.parametrize('dtype', ['int8', 'int16', 'int32', 'int64']) + @pytest.mark.parametrize("dtype", ["int8", "int16", "int32", "int64"]) def test_dtypes(dtype): assert str(np.dtype(dtype)) == dtype - @pytest.mark.parametrize('dtype', ['float32', - pytest.param('int16', marks=pytest.mark.skip), - pytest.param('int32', marks=pytest.mark.xfail( - reason='to show how it works'))]) + @pytest.mark.parametrize( + "dtype", + [ + "float32", + pytest.param("int16", marks=pytest.mark.skip), + pytest.param( + "int32", marks=pytest.mark.xfail(reason="to show how it works") + ), + ], + ) def test_mark(dtype): - assert str(np.dtype(dtype)) == 'float32' + assert str(np.dtype(dtype)) == "float32" @pytest.fixture @@ -526,7 +541,7 @@ We would name this file ``test_cool_feature.py`` and put in an appropriate place return xr.DataArray([1, 2, 3]) - @pytest.fixture(params=['int8', 'int16', 'int32', 'int64']) + @pytest.fixture(params=["int8", "int16", "int32", "int64"]) def dtype(request): return request.param diff --git a/doc/dask.rst b/doc/dask.rst index 07b3939af6e..de25ee2200e 100644 --- a/doc/dask.rst +++ b/doc/dask.rst @@ -56,19 +56,26 @@ argument to :py:func:`~xarray.open_dataset` or using the import numpy as np import pandas as pd import xarray as xr + np.random.seed(123456) np.set_printoptions(precision=3, linewidth=100, threshold=100, edgeitems=3) - ds = xr.Dataset({'temperature': (('time', 'latitude', 'longitude'), - np.random.randn(30, 180, 180)), - 'time': pd.date_range('2015-01-01', periods=30), - 'longitude': np.arange(180), - 'latitude': np.arange(89.5, -90.5, -1)}) - ds.to_netcdf('example-data.nc') + ds = xr.Dataset( + { + "temperature": ( + ("time", "latitude", "longitude"), + np.random.randn(30, 180, 180), + ), + "time": pd.date_range("2015-01-01", periods=30), + "longitude": np.arange(180), + "latitude": np.arange(89.5, -90.5, -1), + } + ) + ds.to_netcdf("example-data.nc") .. ipython:: python - ds = xr.open_dataset('example-data.nc', chunks={'time': 10}) + ds = xr.open_dataset("example-data.nc", chunks={"time": 10}) ds In this example ``latitude`` and ``longitude`` do not appear in the ``chunks`` @@ -106,7 +113,7 @@ usual way. .. 
ipython:: python - ds.to_netcdf('manipulated-example-data.nc') + ds.to_netcdf("manipulated-example-data.nc") By setting the ``compute`` argument to ``False``, :py:meth:`~xarray.Dataset.to_netcdf` will return a ``dask.delayed`` object that can be computed later. @@ -114,8 +121,9 @@ will return a ``dask.delayed`` object that can be computed later. .. ipython:: python from dask.diagnostics import ProgressBar + # or distributed.progress when using the distributed scheduler - delayed_obj = ds.to_netcdf('manipulated-example-data.nc', compute=False) + delayed_obj = ds.to_netcdf("manipulated-example-data.nc", compute=False) with ProgressBar(): results = delayed_obj.compute() @@ -141,8 +149,9 @@ Dask DataFrames do not support multi-indexes so the coordinate variables from th :suppress: import os - os.remove('example-data.nc') - os.remove('manipulated-example-data.nc') + + os.remove("example-data.nc") + os.remove("manipulated-example-data.nc") Using Dask with xarray ---------------------- @@ -199,7 +208,7 @@ Dask arrays using the :py:meth:`~xarray.Dataset.persist` method: .. ipython:: python - ds = ds.persist() + ds = ds.persist() :py:meth:`~xarray.Dataset.persist` is particularly useful when using a distributed cluster because the data will be loaded into distributed memory @@ -224,11 +233,11 @@ sizes of Dask arrays is done with the :py:meth:`~xarray.Dataset.chunk` method: .. ipython:: python :suppress: - ds = ds.chunk({'time': 10}) + ds = ds.chunk({"time": 10}) .. ipython:: python - rechunked = ds.chunk({'latitude': 100, 'longitude': 100}) + rechunked = ds.chunk({"latitude": 100, "longitude": 100}) You can view the size of existing chunks on an array by viewing the :py:attr:`~xarray.Dataset.chunks` attribute: @@ -256,6 +265,7 @@ lazy Dask arrays, in the :ref:`xarray.ufuncs ` module: .. ipython:: python import xarray.ufuncs as xu + xu.sin(rechunked) To access Dask arrays directly, use the new @@ -274,12 +284,21 @@ loaded into Dask or not: .. _dask.automatic-parallelization: -Automatic parallelization -------------------------- +Automatic parallelization with ``apply_ufunc`` and ``map_blocks`` +----------------------------------------------------------------- Almost all of xarray's built-in operations work on Dask arrays. If you want to -use a function that isn't wrapped by xarray, one option is to extract Dask -arrays from xarray objects (``.data``) and use Dask directly. +use a function that isn't wrapped by xarray, and have it applied in parallel on +each block of your xarray object, you have three options: + +1. Extract Dask arrays from xarray objects (``.data``) and use Dask directly. +2. Use :py:func:`~xarray.apply_ufunc` to apply functions that consume and return NumPy arrays. +3. Use :py:func:`~xarray.map_blocks`, :py:meth:`Dataset.map_blocks` or :py:meth:`DataArray.map_blocks` + to apply functions that consume and return xarray objects. + + +``apply_ufunc`` +~~~~~~~~~~~~~~~ Another option is to use xarray's :py:func:`~xarray.apply_ufunc`, which can automate `embarrassingly parallel @@ -302,24 +321,32 @@ we use to calculate `Spearman's rank-correlation coefficient ` and @@ -453,15 +470,15 @@ dataset variables: .. ipython:: python - ds.rename({'temperature': 'temp', 'precipitation': 'precip'}) + ds.rename({"temperature": "temp", "precipitation": "precip"}) The related :py:meth:`~xarray.Dataset.swap_dims` method allows you do to swap dimension and non-dimension variables: .. 
ipython:: python - ds.coords['day'] = ('time', [6, 7, 8]) - ds.swap_dims({'time': 'day'}) + ds.coords["day"] = ("time", [6, 7, 8]) + ds.swap_dims({"time": "day"}) .. _coordinates: @@ -519,8 +536,8 @@ To convert back and forth between data and coordinates, you can use the .. ipython:: python ds.reset_coords() - ds.set_coords(['temperature', 'precipitation']) - ds['temperature'].reset_coords(drop=True) + ds.set_coords(["temperature", "precipitation"]) + ds["temperature"].reset_coords(drop=True) Notice that these operations skip coordinates with names given by dimensions, as used for indexing. This mostly because we are not entirely sure how to @@ -544,7 +561,7 @@ logic used for merging coordinates in arithmetic operations .. ipython:: python - alt = xr.Dataset(coords={'z': [10], 'lat': 0, 'lon': 0}) + alt = xr.Dataset(coords={"z": [10], "lat": 0, "lon": 0}) ds.coords.merge(alt.coords) The ``coords.merge`` method may be useful if you want to implement your own @@ -560,7 +577,7 @@ To convert a coordinate (or any ``DataArray``) into an actual .. ipython:: python - ds['time'].to_index() + ds["time"].to_index() A useful shortcut is the ``indexes`` property (on both ``DataArray`` and ``Dataset``), which lazily constructs a dictionary whose keys are given by each @@ -577,9 +594,10 @@ Xarray supports labeling coordinate values with a :py:class:`pandas.MultiIndex`: .. ipython:: python - midx = pd.MultiIndex.from_arrays([['R', 'R', 'V', 'V'], [.1, .2, .7, .9]], - names=('band', 'wn')) - mda = xr.DataArray(np.random.rand(4), coords={'spec': midx}, dims='spec') + midx = pd.MultiIndex.from_arrays( + [["R", "R", "V", "V"], [0.1, 0.2, 0.7, 0.9]], names=("band", "wn") + ) + mda = xr.DataArray(np.random.rand(4), coords={"spec": midx}, dims="spec") mda For convenience multi-index levels are directly accessible as "virtual" or @@ -587,8 +605,8 @@ For convenience multi-index levels are directly accessible as "virtual" or .. ipython:: python - mda['band'] - mda.wn + mda["band"] + mda.wn Indexing with multi-index levels is also possible using the ``sel`` method (see :ref:`multi-level indexing`). diff --git a/doc/faq.rst b/doc/faq.rst index 576cec5c2b1..a2b8be47e06 100644 --- a/doc/faq.rst +++ b/doc/faq.rst @@ -4,11 +4,12 @@ Frequently Asked Questions ========================== .. ipython:: python - :suppress: + :suppress: import numpy as np import pandas as pd import xarray as xr + np.random.seed(123456) @@ -103,21 +104,21 @@ code fragment .. ipython:: python arr = xr.DataArray([1, 2, 3]) - pd.Series({'x': arr[0], 'mean': arr.mean(), 'std': arr.std()}) + pd.Series({"x": arr[0], "mean": arr.mean(), "std": arr.std()}) does not yield the pandas DataFrame we expected. We need to specify the type conversion ourselves: .. ipython:: python - pd.Series({'x': arr[0], 'mean': arr.mean(), 'std': arr.std()}, dtype=float) + pd.Series({"x": arr[0], "mean": arr.mean(), "std": arr.std()}, dtype=float) Alternatively, we could use the ``item`` method or the ``float`` constructor to convert values one at a time .. ipython:: python - pd.Series({'x': arr[0].item(), 'mean': float(arr.mean())}) + pd.Series({"x": arr[0].item(), "mean": float(arr.mean())}) .. _approach to metadata: diff --git a/doc/groupby.rst b/doc/groupby.rst index 223185bd0d5..c72a26c45ea 100644 --- a/doc/groupby.rst +++ b/doc/groupby.rst @@ -26,11 +26,12 @@ Split Let's create a simple example dataset: .. ipython:: python - :suppress: + :suppress: import numpy as np import pandas as pd import xarray as xr + np.random.seed(123456) .. 
ipython:: python @@ -47,20 +48,20 @@ use a DataArray directly), we get back a ``GroupBy`` object: .. ipython:: python - ds.groupby('letters') + ds.groupby("letters") This object works very similarly to a pandas GroupBy object. You can view the group indices with the ``groups`` attribute: .. ipython:: python - ds.groupby('letters').groups + ds.groupby("letters").groups You can also iterate over groups in ``(label, group)`` pairs: .. ipython:: python - list(ds.groupby('letters')) + list(ds.groupby("letters")) Just like in pandas, creating a GroupBy object is cheap: it does not actually split the data until you access particular values. @@ -75,8 +76,8 @@ a customized coordinate, but xarray facilitates this via the .. ipython:: python - x_bins = [0,25,50] - ds.groupby_bins('x', x_bins).groups + x_bins = [0, 25, 50] + ds.groupby_bins("x", x_bins).groups The binning is implemented via :func:`pandas.cut`, whose documentation details how the bins are assigned. As seen in the example above, by default, the bins are @@ -86,8 +87,8 @@ choose `float` labels which identify the bin centers: .. ipython:: python - x_bin_labels = [12.5,37.5] - ds.groupby_bins('x', x_bins, labels=x_bin_labels).groups + x_bin_labels = [12.5, 37.5] + ds.groupby_bins("x", x_bins, labels=x_bin_labels).groups Apply @@ -102,7 +103,8 @@ concatenated back together along the group axis: def standardize(x): return (x - x.mean()) / x.std() - arr.groupby('letters').map(standardize) + + arr.groupby("letters").map(standardize) GroupBy objects also have a :py:meth:`~xarray.core.groupby.DatasetGroupBy.reduce` method and methods like :py:meth:`~xarray.core.groupby.DatasetGroupBy.mean` as shortcuts for applying an @@ -110,14 +112,14 @@ aggregation function: .. ipython:: python - arr.groupby('letters').mean(dim='x') + arr.groupby("letters").mean(dim="x") Using a groupby is thus also a convenient shortcut for aggregating over all dimensions *other than* the provided one: .. ipython:: python - ds.groupby('x').std(...) + ds.groupby("x").std(...) .. note:: @@ -134,7 +136,7 @@ values for group along the grouped dimension: .. ipython:: python - ds.groupby('letters').first(...) + ds.groupby("letters").first(...) By default, they skip missing values (control this with ``skipna``). @@ -149,9 +151,9 @@ coordinates. For example: .. ipython:: python - alt = arr.groupby('letters').mean(...) + alt = arr.groupby("letters").mean(...) alt - ds.groupby('letters') - alt + ds.groupby("letters") - alt This last line is roughly equivalent to the following:: @@ -169,11 +171,11 @@ the ``squeeze`` parameter: .. ipython:: python - next(iter(arr.groupby('x'))) + next(iter(arr.groupby("x"))) .. ipython:: python - next(iter(arr.groupby('x', squeeze=False))) + next(iter(arr.groupby("x", squeeze=False))) Although xarray will attempt to automatically :py:attr:`~xarray.DataArray.transpose` dimensions back into their original order @@ -197,13 +199,17 @@ __ http://cfconventions.org/cf-conventions/v1.6.0/cf-conventions.html#_two_dimen .. ipython:: python - da = xr.DataArray([[0,1],[2,3]], - coords={'lon': (['ny','nx'], [[30,40],[40,50]] ), - 'lat': (['ny','nx'], [[10,10],[20,20]] ),}, - dims=['ny','nx']) + da = xr.DataArray( + [[0, 1], [2, 3]], + coords={ + "lon": (["ny", "nx"], [[30, 40], [40, 50]]), + "lat": (["ny", "nx"], [[10, 10], [20, 20]]), + }, + dims=["ny", "nx"], + ) da - da.groupby('lon').sum(...) - da.groupby('lon').map(lambda x: x - x.mean(), shortcut=False) + da.groupby("lon").sum(...) 
+ da.groupby("lon").map(lambda x: x - x.mean(), shortcut=False) Because multidimensional groups have the ability to generate a very large number of bins, coarse-binning via :py:meth:`~xarray.Dataset.groupby_bins` @@ -211,7 +217,7 @@ may be desirable: .. ipython:: python - da.groupby_bins('lon', [0,45,50]).sum() + da.groupby_bins("lon", [0, 45, 50]).sum() These methods group by `lon` values. It is also possible to groupby each cell in a grid, regardless of value, by stacking multiple dimensions, @@ -219,5 +225,5 @@ applying your function, and then unstacking the result: .. ipython:: python - stacked = da.stack(gridcell=['ny', 'nx']) - stacked.groupby('gridcell').sum(...).unstack('gridcell') + stacked = da.stack(gridcell=["ny", "nx"]) + stacked.groupby("gridcell").sum(...).unstack("gridcell") \ No newline at end of file diff --git a/doc/indexing.rst b/doc/indexing.rst index cfbb84a8343..af8e44fb80b 100644 --- a/doc/indexing.rst +++ b/doc/indexing.rst @@ -4,11 +4,12 @@ Indexing and selecting data =========================== .. ipython:: python - :suppress: + :suppress: import numpy as np import pandas as pd import xarray as xr + np.random.seed(123456) xarray offers extremely flexible indexing routines that combine the best @@ -60,9 +61,13 @@ DataArray: .. ipython:: python - da = xr.DataArray(np.random.rand(4, 3), - [('time', pd.date_range('2000-01-01', periods=4)), - ('space', ['IA', 'IL', 'IN'])]) + da = xr.DataArray( + np.random.rand(4, 3), + [ + ("time", pd.date_range("2000-01-01", periods=4)), + ("space", ["IA", "IL", "IN"]), + ], + ) da[:2] da[0, 0] da[:, [2, 1]] @@ -81,7 +86,7 @@ fast. To do label based indexing, use the :py:attr:`~xarray.DataArray.loc` attri .. ipython:: python - da.loc['2000-01-01':'2000-01-02', 'IA'] + da.loc["2000-01-01":"2000-01-02", "IA"] In this example, the selected is a subpart of the array in the range '2000-01-01':'2000-01-02' along the first coordinate `time` @@ -98,7 +103,7 @@ Setting values with label based indexing is also supported: .. ipython:: python - da.loc['2000-01-01', ['IL', 'IN']] = -10 + da.loc["2000-01-01", ["IL", "IN"]] = -10 da @@ -117,7 +122,7 @@ use them explicitly to slice data. There are two ways to do this: da[dict(space=0, time=slice(None, 2))] # index by dimension coordinate labels - da.loc[dict(time=slice('2000-01-01', '2000-01-02'))] + da.loc[dict(time=slice("2000-01-01", "2000-01-02"))] 2. Use the :py:meth:`~xarray.DataArray.sel` and :py:meth:`~xarray.DataArray.isel` convenience methods: @@ -128,7 +133,7 @@ use them explicitly to slice data. There are two ways to do this: da.isel(space=0, time=slice(None, 2)) # index by dimension coordinate labels - da.sel(time=slice('2000-01-01', '2000-01-02')) + da.sel(time=slice("2000-01-01", "2000-01-02")) The arguments to these methods can be any objects that could index the array along the dimension given by the keyword, e.g., labels for an individual value, @@ -156,16 +161,16 @@ enabling nearest neighbor (inexact) lookups by use of the methods ``'pad'``, .. ipython:: python - da = xr.DataArray([1, 2, 3], [('x', [0, 1, 2])]) - da.sel(x=[1.1, 1.9], method='nearest') - da.sel(x=0.1, method='backfill') - da.reindex(x=[0.5, 1, 1.5, 2, 2.5], method='pad') + da = xr.DataArray([1, 2, 3], [("x", [0, 1, 2])]) + da.sel(x=[1.1, 1.9], method="nearest") + da.sel(x=0.1, method="backfill") + da.reindex(x=[0.5, 1, 1.5, 2, 2.5], method="pad") Tolerance limits the maximum distance for valid matches with an inexact lookup: .. 
ipython:: python - da.reindex(x=[1.1, 1.5], method='nearest', tolerance=0.2) + da.reindex(x=[1.1, 1.5], method="nearest", tolerance=0.2) The method parameter is not yet supported if any of the arguments to ``.sel()`` is a ``slice`` object: @@ -173,7 +178,7 @@ to ``.sel()`` is a ``slice`` object: .. ipython:: :verbatim: - In [1]: da.sel(x=slice(1, 3), method='nearest') + In [1]: da.sel(x=slice(1, 3), method="nearest") NotImplementedError However, you don't need to use ``method`` to do inexact slicing. Slicing @@ -182,15 +187,15 @@ labels are monotonic increasing: .. ipython:: python - da.sel(x=slice(0.9, 3.1)) + da.sel(x=slice(0.9, 3.1)) Indexing axes with monotonic decreasing labels also works, as long as the ``slice`` or ``.loc`` arguments are also decreasing: .. ipython:: python - reversed_da = da[::-1] - reversed_da.loc[3.1:0.9] + reversed_da = da[::-1] + reversed_da.loc[3.1:0.9] .. note:: @@ -227,7 +232,7 @@ arrays). However, you can do normal indexing with dimension names: .. ipython:: python ds[dict(space=[0], time=[0])] - ds.loc[dict(time='2000-01-01')] + ds.loc[dict(time="2000-01-01")] Using indexing to *assign* values to a subset of dataset (e.g., ``ds[dict(space=0)] = 1``) is not yet supported. @@ -240,7 +245,7 @@ index labels along a dimension dropped: .. ipython:: python - ds.drop_sel(space=['IN', 'IL']) + ds.drop_sel(space=["IN", "IL"]) ``drop_sel`` is both a ``Dataset`` and ``DataArray`` method. @@ -249,7 +254,7 @@ Any variables with these dimensions are also dropped: .. ipython:: python - ds.drop_dims('time') + ds.drop_dims("time") .. _masking with where: @@ -263,7 +268,7 @@ xarray, use :py:meth:`~xarray.DataArray.where`: .. ipython:: python - da = xr.DataArray(np.arange(16).reshape(4, 4), dims=['x', 'y']) + da = xr.DataArray(np.arange(16).reshape(4, 4), dims=["x", "y"]) da.where(da.x + da.y < 4) This is particularly useful for ragged indexing of multi-dimensional data, @@ -296,7 +301,7 @@ multiple values, use :py:meth:`~xarray.DataArray.isin`: .. ipython:: python - da = xr.DataArray([1, 2, 3, 4, 5], dims=['x']) + da = xr.DataArray([1, 2, 3, 4, 5], dims=["x"]) da.isin([2, 4]) :py:meth:`~xarray.DataArray.isin` works particularly well with @@ -305,7 +310,7 @@ already labels of an array: .. ipython:: python - lookup = xr.DataArray([-1, -2, -3, -4, -5], dims=['x']) + lookup = xr.DataArray([-1, -2, -3, -4, -5], dims=["x"]) da.where(lookup.isin([-2, -4]), drop=True) However, some caution is in order: when done repeatedly, this type of indexing @@ -328,7 +333,6 @@ MATLAB, or after using the :py:func:`numpy.ix_` helper: .. ipython:: python - da = xr.DataArray( np.arange(12).reshape((3, 4)), dims=["x", "y"], @@ -344,8 +348,8 @@ dimensions: .. ipython:: python - ind_x = xr.DataArray([0, 1], dims=['x']) - ind_y = xr.DataArray([0, 1], dims=['y']) + ind_x = xr.DataArray([0, 1], dims=["x"]) + ind_y = xr.DataArray([0, 1], dims=["y"]) da[ind_x, ind_y] # orthogonal indexing da[ind_x, ind_x] # vectorized indexing @@ -364,7 +368,7 @@ indexers' dimension: .. ipython:: python - ind = xr.DataArray([[0, 1], [0, 1]], dims=['a', 'b']) + ind = xr.DataArray([[0, 1], [0, 1]], dims=["a", "b"]) da[ind] Similar to how NumPy's `advanced indexing`_ works, vectorized @@ -378,18 +382,18 @@ Vectorized indexing also works with ``isel``, ``loc``, and ``sel``: .. 
ipython:: python - ind = xr.DataArray([[0, 1], [0, 1]], dims=['a', 'b']) + ind = xr.DataArray([[0, 1], [0, 1]], dims=["a", "b"]) da.isel(y=ind) # same as da[:, ind] - ind = xr.DataArray([['a', 'b'], ['b', 'a']], dims=['a', 'b']) + ind = xr.DataArray([["a", "b"], ["b", "a"]], dims=["a", "b"]) da.loc[:, ind] # same as da.sel(y=ind) These methods may also be applied to ``Dataset`` objects .. ipython:: python - ds = da.to_dataset(name='bar') - ds.isel(x=xr.DataArray([0, 1, 2], dims=['points'])) + ds = da.to_dataset(name="bar") + ds.isel(x=xr.DataArray([0, 1, 2], dims=["points"])) .. tip:: @@ -476,8 +480,8 @@ Like ``numpy.ndarray``, value assignment sometimes works differently from what o .. ipython:: python - da = xr.DataArray([0, 1, 2, 3], dims=['x']) - ind = xr.DataArray([0, 0, 0], dims=['x']) + da = xr.DataArray([0, 1, 2, 3], dims=["x"]) + ind = xr.DataArray([0, 0, 0], dims=["x"]) da[ind] -= 1 da @@ -511,7 +515,7 @@ __ https://docs.scipy.org/doc/numpy/user/basics.indexing.html#assigning-values-t .. ipython:: python - da = xr.DataArray([0, 1, 2, 3], dims=['x']) + da = xr.DataArray([0, 1, 2, 3], dims=["x"]) # DO NOT do this da.isel(x=[0, 1, 2])[1] = -1 da @@ -581,15 +585,15 @@ To reindex a particular dimension, use :py:meth:`~xarray.DataArray.reindex`: .. ipython:: python - da.reindex(space=['IA', 'CA']) + da.reindex(space=["IA", "CA"]) The :py:meth:`~xarray.DataArray.reindex_like` method is a useful shortcut. To demonstrate, we will make a subset DataArray with new values: .. ipython:: python - foo = da.rename('foo') - baz = (10 * da[:2, :2]).rename('baz') + foo = da.rename("foo") + baz = (10 * da[:2, :2]).rename("baz") baz Reindexing ``foo`` with ``baz`` selects out the first two values along each @@ -611,8 +615,8 @@ The :py:func:`~xarray.align` function lets us perform more flexible database-lik .. ipython:: python - xr.align(foo, baz, join='inner') - xr.align(foo, baz, join='outer') + xr.align(foo, baz, join="inner") + xr.align(foo, baz, join="outer") Both ``reindex_like`` and ``align`` work interchangeably between :py:class:`~xarray.DataArray` and :py:class:`~xarray.Dataset` objects, and with any number of matching dimension names: @@ -621,7 +625,7 @@ Both ``reindex_like`` and ``align`` work interchangeably between ds ds.reindex_like(baz) - other = xr.DataArray(['a', 'b', 'c'], dims='other') + other = xr.DataArray(["a", "b", "c"], dims="other") # this is a no-op, because there are no shared dimension names ds.reindex_like(other) @@ -636,7 +640,7 @@ integer-based indexing as a fallback for dimensions without a coordinate label: .. ipython:: python - da = xr.DataArray([1, 2, 3], dims='x') + da = xr.DataArray([1, 2, 3], dims="x") da.sel(x=[0, -1]) Alignment between xarray objects where one or both do not have coordinate labels @@ -675,9 +679,9 @@ labels: .. ipython:: python - da = xr.DataArray([1, 2, 3], dims='x') + da = xr.DataArray([1, 2, 3], dims="x") da - da.get_index('x') + da.get_index("x") .. _copies_vs_views: @@ -721,7 +725,6 @@ pandas: .. ipython:: python - midx = pd.MultiIndex.from_product([list("abc"), [0, 1]], names=("one", "two")) mda = xr.DataArray(np.random.rand(6, 3), [("x", midx), ("y", range(3))]) mda @@ -732,20 +735,20 @@ a slice of tuples: .. ipython:: python - mda.sel(x=[('a', 0), ('b', 1)]) + mda.sel(x=[("a", 0), ("b", 1)]) Additionally, xarray supports dictionaries: .. ipython:: python - mda.sel(x={'one': 'a', 'two': 0}) + mda.sel(x={"one": "a", "two": 0}) For convenience, ``sel`` also accepts multi-index levels directly as keyword arguments: .. 
ipython:: python - mda.sel(one='a', two=0) + mda.sel(one="a", two=0) Note that using ``sel`` it is not possible to mix a dimension indexer with level indexers for that dimension @@ -757,7 +760,7 @@ multi-index is reduced to a single index. .. ipython:: python - mda.loc[{'one': 'a'}, ...] + mda.loc[{"one": "a"}, ...] Unlike pandas, xarray does not guess whether you provide index levels or dimensions when using ``loc`` in some ambiguous cases. For example, for diff --git a/doc/internals.rst b/doc/internals.rst index a4870f2316a..46c117e312b 100644 --- a/doc/internals.rst +++ b/doc/internals.rst @@ -46,11 +46,12 @@ Extending xarray ---------------- .. ipython:: python - :suppress: + :suppress: import numpy as np import pandas as pd import xarray as xr + np.random.seed(123456) xarray is designed as a general purpose library, and hence tries to avoid @@ -87,11 +88,12 @@ defined that returns an instance of your class: .. code-block:: python - class Dataset: - ... - @property - def geo(self) - return GeoAccessor(self) + class Dataset: + ... + + @property + def geo(self): + return GeoAccessor(self) However, using the register accessor decorators is preferable to simply adding your own ad-hoc property (i.e., ``Dataset.geo = property(...)``), for several @@ -116,14 +118,13 @@ reasons: Back in an interactive IPython session, we can use these properties: .. ipython:: python - :suppress: + :suppress: - exec(open("examples/_code/accessor_example.py").read()) + exec(open("examples/_code/accessor_example.py").read()) .. ipython:: python - ds = xr.Dataset({'longitude': np.linspace(0, 10), - 'latitude': np.linspace(0, 20)}) + ds = xr.Dataset({"longitude": np.linspace(0, 10), "latitude": np.linspace(0, 20)}) ds.geo.center ds.geo.plot() @@ -137,3 +138,54 @@ To help users keep things straight, please `let us know `_ if you plan to write a new accessor for an open source library. In the future, we will maintain a list of accessors and the libraries that implement them on this page. + +.. _zarr_encoding: + +Zarr Encoding Specification +--------------------------- + +In implementing support for the `Zarr `_ storage +format, Xarray developers made some *ad hoc* choices about how to store +NetCDF data in Zarr. +Future versions of the Zarr spec will likely include a more formal convention +for the storage of the NetCDF data model in Zarr; see +`Zarr spec repo `_ for ongoing +discussion. + +First, Xarray can only read and write Zarr groups. There is currently no support +for reading / writting individual Zarr arrays. Zarr groups are mapped to +Xarray ``Dataset`` objects. + +Second, from Xarray's point of view, the key difference between +NetCDF and Zarr is that all NetCDF arrays have *dimension names* while Zarr +arrays do not. Therefore, in order to store NetCDF data in Zarr, Xarray must +somehow encode and decode the name of each array's dimensions. + +To accomplish this, Xarray developers decided to define a special Zarr array +attribute: ``_ARRAY_DIMENSIONS``. The value of this attribute is a list of +dimension names (strings), for example ``["time", "lon", "lat"]``. When writing +data to Zarr, Xarray sets this attribute on all variables based on the variable +dimensions. When reading a Zarr group, Xarray looks for this attribute on all +arrays, raising an error if it can't be found. The attribute is used to define +the variable dimension names and then removed from the attributes dictionary +returned to the user. 
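As a purely illustrative sketch (the store name ``plain.zarr`` and the variable name ``tair`` are made up here, and the zarr v2 Python API is assumed), a Zarr array written outside of Xarray could be made readable by setting this attribute by hand:

.. code-block:: python

    import numpy as np
    import xarray as xr
    import zarr

    # Hypothetical store and variable names, assuming the zarr v2 Python API.
    root = zarr.open_group("plain.zarr", mode="w")
    tair = root.create_dataset("tair", data=np.zeros((2, 3, 4)), chunks=(1, 3, 4))

    # Without this attribute, xarray would raise an error when opening the store.
    tair.attrs["_ARRAY_DIMENSIONS"] = ["time", "lat", "lon"]

    ds = xr.open_zarr("plain.zarr")

Opening ``plain.zarr`` with :py:func:`~xarray.open_zarr` should then yield a ``tair`` variable with dimensions ``time``, ``lat`` and ``lon``, with the ``_ARRAY_DIMENSIONS`` attribute stripped from the attributes returned to the user.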
+ +Because of these choices, Xarray cannot read arbitrary array data, but only +Zarr data with valid ``_ARRAY_DIMENSIONS`` attributes on each array. + +After decoding the ``_ARRAY_DIMENSIONS`` attribute and assigning the variable +dimensions, Xarray proceeds to [optionally] decode each variable using its +standard CF decoding machinery used for NetCDF data (see :py:func:`decode_cf`). + +As a concrete example, here we write a tutorial dataset to Zarr and then +re-open it directly with Zarr: + +.. ipython:: python + + ds = xr.tutorial.load_dataset("rasm") + ds.to_zarr("rasm.zarr", mode="w") + import zarr + + zgroup = zarr.open("rasm.zarr") + print(zgroup.tree()) + dict(zgroup["Tair"].attrs) \ No newline at end of file diff --git a/doc/interpolation.rst b/doc/interpolation.rst index 4cf39807e5a..c2922813e15 100644 --- a/doc/interpolation.rst +++ b/doc/interpolation.rst @@ -4,11 +4,12 @@ Interpolating data ================== .. ipython:: python - :suppress: + :suppress: import numpy as np import pandas as pd import xarray as xr + np.random.seed(123456) xarray offers flexible interpolation routines, which have a similar interface @@ -27,9 +28,10 @@ indexing of a :py:class:`~xarray.DataArray`, .. ipython:: python - da = xr.DataArray(np.sin(0.3 * np.arange(12).reshape(4, 3)), - [('time', np.arange(4)), - ('space', [0.1, 0.2, 0.3])]) + da = xr.DataArray( + np.sin(0.3 * np.arange(12).reshape(4, 3)), + [("time", np.arange(4)), ("space", [0.1, 0.2, 0.3])], + ) # label lookup da.sel(time=3) @@ -52,16 +54,17 @@ To interpolate data with a :py:doc:`numpy.datetime64 .. ipython:: python - da_dt64 = xr.DataArray([1, 3], - [('time', pd.date_range('1/1/2000', '1/3/2000', periods=2))]) - da_dt64.interp(time='2000-01-02') + da_dt64 = xr.DataArray( + [1, 3], [("time", pd.date_range("1/1/2000", "1/3/2000", periods=2))] + ) + da_dt64.interp(time="2000-01-02") The interpolated data can be merged into the original :py:class:`~xarray.DataArray` by specifying the time periods required. .. ipython:: python - da_dt64.interp(time=pd.date_range('1/1/2000', '1/3/2000', periods=3)) + da_dt64.interp(time=pd.date_range("1/1/2000", "1/3/2000", periods=3)) Interpolation of data indexed by a :py:class:`~xarray.CFTimeIndex` is also allowed. See :ref:`CFTimeIndex` for examples. @@ -108,9 +111,10 @@ different coordinates, .. ipython:: python - other = xr.DataArray(np.sin(0.4 * np.arange(9).reshape(3, 3)), - [('time', [0.9, 1.9, 2.9]), - ('space', [0.15, 0.25, 0.35])]) + other = xr.DataArray( + np.sin(0.4 * np.arange(9).reshape(3, 3)), + [("time", [0.9, 1.9, 2.9]), ("space", [0.15, 0.25, 0.35])], + ) it might be a good idea to first interpolate ``da`` so that it will stay on the same coordinates of ``other``, and then subtract it. @@ -118,9 +122,9 @@ same coordinates of ``other``, and then subtract it. .. ipython:: python - # interpolate da along other's coordinates - interpolated = da.interp_like(other) - interpolated + # interpolate da along other's coordinates + interpolated = da.interp_like(other) + interpolated It is now possible to safely compute the difference ``other - interpolated``. @@ -135,12 +139,15 @@ The interpolation method can be specified by the optional ``method`` argument. .. 
ipython:: python - da = xr.DataArray(np.sin(np.linspace(0, 2 * np.pi, 10)), dims='x', - coords={'x': np.linspace(0, 1, 10)}) + da = xr.DataArray( + np.sin(np.linspace(0, 2 * np.pi, 10)), + dims="x", + coords={"x": np.linspace(0, 1, 10)}, + ) - da.plot.line('o', label='original') - da.interp(x=np.linspace(0, 1, 100)).plot.line(label='linear (default)') - da.interp(x=np.linspace(0, 1, 100), method='cubic').plot.line(label='cubic') + da.plot.line("o", label="original") + da.interp(x=np.linspace(0, 1, 100)).plot.line(label="linear (default)") + da.interp(x=np.linspace(0, 1, 100), method="cubic").plot.line(label="cubic") @savefig interpolation_sample1.png width=4in plt.legend() @@ -149,15 +156,16 @@ Additional keyword arguments can be passed to scipy's functions. .. ipython:: python # fill 0 for the outside of the original coordinates. - da.interp(x=np.linspace(-0.5, 1.5, 10), kwargs={'fill_value': 0.0}) + da.interp(x=np.linspace(-0.5, 1.5, 10), kwargs={"fill_value": 0.0}) # 1-dimensional extrapolation - da.interp(x=np.linspace(-0.5, 1.5, 10), kwargs={'fill_value': 'extrapolate'}) + da.interp(x=np.linspace(-0.5, 1.5, 10), kwargs={"fill_value": "extrapolate"}) # multi-dimensional extrapolation - da = xr.DataArray(np.sin(0.3 * np.arange(12).reshape(4, 3)), - [('time', np.arange(4)), - ('space', [0.1, 0.2, 0.3])]) + da = xr.DataArray( + np.sin(0.3 * np.arange(12).reshape(4, 3)), + [("time", np.arange(4)), ("space", [0.1, 0.2, 0.3])], + ) - da.interp(time=4, space=np.linspace(-0.1, 0.5, 10), kwargs={'fill_value': None}) + da.interp(time=4, space=np.linspace(-0.1, 0.5, 10), kwargs={"fill_value": None}) Advanced Interpolation @@ -181,17 +189,18 @@ For example: .. ipython:: python - da = xr.DataArray(np.sin(0.3 * np.arange(20).reshape(5, 4)), - [('x', np.arange(5)), - ('y', [0.1, 0.2, 0.3, 0.4])]) + da = xr.DataArray( + np.sin(0.3 * np.arange(20).reshape(5, 4)), + [("x", np.arange(5)), ("y", [0.1, 0.2, 0.3, 0.4])], + ) # advanced indexing - x = xr.DataArray([0, 2, 4], dims='z') - y = xr.DataArray([0.1, 0.2, 0.3], dims='z') + x = xr.DataArray([0, 2, 4], dims="z") + y = xr.DataArray([0.1, 0.2, 0.3], dims="z") da.sel(x=x, y=y) # advanced interpolation - x = xr.DataArray([0.5, 1.5, 2.5], dims='z') - y = xr.DataArray([0.15, 0.25, 0.35], dims='z') + x = xr.DataArray([0.5, 1.5, 2.5], dims="z") + y = xr.DataArray([0.15, 0.25, 0.35], dims="z") da.interp(x=x, y=y) where values on the original coordinates @@ -203,9 +212,8 @@ If you want to add a coordinate to the new dimension ``z``, you can supply .. ipython:: python - x = xr.DataArray([0.5, 1.5, 2.5], dims='z', coords={'z': ['a', 'b','c']}) - y = xr.DataArray([0.15, 0.25, 0.35], dims='z', - coords={'z': ['a', 'b','c']}) + x = xr.DataArray([0.5, 1.5, 2.5], dims="z", coords={"z": ["a", "b", "c"]}) + y = xr.DataArray([0.15, 0.25, 0.35], dims="z", coords={"z": ["a", "b", "c"]}) da.interp(x=x, y=y) For the details of the advanced indexing, @@ -224,19 +232,18 @@ while other methods such as ``cubic`` or ``quadratic`` return all NaN arrays. .. ipython:: python - da = xr.DataArray([0, 2, np.nan, 3, 3.25], dims='x', - coords={'x': range(5)}) + da = xr.DataArray([0, 2, np.nan, 3, 3.25], dims="x", coords={"x": range(5)}) da.interp(x=[0.5, 1.5, 2.5]) - da.interp(x=[0.5, 1.5, 2.5], method='cubic') + da.interp(x=[0.5, 1.5, 2.5], method="cubic") To avoid this, you can drop NaN by :py:meth:`~xarray.DataArray.dropna`, and then make the interpolation .. 
ipython:: python - dropped = da.dropna('x') + dropped = da.dropna("x") dropped - dropped.interp(x=[0.5, 1.5, 2.5], method='cubic') + dropped.interp(x=[0.5, 1.5, 2.5], method="cubic") If NaNs are distributed randomly in your multidimensional array, dropping all the columns containing more than one NaNs by @@ -246,7 +253,7 @@ which is similar to :py:meth:`pandas.Series.interpolate`. .. ipython:: python - filled = da.interpolate_na(dim='x') + filled = da.interpolate_na(dim="x") filled This fills NaN by interpolating along the specified dimension. @@ -254,7 +261,7 @@ After filling NaNs, you can interpolate: .. ipython:: python - filled.interp(x=[0.5, 1.5, 2.5], method='cubic') + filled.interp(x=[0.5, 1.5, 2.5], method="cubic") For the details of :py:meth:`~xarray.DataArray.interpolate_na`, see :ref:`Missing values `. @@ -268,18 +275,18 @@ Let's see how :py:meth:`~xarray.DataArray.interp` works on real data. .. ipython:: python # Raw data - ds = xr.tutorial.open_dataset('air_temperature').isel(time=0) + ds = xr.tutorial.open_dataset("air_temperature").isel(time=0) fig, axes = plt.subplots(ncols=2, figsize=(10, 4)) ds.air.plot(ax=axes[0]) - axes[0].set_title('Raw data') + axes[0].set_title("Raw data") # Interpolated data - new_lon = np.linspace(ds.lon[0], ds.lon[-1], ds.dims['lon'] * 4) - new_lat = np.linspace(ds.lat[0], ds.lat[-1], ds.dims['lat'] * 4) + new_lon = np.linspace(ds.lon[0], ds.lon[-1], ds.dims["lon"] * 4) + new_lat = np.linspace(ds.lat[0], ds.lat[-1], ds.dims["lat"] * 4) dsi = ds.interp(lat=new_lat, lon=new_lon) dsi.air.plot(ax=axes[1]) @savefig interpolation_sample3.png width=8in - axes[1].set_title('Interpolated data') + axes[1].set_title("Interpolated data") Our advanced interpolation can be used to remap the data to the new coordinate. Consider the new coordinates x and z on the two dimensional plane. @@ -291,20 +298,23 @@ The remapping can be done as follows x = np.linspace(240, 300, 100) z = np.linspace(20, 70, 100) # relation between new and original coordinates - lat = xr.DataArray(z, dims=['z'], coords={'z': z}) - lon = xr.DataArray((x[:, np.newaxis]-270)/np.cos(z*np.pi/180)+270, - dims=['x', 'z'], coords={'x': x, 'z': z}) + lat = xr.DataArray(z, dims=["z"], coords={"z": z}) + lon = xr.DataArray( + (x[:, np.newaxis] - 270) / np.cos(z * np.pi / 180) + 270, + dims=["x", "z"], + coords={"x": x, "z": z}, + ) fig, axes = plt.subplots(ncols=2, figsize=(10, 4)) ds.air.plot(ax=axes[0]) # draw the new coordinate on the original coordinates. for idx in [0, 33, 66, 99]: - axes[0].plot(lon.isel(x=idx), lat, '--k') + axes[0].plot(lon.isel(x=idx), lat, "--k") for idx in [0, 33, 66, 99]: - axes[0].plot(*xr.broadcast(lon.isel(z=idx), lat.isel(z=idx)), '--k') - axes[0].set_title('Raw data') + axes[0].plot(*xr.broadcast(lon.isel(z=idx), lat.isel(z=idx)), "--k") + axes[0].set_title("Raw data") dsi = ds.interp(lon=lon, lat=lat) dsi.air.plot(ax=axes[1]) @savefig interpolation_sample4.png width=8in - axes[1].set_title('Remapped data') + axes[1].set_title("Remapped data") \ No newline at end of file diff --git a/doc/io.rst b/doc/io.rst index 0c666099df8..4aac5e0b6f7 100644 --- a/doc/io.rst +++ b/doc/io.rst @@ -9,11 +9,12 @@ simple :ref:`io.pickle` files to the more flexible :ref:`io.netcdf` format (recommended). .. ipython:: python - :suppress: + :suppress: import numpy as np import pandas as pd import xarray as xr + np.random.seed(123456) .. _io.netcdf: @@ -52,12 +53,16 @@ We can save a Dataset to disk using the .. 
ipython:: python - ds = xr.Dataset({'foo': (('x', 'y'), np.random.rand(4, 5))}, - coords={'x': [10, 20, 30, 40], - 'y': pd.date_range('2000-01-01', periods=5), - 'z': ('x', list('abcd'))}) + ds = xr.Dataset( + {"foo": (("x", "y"), np.random.rand(4, 5))}, + coords={ + "x": [10, 20, 30, 40], + "y": pd.date_range("2000-01-01", periods=5), + "z": ("x", list("abcd")), + }, + ) - ds.to_netcdf('saved_on_disk.nc') + ds.to_netcdf("saved_on_disk.nc") By default, the file is saved as netCDF4 (assuming netCDF4-Python is installed). You can control the format and engine used to write the file with @@ -76,7 +81,7 @@ We can load netCDF files to create a new Dataset using .. ipython:: python - ds_disk = xr.open_dataset('saved_on_disk.nc') + ds_disk = xr.open_dataset("saved_on_disk.nc") ds_disk Similarly, a DataArray can be saved to disk using the @@ -117,7 +122,7 @@ netCDF file. However, it's often cleaner to use a ``with`` statement: .. ipython:: python # this automatically closes the dataset after use - with xr.open_dataset('saved_on_disk.nc') as ds: + with xr.open_dataset("saved_on_disk.nc") as ds: print(ds.keys()) Although xarray provides reasonable support for incremental reads of files on @@ -171,7 +176,7 @@ You can view this encoding information (among others) in the .. ipython:: :verbatim: - In [1]: ds_disk['y'].encoding + In [1]: ds_disk["y"].encoding Out[1]: {'zlib': False, 'shuffle': False, @@ -458,7 +463,7 @@ This is not CF-compliant but again facilitates roundtripping of xarray datasets. Invalid netCDF files ~~~~~~~~~~~~~~~~~~~~ -The library ``h5netcdf`` allows writing some dtypes (booleans, complex, ...) that aren't +The library ``h5netcdf`` allows writing some dtypes (booleans, complex, ...) that aren't allowed in netCDF4 (see `h5netcdf documentation `_). This feature is availabe through :py:meth:`DataArray.to_netcdf` and @@ -469,7 +474,7 @@ and currently raises a warning unless ``invalid_netcdf=True`` is set: :okwarning: # Writing complex valued data - da = xr.DataArray([1.+1.j, 2.+2.j, 3.+3.j]) + da = xr.DataArray([1.0 + 1.0j, 2.0 + 2.0j, 3.0 + 3.0j]) da.to_netcdf("complex.nc", engine="h5netcdf", invalid_netcdf=True) # Reading it back @@ -479,7 +484,8 @@ and currently raises a warning unless ``invalid_netcdf=True`` is set: :suppress: import os - os.remove('complex.nc') + + os.remove("complex.nc") .. warning:: @@ -499,9 +505,11 @@ installed xarray can convert a ``DataArray`` into a ``Cube`` using .. ipython:: python - da = xr.DataArray(np.random.rand(4, 5), dims=['x', 'y'], - coords=dict(x=[10, 20, 30, 40], - y=pd.date_range('2000-01-01', periods=5))) + da = xr.DataArray( + np.random.rand(4, 5), + dims=["x", "y"], + coords=dict(x=[10, 20, 30, 40], y=pd.date_range("2000-01-01", periods=5)), + ) cube = da.to_iris() cube @@ -548,8 +556,9 @@ __ http://iri.columbia.edu/ :verbatim: In [3]: remote_data = xr.open_dataset( - ...: 'http://iridl.ldeo.columbia.edu/SOURCES/.OSU/.PRISM/.monthly/dods', - ...: decode_times=False) + ...: "http://iridl.ldeo.columbia.edu/SOURCES/.OSU/.PRISM/.monthly/dods", + ...: decode_times=False, + ...: ) In [4]: remote_data Out[4]: @@ -587,7 +596,7 @@ over the network until we look at particular values: .. ipython:: :verbatim: - In [4]: tmax = remote_data['tmax'][:500, ::3, ::3] + In [4]: tmax = remote_data["tmax"][:500, ::3, ::3] In [5]: tmax Out[5]: @@ -715,7 +724,8 @@ search indices or other automated data discovery tools. :suppress: import os - os.remove('saved_on_disk.nc') + + os.remove("saved_on_disk.nc") .. _io.rasterio: @@ -729,7 +739,7 @@ rasterio is installed. 
Here is an example of how to use .. ipython:: :verbatim: - In [7]: rio = xr.open_rasterio('RGB.byte.tif') + In [7]: rio = xr.open_rasterio("RGB.byte.tif") In [8]: rio Out[8]: @@ -769,7 +779,7 @@ GDAL readable raster data using `rasterio`_ as well as for exporting to a geoTIF In [1]: import rioxarray - In [2]: rds = rioxarray.open_rasterio('RGB.byte.tif') + In [2]: rds = rioxarray.open_rasterio("RGB.byte.tif") In [3]: rds Out[3]: @@ -794,12 +804,12 @@ GDAL readable raster data using `rasterio`_ as well as for exporting to a geoTIF In [4]: rds.rio.crs Out[4]: CRS.from_epsg(32618) - In [5]: rds4326 = rio.rio.reproject("epsg:4326") + In [5]: rds4326 = rds.rio.reproject("epsg:4326") In [6]: rds4326.rio.crs Out[6]: CRS.from_epsg(4326) - In [7]: rds4326.rio.to_raster('RGB.byte.4326.tif') + In [7]: rds4326.rio.to_raster("RGB.byte.4326.tif") .. _rasterio: https://rasterio.readthedocs.io/en/latest/ @@ -827,21 +837,27 @@ Xarray's Zarr backend allows xarray to leverage these capabilities. Xarray can't open just any zarr dataset, because xarray requires special metadata (attributes) describing the dataset dimensions and coordinates. At this time, xarray can only open zarr datasets that have been written by -xarray. To write a dataset with zarr, we use the :py:attr:`Dataset.to_zarr` method. +xarray. For implementation details, see :ref:`zarr_encoding`. + +To write a dataset with zarr, we use the :py:attr:`Dataset.to_zarr` method. To write to a local directory, we pass a path to a directory .. ipython:: python - :suppress: + :suppress: ! rm -rf path/to/directory.zarr .. ipython:: python - ds = xr.Dataset({'foo': (('x', 'y'), np.random.rand(4, 5))}, - coords={'x': [10, 20, 30, 40], - 'y': pd.date_range('2000-01-01', periods=5), - 'z': ('x', list('abcd'))}) - ds.to_zarr('path/to/directory.zarr') + ds = xr.Dataset( + {"foo": (("x", "y"), np.random.rand(4, 5))}, + coords={ + "x": [10, 20, 30, 40], + "y": pd.date_range("2000-01-01", periods=5), + "z": ("x", list("abcd")), + }, + ) + ds.to_zarr("path/to/directory.zarr") (The suffix ``.zarr`` is optional--just a reminder that a zarr store lives there.) If the directory does not exist, it will be created. If a zarr @@ -854,22 +870,30 @@ It is also possible to append to an existing store. For that, set can be omitted as it will internally be set to ``'a'``. .. ipython:: python - :suppress: + :suppress: ! rm -rf path/to/directory.zarr .. ipython:: python - ds1 = xr.Dataset({'foo': (('x', 'y', 't'), np.random.rand(4, 5, 2))}, - coords={'x': [10, 20, 30, 40], - 'y': [1,2,3,4,5], - 't': pd.date_range('2001-01-01', periods=2)}) - ds1.to_zarr('path/to/directory.zarr') - ds2 = xr.Dataset({'foo': (('x', 'y', 't'), np.random.rand(4, 5, 2))}, - coords={'x': [10, 20, 30, 40], - 'y': [1,2,3,4,5], - 't': pd.date_range('2001-01-03', periods=2)}) - ds2.to_zarr('path/to/directory.zarr', append_dim='t') + ds1 = xr.Dataset( + {"foo": (("x", "y", "t"), np.random.rand(4, 5, 2))}, + coords={ + "x": [10, 20, 30, 40], + "y": [1, 2, 3, 4, 5], + "t": pd.date_range("2001-01-01", periods=2), + }, + ) + ds1.to_zarr("path/to/directory.zarr") + ds2 = xr.Dataset( + {"foo": (("x", "y", "t"), np.random.rand(4, 5, 2))}, + coords={ + "x": [10, 20, 30, 40], + "y": [1, 2, 3, 4, 5], + "t": pd.date_range("2001-01-03", periods=2), + }, + ) + ds2.to_zarr("path/to/directory.zarr", append_dim="t") To store variable length strings use ``dtype=object``. @@ -878,7 +902,7 @@ To read back a zarr dataset that has been created this way, we use the .. 
ipython:: python - ds_zarr = xr.open_zarr('path/to/directory.zarr') + ds_zarr = xr.open_zarr("path/to/directory.zarr") ds_zarr Cloud Storage Buckets @@ -912,15 +936,16 @@ These options can be passed to the ``to_zarr`` method as variable encoding. For example: .. ipython:: python - :suppress: + :suppress: ! rm -rf foo.zarr .. ipython:: python import zarr - compressor = zarr.Blosc(cname='zstd', clevel=3, shuffle=2) - ds.to_zarr('foo.zarr', encoding={'foo': {'compressor': compressor}}) + + compressor = zarr.Blosc(cname="zstd", clevel=3, shuffle=2) + ds.to_zarr("foo.zarr", encoding={"foo": {"compressor": compressor}}) .. note:: @@ -959,31 +984,30 @@ be done directly from zarr, as described in the .. _io.cfgrib: .. ipython:: python - :suppress: + :suppress: import shutil - shutil.rmtree('foo.zarr') - shutil.rmtree('path/to/directory.zarr') + + shutil.rmtree("foo.zarr") + shutil.rmtree("path/to/directory.zarr") GRIB format via cfgrib ---------------------- -xarray supports reading GRIB files via ECMWF cfgrib_ python driver and ecCodes_ -C-library, if they are installed. To open a GRIB file supply ``engine='cfgrib'`` +xarray supports reading GRIB files via ECMWF cfgrib_ python driver, +if it is installed. To open a GRIB file supply ``engine='cfgrib'`` to :py:func:`open_dataset`: .. ipython:: :verbatim: - In [1]: ds_grib = xr.open_dataset('example.grib', engine='cfgrib') + In [1]: ds_grib = xr.open_dataset("example.grib", engine="cfgrib") -We recommend installing ecCodes via conda:: +We recommend installing cfgrib via conda:: - conda install -c conda-forge eccodes - pip install cfgrib + conda install -c conda-forge cfgrib .. _cfgrib: https://github.com/ecmwf/cfgrib -.. _ecCodes: https://confluence.ecmwf.int/display/ECC/ecCodes+Home .. _io.pynio: diff --git a/doc/pandas.rst b/doc/pandas.rst index b0ec2a117dc..acf1d16b6ee 100644 --- a/doc/pandas.rst +++ b/doc/pandas.rst @@ -20,6 +20,7 @@ __ http://seaborn.pydata.org/ import numpy as np import pandas as pd import xarray as xr + np.random.seed(123456) Hierarchical and tidy data @@ -47,10 +48,15 @@ To convert any dataset to a ``DataFrame`` in tidy form, use the .. ipython:: python - ds = xr.Dataset({'foo': (('x', 'y'), np.random.randn(2, 3))}, - coords={'x': [10, 20], 'y': ['a', 'b', 'c'], - 'along_x': ('x', np.random.randn(2)), - 'scalar': 123}) + ds = xr.Dataset( + {"foo": (("x", "y"), np.random.randn(2, 3))}, + coords={ + "x": [10, 20], + "y": ["a", "b", "c"], + "along_x": ("x", np.random.randn(2)), + "scalar": 123, + }, + ) ds df = ds.to_dataframe() df @@ -91,7 +97,7 @@ DataFrames: .. ipython:: python - s = ds['foo'].to_series() + s = ds["foo"].to_series() s # or equivalently, with Series.to_xarray() xr.DataArray.from_series(s) @@ -117,8 +123,9 @@ available in pandas (i.e., a 1D array is converted to a .. ipython:: python - arr = xr.DataArray(np.random.randn(2, 3), - coords=[('x', [10, 20]), ('y', ['a', 'b', 'c'])]) + arr = xr.DataArray( + np.random.randn(2, 3), coords=[("x", [10, 20]), ("y", ["a", "b", "c"])] + ) df = arr.to_pandas() df @@ -136,9 +143,10 @@ preserve all use of multi-indexes: .. ipython:: python - index = pd.MultiIndex.from_arrays([['a', 'a', 'b'], [0, 1, 2]], - names=['one', 'two']) - df = pd.DataFrame({'x': 1, 'y': 2}, index=index) + index = pd.MultiIndex.from_arrays( + [["a", "a", "b"], [0, 1, 2]], names=["one", "two"] + ) + df = pd.DataFrame({"x": 1, "y": 2}, index=index) ds = xr.Dataset(df) ds @@ -175,9 +183,9 @@ Let's take a look: .. 
ipython:: python data = np.random.RandomState(0).rand(2, 3, 4) - items = list('ab') - major_axis = list('mno') - minor_axis = pd.date_range(start='2000', periods=4, name='date') + items = list("ab") + major_axis = list("mno") + minor_axis = pd.date_range(start="2000", periods=4, name="date") With old versions of pandas (prior to 0.25), this could stored in a ``Panel``: @@ -207,7 +215,7 @@ You can also easily convert this data into ``Dataset``: .. ipython:: python - array.to_dataset(dim='dim_0') + array.to_dataset(dim="dim_0") Here, there are two data variables, each representing a DataFrame on panel's ``items`` axis, and labeled as such. Each variable is a 2D array of the diff --git a/doc/plotting.rst b/doc/plotting.rst index f3d9c0213de..72248e31b1e 100644 --- a/doc/plotting.rst +++ b/doc/plotting.rst @@ -13,7 +13,7 @@ labels can also be used to easily create informative plots. xarray's plotting capabilities are centered around :py:class:`DataArray` objects. To plot :py:class:`Dataset` objects -simply access the relevant DataArrays, ie ``dset['var1']``. +simply access the relevant DataArrays, i.e. ``dset['var1']``. Dataset specific plotting routines are also available (see :ref:`plot-dataset`). Here we focus mostly on arrays 2d or larger. If your data fits nicely into a pandas DataFrame then you're better off using one of the more @@ -56,6 +56,7 @@ Imports # Use defaults so we don't get gridlines in generated docs import matplotlib as mpl + mpl.rcdefaults() The following imports are necessary for all of the examples. @@ -71,7 +72,7 @@ For these examples we'll use the North American air temperature dataset. .. ipython:: python - airtemps = xr.tutorial.open_dataset('air_temperature') + airtemps = xr.tutorial.open_dataset("air_temperature") airtemps # Convert to celsius @@ -79,7 +80,7 @@ For these examples we'll use the North American air temperature dataset. # copy attributes to get nice figure labels and change Kelvin to Celsius air.attrs = airtemps.air.attrs - air.attrs['units'] = 'deg C' + air.attrs["units"] = "deg C" .. note:: Until :issue:`1614` is solved, you might need to copy over the metadata in ``attrs`` to get informative figure labels (as was done above). @@ -126,7 +127,7 @@ can be used: .. ipython:: python @savefig plotting_1d_additional_args.png width=4in - air1d[:200].plot.line('b-^') + air1d[:200].plot.line("b-^") .. note:: Not all xarray plotting methods support passing positional arguments @@ -138,7 +139,7 @@ Keyword arguments work the same way, and are more explicit. .. ipython:: python @savefig plotting_example_sin3.png width=4in - air1d[:200].plot.line(color='purple', marker='o') + air1d[:200].plot.line(color="purple", marker="o") ========================= Adding to Existing Axis @@ -208,6 +209,44 @@ entire figure (as for matplotlib's ``figsize`` argument). .. _plotting.multiplelines: +========================= + Determine x-axis values +========================= + +Per default dimension coordinates are used for the x-axis (here the time coordinates). +However, you can also use non-dimension coordinates, MultiIndex levels, and dimensions +without coordinates along the x-axis. To illustrate this, let's calculate a 'decimal day' (epoch) +from the time and assign it as a non-dimension coordinate: + +.. ipython:: python + + decimal_day = (air1d.time - air1d.time[0]) / pd.Timedelta("1d") + air1d_multi = air1d.assign_coords(decimal_day=("time", decimal_day)) + air1d_multi + +To use ``'decimal_day'`` as x coordinate it must be explicitly specified: + +.. 
ipython:: python + + air1d_multi.plot(x="decimal_day") + +Creating a new MultiIndex named ``'date'`` from ``'time'`` and ``'decimal_day'``, +it is also possible to use a MultiIndex level as x-axis: + +.. ipython:: python + + air1d_multi = air1d_multi.set_index(date=("time", "decimal_day")) + air1d_multi.plot(x="decimal_day") + +Finally, if a dataset does not have any coordinates it enumerates all data points: + +.. ipython:: python + + air1d_multi = air1d_multi.drop("date") + air1d_multi.plot() + +The same applies to 2D plots below. + ==================================================== Multiple lines showing variation along a dimension ==================================================== @@ -219,7 +258,7 @@ plots to check the variation of air temperature at three different latitudes alo .. ipython:: python @savefig plotting_example_multiple_lines_x_kwarg.png - air.isel(lon=10, lat=[19,21,22]).plot.line(x='time') + air.isel(lon=10, lat=[19, 21, 22]).plot.line(x="time") It is required to explicitly specify either @@ -240,7 +279,7 @@ It is also possible to make line plots such that the data are on the x-axis and .. ipython:: python @savefig plotting_example_xy_kwarg.png - air.isel(time=10, lon=[10, 11]).plot(y='lat', hue='lon') + air.isel(time=10, lon=[10, 11]).plot(y="lat", hue="lon") ============ Step plots @@ -253,7 +292,7 @@ made using 1D data. :okwarning: @savefig plotting_example_step.png width=4in - air1d[:20].plot.step(where='mid') + air1d[:20].plot.step(where="mid") The argument ``where`` defines where the steps should be placed, options are ``'pre'`` (default), ``'post'``, and ``'mid'``. This is particularly handy @@ -261,15 +300,15 @@ when plotting data grouped with :py:meth:`Dataset.groupby_bins`. .. ipython:: python - air_grp = air.mean(['time','lon']).groupby_bins('lat',[0,23.5,66.5,90]) + air_grp = air.mean(["time", "lon"]).groupby_bins("lat", [0, 23.5, 66.5, 90]) air_mean = air_grp.mean() air_std = air_grp.std() air_mean.plot.step() - (air_mean + air_std).plot.step(ls=':') - (air_mean - air_std).plot.step(ls=':') - plt.ylim(-20,30) + (air_mean + air_std).plot.step(ls=":") + (air_mean - air_std).plot.step(ls=":") + plt.ylim(-20, 30) @savefig plotting_example_step_groupby.png width=4in - plt.title('Zonal mean temperature') + plt.title("Zonal mean temperature") In this case, the actual boundaries of the bins are used and the ``where`` argument is ignored. @@ -284,7 +323,9 @@ The keyword arguments ``xincrease`` and ``yincrease`` let you control the axes d .. ipython:: python @savefig plotting_example_xincrease_yincrease_kwarg.png - air.isel(time=10, lon=[10, 11]).plot.line(y='lat', hue='lon', xincrease=False, yincrease=False) + air.isel(time=10, lon=[10, 11]).plot.line( + y="lat", hue="lon", xincrease=False, yincrease=False + ) In addition, one can use ``xscale, yscale`` to set axes scaling; ``xticks, yticks`` to set axes ticks and ``xlim, ylim`` to set axes limits. These accept the same values as the matplotlib methods ``Axes.set_(x,y)scale()``, ``Axes.set_(x,y)ticks()``, ``Axes.set_(x,y)lim()`` respectively. @@ -348,7 +389,7 @@ produce plots with nonuniform coordinates. b = air2d.copy() # Apply a nonlinear transformation to one of the coords - b.coords['lat'] = np.log(b.coords['lat']) + b.coords["lat"] = np.log(b.coords["lat"]) @savefig plotting_nonuniform_coords.png width=4in b.plot() @@ -363,9 +404,9 @@ matplotlib is available. .. 
ipython:: python air2d.plot(cmap=plt.cm.Blues) - plt.title('These colors prove North America\nhas fallen in the ocean') - plt.ylabel('latitude') - plt.xlabel('longitude') + plt.title("These colors prove North America\nhas fallen in the ocean") + plt.ylabel("latitude") + plt.xlabel("longitude") plt.tight_layout() @savefig plotting_2d_call_matplotlib.png width=4in @@ -381,7 +422,7 @@ matplotlib is available. .. ipython:: python - plt.xlabel('Never gonna see this.') + plt.xlabel("Never gonna see this.") air2d.plot() @savefig plotting_2d_call_matplotlib2.png width=4in @@ -473,10 +514,10 @@ if using ``imshow`` or ``pcolormesh`` (but not with ``contour`` or ``contourf``, since levels are chosen automatically). .. ipython:: python - :okwarning: + :okwarning: @savefig plotting_seaborn_palette.png width=4in - air2d.plot(levels=10, cmap='husl') + air2d.plot(levels=10, cmap="husl") plt.draw() .. _plotting.faceting: @@ -520,14 +561,16 @@ arguments to the xarray plotting methods/functions. This returns a .. ipython:: python @savefig plot_facet_dataarray.png - g_simple = t.plot(x='lon', y='lat', col='time', col_wrap=3) + g_simple = t.plot(x="lon", y="lat", col="time", col_wrap=3) Faceting also works for line plots. .. ipython:: python @savefig plot_facet_dataarray_line.png - g_simple_line = t.isel(lat=slice(0,None,4)).plot(x='lon', hue='lat', col='time', col_wrap=3) + g_simple_line = t.isel(lat=slice(0, None, 4)).plot( + x="lon", hue="lat", col="time", col_wrap=3 + ) =============== 4 dimensional @@ -541,12 +584,12 @@ one were much hotter. .. ipython:: python t2 = t.isel(time=slice(0, 2)) - t4d = xr.concat([t2, t2 + 40], pd.Index(['normal', 'hot'], name='fourth_dim')) + t4d = xr.concat([t2, t2 + 40], pd.Index(["normal", "hot"], name="fourth_dim")) # This is a 4d array t4d.coords @savefig plot_facet_4d.png - t4d.plot(x='lon', y='lat', col='time', row='fourth_dim') + t4d.plot(x="lon", y="lat", col="time", row="fourth_dim") ================ Other features @@ -555,9 +598,9 @@ one were much hotter. Faceted plotting supports other arguments common to xarray 2d plots. .. ipython:: python - :suppress: + :suppress: - plt.close('all') + plt.close("all") .. ipython:: python @@ -566,9 +609,15 @@ Faceted plotting supports other arguments common to xarray 2d plots. hasoutliers[-1, -1, -1] = 400 @savefig plot_facet_robust.png - g = hasoutliers.plot.pcolormesh('lon', 'lat', col='time', col_wrap=3, - robust=True, cmap='viridis', - cbar_kwargs={'label': 'this has outliers'}) + g = hasoutliers.plot.pcolormesh( + "lon", + "lat", + col="time", + col_wrap=3, + robust=True, + cmap="viridis", + cbar_kwargs={"label": "this has outliers"}, + ) =================== FacetGrid Objects @@ -594,20 +643,20 @@ It's possible to select the :py:class:`xarray.DataArray` or .. ipython:: python - g.data.loc[g.name_dicts[0, 0]] + g.data.loc[g.name_dicts[0, 0]] Here is an example of using the lower level API and then modifying the axes after they have been plotted. .. ipython:: python - g = t.plot.imshow('lon', 'lat', col='time', col_wrap=3, robust=True) + g = t.plot.imshow("lon", "lat", col="time", col_wrap=3, robust=True) for i, ax in enumerate(g.axes.flat): - ax.set_title('Air Temperature %d' % i) + ax.set_title("Air Temperature %d" % i) bottomright = g.axes[-1, -1] - bottomright.annotate('bottom right', (240, 40)) + bottomright.annotate("bottom right", (240, 40)) @savefig plot_facet_iterator.png plt.draw() @@ -632,8 +681,8 @@ Consider this dataset .. 
ipython:: python - ds = xr.tutorial.scatter_example_dataset() - ds + ds = xr.tutorial.scatter_example_dataset() + ds Suppose we want to scatter ``A`` against ``B`` @@ -641,14 +690,14 @@ Suppose we want to scatter ``A`` against ``B`` .. ipython:: python @savefig ds_simple_scatter.png - ds.plot.scatter(x='A', y='B') + ds.plot.scatter(x="A", y="B") The ``hue`` kwarg lets you vary the color by variable value .. ipython:: python @savefig ds_hue_scatter.png - ds.plot.scatter(x='A', y='B', hue='w') + ds.plot.scatter(x="A", y="B", hue="w") When ``hue`` is specified, a colorbar is added for numeric ``hue`` DataArrays by default and a legend is added for non-numeric ``hue`` DataArrays (as above). @@ -659,21 +708,21 @@ Additionally, the boolean kwarg ``add_guide`` can be used to prevent the display ds = ds.assign(w=[1, 2, 3, 5]) @savefig ds_discrete_legend_hue_scatter.png - ds.plot.scatter(x='A', y='B', hue='w', hue_style='discrete') + ds.plot.scatter(x="A", y="B", hue="w", hue_style="discrete") The ``markersize`` kwarg lets you vary the point's size by variable value. You can additionally pass ``size_norm`` to control how the variable's values are mapped to point sizes. .. ipython:: python @savefig ds_hue_size_scatter.png - ds.plot.scatter(x='A', y='B', hue='z', hue_style='discrete', markersize='z') + ds.plot.scatter(x="A", y="B", hue="z", hue_style="discrete", markersize="z") Faceting is also possible .. ipython:: python @savefig ds_facet_scatter.png - ds.plot.scatter(x='A', y='B', col='x', row='z', hue='w', hue_style='discrete') + ds.plot.scatter(x="A", y="B", col="x", row="z", hue="w", hue_style="discrete") For more advanced scatter plots, we recommend converting the relevant data variables to a pandas DataFrame and using the extensive plotting capabilities of ``seaborn``. @@ -691,25 +740,33 @@ This script will plot the air temperature on a map. .. ipython:: python import cartopy.crs as ccrs - air = xr.tutorial.open_dataset('air_temperature').air + + air = xr.tutorial.open_dataset("air_temperature").air + ax = plt.axes(projection=ccrs.Orthographic(-80, 35)) - air.isel(time=0).plot.contourf(ax=ax, transform=ccrs.PlateCarree()); + air.isel(time=0).plot.contourf(ax=ax, transform=ccrs.PlateCarree()) + ax.set_global() + @savefig plotting_maps_cartopy.png width=100% - ax.set_global(); ax.coastlines(); + ax.coastlines() When faceting on maps, the projection can be transferred to the ``plot`` function using the ``subplot_kws`` keyword. The axes for the subplots created by faceting are accessible in the object returned by ``plot``: .. ipython:: python + :okwarning: - p = air.isel(time=[0, 4]).plot(transform=ccrs.PlateCarree(), col='time', - subplot_kws={'projection': ccrs.Orthographic(-80, 35)}) + p = air.isel(time=[0, 4]).plot( + transform=ccrs.PlateCarree(), + col="time", + subplot_kws={"projection": ccrs.Orthographic(-80, 35)}, + ) for ax in p.axes.flat: ax.coastlines() ax.gridlines() @savefig plotting_maps_cartopy_facetting.png width=100% - plt.draw(); + plt.draw() Details @@ -732,6 +789,7 @@ These are provided for user convenience; they all call the same code. .. ipython:: python import xarray.plot as xplt + da = xr.DataArray(range(5)) fig, axes = plt.subplots(ncols=2, nrows=2) da.plot(ax=axes[0, 0]) @@ -766,8 +824,7 @@ read on. .. ipython:: python - a0 = xr.DataArray(np.zeros((4, 3, 2)), dims=('y', 'x', 'z'), - name='temperature') + a0 = xr.DataArray(np.zeros((4, 3, 2)), dims=("y", "x", "z"), name="temperature") a0[0, 0, 0] = 1 a = a0.isel(z=0) a @@ -801,14 +858,16 @@ instead of the default ones: .. 
ipython:: python lon, lat = np.meshgrid(np.linspace(-20, 20, 5), np.linspace(0, 30, 4)) - lon += lat/10 - lat += lon/10 - da = xr.DataArray(np.arange(20).reshape(4, 5), dims=['y', 'x'], - coords = {'lat': (('y', 'x'), lat), - 'lon': (('y', 'x'), lon)}) + lon += lat / 10 + lat += lon / 10 + da = xr.DataArray( + np.arange(20).reshape(4, 5), + dims=["y", "x"], + coords={"lat": (("y", "x"), lat), "lon": (("y", "x"), lon)}, + ) @savefig plotting_example_2d_irreg.png width=4in - da.plot.pcolormesh('lon', 'lat'); + da.plot.pcolormesh("lon", "lat") Note that in this case, xarray still follows the pixel centered convention. This might be undesirable in some cases, for example when your data is defined @@ -818,22 +877,25 @@ this convention when plotting on a map: .. ipython:: python import cartopy.crs as ccrs - ax = plt.subplot(projection=ccrs.PlateCarree()); - da.plot.pcolormesh('lon', 'lat', ax=ax); - ax.scatter(lon, lat, transform=ccrs.PlateCarree()); + + ax = plt.subplot(projection=ccrs.PlateCarree()) + da.plot.pcolormesh("lon", "lat", ax=ax) + ax.scatter(lon, lat, transform=ccrs.PlateCarree()) + ax.coastlines() @savefig plotting_example_2d_irreg_map.png width=4in - ax.coastlines(); ax.gridlines(draw_labels=True); + ax.gridlines(draw_labels=True) You can however decide to infer the cell boundaries and use the ``infer_intervals`` keyword: .. ipython:: python - ax = plt.subplot(projection=ccrs.PlateCarree()); - da.plot.pcolormesh('lon', 'lat', ax=ax, infer_intervals=True); - ax.scatter(lon, lat, transform=ccrs.PlateCarree()); + ax = plt.subplot(projection=ccrs.PlateCarree()) + da.plot.pcolormesh("lon", "lat", ax=ax, infer_intervals=True) + ax.scatter(lon, lat, transform=ccrs.PlateCarree()) + ax.coastlines() @savefig plotting_example_2d_irreg_map_infer.png width=4in - ax.coastlines(); ax.gridlines(draw_labels=True); + ax.gridlines(draw_labels=True) .. note:: The data model of xarray does not support datasets with `cell boundaries`_ @@ -847,6 +909,6 @@ One can also make line plots with multidimensional coordinates. In this case, `` .. ipython:: python f, ax = plt.subplots(2, 1) - da.plot.line(x='lon', hue='y', ax=ax[0]); + da.plot.line(x="lon", hue="y", ax=ax[0]) @savefig plotting_example_2d_hue_xy.png - da.plot.line(x='lon', hue='x', ax=ax[1]); + da.plot.line(x="lon", hue="x", ax=ax[1]) \ No newline at end of file diff --git a/doc/quick-overview.rst b/doc/quick-overview.rst index 741b3d1a5fe..09b0d4c6fbb 100644 --- a/doc/quick-overview.rst +++ b/doc/quick-overview.rst @@ -22,16 +22,14 @@ array or list, with optional *dimensions* and *coordinates*: .. ipython:: python - data = xr.DataArray(np.random.randn(2, 3), - dims=('x', 'y'), - coords={'x': [10, 20]}) + data = xr.DataArray(np.random.randn(2, 3), dims=("x", "y"), coords={"x": [10, 20]}) data In this case, we have generated a 2D array, assigned the names *x* and *y* to the two dimensions respectively and associated two *coordinate labels* '10' and '20' with the two locations along the x dimension. If you supply a pandas :py:class:`~pandas.Series` or :py:class:`~pandas.DataFrame`, metadata is copied directly: .. ipython:: python - xr.DataArray(pd.Series(range(3), index=list('abc'), name='foo')) + xr.DataArray(pd.Series(range(3), index=list("abc"), name="foo")) Here are the key properties for a ``DataArray``: @@ -75,13 +73,13 @@ While you're setting up your DataArray, it's often a good idea to set metadata a .. 
ipython:: python - data.attrs['long_name'] = 'random velocity' - data.attrs['units'] = 'metres/sec' - data.attrs['description'] = 'A random variable created as an example.' - data.attrs['random_attribute'] = 123 + data.attrs["long_name"] = "random velocity" + data.attrs["units"] = "metres/sec" + data.attrs["description"] = "A random variable created as an example." + data.attrs["random_attribute"] = 123 data.attrs # you can add metadata to coordinates too - data.x.attrs['units'] = 'x units' + data.x.attrs["units"] = "x units" Computation @@ -102,15 +100,15 @@ numbers: .. ipython:: python - data.mean(dim='x') + data.mean(dim="x") Arithmetic operations broadcast based on dimension name. This means you don't need to insert dummy dimensions for alignment: .. ipython:: python - a = xr.DataArray(np.random.randn(3), [data.coords['y']]) - b = xr.DataArray(np.random.randn(4), dims='z') + a = xr.DataArray(np.random.randn(3), [data.coords["y"]]) + b = xr.DataArray(np.random.randn(4), dims="z") a b @@ -139,9 +137,9 @@ xarray supports grouped operations using a very similar API to pandas (see :ref: .. ipython:: python - labels = xr.DataArray(['E', 'F', 'E'], [data.coords['y']], name='labels') + labels = xr.DataArray(["E", "F", "E"], [data.coords["y"]], name="labels") labels - data.groupby(labels).mean('y') + data.groupby(labels).mean("y") data.groupby(labels).map(lambda x: x - x.min()) Plotting @@ -178,7 +176,7 @@ objects. You can think of it as a multi-dimensional generalization of the .. ipython:: python - ds = xr.Dataset({'foo': data, 'bar': ('x', [1, 2]), 'baz': np.pi}) + ds = xr.Dataset({"foo": data, "bar": ("x", [1, 2]), "baz": np.pi}) ds @@ -186,7 +184,7 @@ This creates a dataset with three DataArrays named ``foo``, ``bar`` and ``baz``. .. ipython:: python - ds['foo'] + ds["foo"] ds.foo @@ -216,14 +214,15 @@ You can directly read and write xarray objects to disk using :py:meth:`~xarray.D .. ipython:: python - ds.to_netcdf('example.nc') - xr.open_dataset('example.nc') + ds.to_netcdf("example.nc") + xr.open_dataset("example.nc") .. ipython:: python - :suppress: + :suppress: import os - os.remove('example.nc') + + os.remove("example.nc") It is common for datasets to be distributed across multiple files (commonly one file per timestep). xarray supports this use-case by providing the :py:meth:`~xarray.open_mfdataset` and the :py:meth:`~xarray.save_mfdataset` methods. For more, see :ref:`io`. diff --git a/doc/related-projects.rst b/doc/related-projects.rst index 57b8da0c447..9891f1a6bc2 100644 --- a/doc/related-projects.rst +++ b/doc/related-projects.rst @@ -62,6 +62,7 @@ Extend xarray capabilities - `eofs `_: EOF analysis in Python. - `hypothesis-gufunc `_: Extension to hypothesis. Makes it easy to write unit tests with xarray objects as input. - `nxarray `_: NeXus input/output capability for xarray. +- `xarray-custom `_: Data classes for custom xarray creation. - `xarray_extras `_: Advanced algorithms for xarray objects (e.g. integrations/interpolations). - `xpublish `_: Publish Xarray Datasets via a Zarr compatible REST API. - `xrft `_: Fourier transforms for xarray data. @@ -75,6 +76,7 @@ Visualization - `Datashader `_, `geoviews `_, `holoviews `_, : visualization packages for large data. - `hvplot `_ : A high-level plotting API for the PyData ecosystem built on HoloViews. - `psyplot `_: Interactive data visualization with python. +- `xarray-leaflet `_: An xarray extension for tiles map plotting based on ipyleaflet. 
Non-Python projects ~~~~~~~~~~~~~~~~~~~ diff --git a/doc/reshaping.rst b/doc/reshaping.rst index 465ca14dfc2..40de9ea799a 100644 --- a/doc/reshaping.rst +++ b/doc/reshaping.rst @@ -7,11 +7,12 @@ Reshaping and reorganizing data These methods allow you to reorganize .. ipython:: python - :suppress: + :suppress: import numpy as np import pandas as pd import xarray as xr + np.random.seed(123456) Reordering dimensions @@ -23,9 +24,9 @@ ellipsis (`...`) can be use to represent all other dimensions: .. ipython:: python - ds = xr.Dataset({'foo': (('x', 'y', 'z'), [[[42]]]), 'bar': (('y', 'z'), [[24]])}) - ds.transpose('y', 'z', 'x') - ds.transpose(..., 'x') # equivalent + ds = xr.Dataset({"foo": (("x", "y", "z"), [[[42]]]), "bar": (("y", "z"), [[24]])}) + ds.transpose("y", "z", "x") + ds.transpose(..., "x") # equivalent ds.transpose() # reverses all dimensions Expand and squeeze dimensions @@ -37,7 +38,7 @@ use :py:meth:`~xarray.DataArray.expand_dims` .. ipython:: python - expanded = ds.expand_dims('w') + expanded = ds.expand_dims("w") expanded This method attaches a new dimension with size 1 to all data variables. @@ -48,7 +49,7 @@ use :py:meth:`~xarray.DataArray.squeeze` .. ipython:: python - expanded.squeeze('w') + expanded.squeeze("w") Converting between datasets and arrays -------------------------------------- @@ -69,14 +70,14 @@ To convert back from a DataArray to a Dataset, use .. ipython:: python - arr.to_dataset(dim='variable') + arr.to_dataset(dim="variable") The broadcasting behavior of ``to_array`` means that the resulting array includes the union of data variable dimensions: .. ipython:: python - ds2 = xr.Dataset({'a': 0, 'b': ('x', [3, 4, 5])}) + ds2 = xr.Dataset({"a": 0, "b": ("x", [3, 4, 5])}) # the input dataset has 4 elements ds2 @@ -90,7 +91,7 @@ If you use ``to_dataset`` without supplying the ``dim`` argument, the DataArray .. ipython:: python - arr.to_dataset(name='combined') + arr.to_dataset(name="combined") .. _reshape.stack: @@ -103,11 +104,12 @@ implemented :py:meth:`~xarray.DataArray.stack` and .. ipython:: python - array = xr.DataArray(np.random.randn(2, 3), - coords=[('x', ['a', 'b']), ('y', [0, 1, 2])]) - stacked = array.stack(z=('x', 'y')) + array = xr.DataArray( + np.random.randn(2, 3), coords=[("x", ["a", "b"]), ("y", [0, 1, 2])] + ) + stacked = array.stack(z=("x", "y")) stacked - stacked.unstack('z') + stacked.unstack("z") As elsewhere in xarray, an ellipsis (`...`) can be used to represent all unlisted dimensions: @@ -128,15 +130,15 @@ possible levels. Missing levels are filled in with ``NaN`` in the resulting obje stacked2 = stacked[::2] stacked2 - stacked2.unstack('z') + stacked2.unstack("z") However, xarray's ``stack`` has an important difference from pandas: unlike pandas, it does not automatically drop missing values. Compare: .. ipython:: python - array = xr.DataArray([[np.nan, 1], [2, 3]], dims=['x', 'y']) - array.stack(z=('x', 'y')) + array = xr.DataArray([[np.nan, 1], [2, 3]], dims=["x", "y"]) + array.stack(z=("x", "y")) array.to_pandas().stack() We departed from pandas's behavior here because predictable shapes for new @@ -166,16 +168,15 @@ like this: .. 
ipython:: python - data = xr.Dataset( - data_vars={'a': (('x', 'y'), [[0, 1, 2], [3, 4, 5]]), - 'b': ('x', [6, 7])}, - coords={'y': ['u', 'v', 'w']} - ) - data - stacked = data.to_stacked_array("z", sample_dims=['x']) - stacked - unstacked = stacked.to_unstacked_dataset("z") - unstacked + data = xr.Dataset( + data_vars={"a": (("x", "y"), [[0, 1, 2], [3, 4, 5]]), "b": ("x", [6, 7])}, + coords={"y": ["u", "v", "w"]}, + ) + data + stacked = data.to_stacked_array("z", sample_dims=["x"]) + stacked + unstacked = stacked.to_unstacked_dataset("z") + unstacked In this example, ``stacked`` is a two dimensional array that we can easily pass to a scikit-learn or another generic numerical method. @@ -202,19 +203,23 @@ coordinates using :py:meth:`~xarray.DataArray.set_index`: .. ipython:: python - da = xr.DataArray(np.random.rand(4), - coords={'band': ('x', ['a', 'a', 'b', 'b']), - 'wavenumber': ('x', np.linspace(200, 400, 4))}, - dims='x') - da - mda = da.set_index(x=['band', 'wavenumber']) - mda + da = xr.DataArray( + np.random.rand(4), + coords={ + "band": ("x", ["a", "a", "b", "b"]), + "wavenumber": ("x", np.linspace(200, 400, 4)), + }, + dims="x", + ) + da + mda = da.set_index(x=["band", "wavenumber"]) + mda These coordinates can now be used for indexing, e.g., .. ipython:: python - mda.sel(band='a') + mda.sel(band="a") Conversely, you can use :py:meth:`~xarray.DataArray.reset_index` to extract multi-index levels as coordinates (this is mainly useful @@ -222,14 +227,14 @@ for serialization): .. ipython:: python - mda.reset_index('x') + mda.reset_index("x") :py:meth:`~xarray.DataArray.reorder_levels` allows changing the order of multi-index levels: .. ipython:: python - mda.reorder_levels(x=['wavenumber', 'band']) + mda.reorder_levels(x=["wavenumber", "band"]) As of xarray v0.9 coordinate labels for each dimension are optional. You can also use ``.set_index`` / ``.reset_index`` to add / remove @@ -237,12 +242,12 @@ labels for one or several dimensions: .. ipython:: python - array = xr.DataArray([1, 2, 3], dims='x') + array = xr.DataArray([1, 2, 3], dims="x") array - array['c'] = ('x', ['a', 'b', 'c']) - array.set_index(x='c') - array = array.set_index(x='c') - array = array.reset_index('x', drop=True) + array["c"] = ("x", ["a", "b", "c"]) + array.set_index(x="c") + array = array.set_index(x="c") + array = array.reset_index("x", drop=True) .. _reshape.shift_and_roll: @@ -254,9 +259,9 @@ To adjust coordinate labels, you can use the :py:meth:`~xarray.Dataset.shift` an .. ipython:: python - array = xr.DataArray([1, 2, 3, 4], dims='x') - array.shift(x=2) - array.roll(x=2, roll_coords=True) + array = xr.DataArray([1, 2, 3, 4], dims="x") + array.shift(x=2) + array.roll(x=2, roll_coords=True) .. _reshape.sort: @@ -269,17 +274,18 @@ One may sort a DataArray/Dataset via :py:meth:`~xarray.DataArray.sortby` and .. ipython:: python - ds = xr.Dataset({'A': (('x', 'y'), [[1, 2], [3, 4]]), - 'B': (('x', 'y'), [[5, 6], [7, 8]])}, - coords={'x': ['b', 'a'], 'y': [1, 0]}) - dax = xr.DataArray([100, 99], [('x', [0, 1])]) - day = xr.DataArray([90, 80], [('y', [0, 1])]) - ds.sortby([day, dax]) + ds = xr.Dataset( + {"A": (("x", "y"), [[1, 2], [3, 4]]), "B": (("x", "y"), [[5, 6], [7, 8]])}, + coords={"x": ["b", "a"], "y": [1, 0]}, + ) + dax = xr.DataArray([100, 99], [("x", [0, 1])]) + day = xr.DataArray([90, 80], [("y", [0, 1])]) + ds.sortby([day, dax]) As a shortcut, you can refer to existing coordinates by name: .. 
ipython:: python - ds.sortby('x') - ds.sortby(['y', 'x']) - ds.sortby(['y', 'x'], ascending=False) + ds.sortby("x") + ds.sortby(["y", "x"]) + ds.sortby(["y", "x"], ascending=False) \ No newline at end of file diff --git a/doc/time-series.rst b/doc/time-series.rst index d838dbbd4cd..96a2edc0ea5 100644 --- a/doc/time-series.rst +++ b/doc/time-series.rst @@ -10,11 +10,12 @@ data in pandas such a joy to xarray. In most cases, we rely on pandas for the core functionality. .. ipython:: python - :suppress: + :suppress: import numpy as np import pandas as pd import xarray as xr + np.random.seed(123456) Creating datetime64 data @@ -29,8 +30,8 @@ using :py:func:`pandas.to_datetime` and :py:func:`pandas.date_range`: .. ipython:: python - pd.to_datetime(['2000-01-01', '2000-02-02']) - pd.date_range('2000-01-01', periods=365) + pd.to_datetime(["2000-01-01", "2000-02-02"]) + pd.date_range("2000-01-01", periods=365) Alternatively, you can supply arrays of Python ``datetime`` objects. These get converted automatically when used as arguments in xarray objects: @@ -38,7 +39,8 @@ converted automatically when used as arguments in xarray objects: .. ipython:: python import datetime - xr.Dataset({'time': datetime.datetime(2000, 1, 1)}) + + xr.Dataset({"time": datetime.datetime(2000, 1, 1)}) When reading or writing netCDF files, xarray automatically decodes datetime and timedelta arrays using `CF conventions`_ (that is, by using a ``units`` @@ -62,8 +64,8 @@ You can manual decode arrays in this form by passing a dataset to .. ipython:: python - attrs = {'units': 'hours since 2000-01-01'} - ds = xr.Dataset({'time': ('time', [0, 1, 2, 3], attrs)}) + attrs = {"units": "hours since 2000-01-01"} + ds = xr.Dataset({"time": ("time", [0, 1, 2, 3], attrs)}) xr.decode_cf(ds) One unfortunate limitation of using ``datetime64[ns]`` is that it limits the @@ -87,10 +89,10 @@ items and with the `slice` object: .. ipython:: python - time = pd.date_range('2000-01-01', freq='H', periods=365 * 24) - ds = xr.Dataset({'foo': ('time', np.arange(365 * 24)), 'time': time}) - ds.sel(time='2000-01') - ds.sel(time=slice('2000-06-01', '2000-06-10')) + time = pd.date_range("2000-01-01", freq="H", periods=365 * 24) + ds = xr.Dataset({"foo": ("time", np.arange(365 * 24)), "time": time}) + ds.sel(time="2000-01") + ds.sel(time=slice("2000-06-01", "2000-06-10")) You can also select a particular time by indexing with a :py:class:`datetime.time` object: @@ -113,8 +115,8 @@ given ``DataArray`` can be quickly computed using a special ``.dt`` accessor. .. ipython:: python - time = pd.date_range('2000-01-01', freq='6H', periods=365 * 4) - ds = xr.Dataset({'foo': ('time', np.arange(365 * 4)), 'time': time}) + time = pd.date_range("2000-01-01", freq="6H", periods=365 * 4) + ds = xr.Dataset({"foo": ("time", np.arange(365 * 4)), "time": time}) ds.time.dt.hour ds.time.dt.dayofweek @@ -130,16 +132,16 @@ __ http://pandas.pydata.org/pandas-docs/stable/api.html#time-date-components .. ipython:: python - ds['time.month'] - ds['time.dayofyear'] + ds["time.month"] + ds["time.dayofyear"] For use as a derived coordinate, xarray adds ``'season'`` to the list of datetime components supported by pandas: .. ipython:: python - ds['time.season'] - ds['time'].dt.season + ds["time.season"] + ds["time"].dt.season The set of valid seasons consists of 'DJF', 'MAM', 'JJA' and 'SON', labeled by the first letters of the corresponding months. @@ -152,7 +154,7 @@ __ http://pandas.pydata.org/pandas-docs/stable/timeseries.html#offset-aliases .. 
ipython:: python - ds['time'].dt.floor('D') + ds["time"].dt.floor("D") The ``.dt`` accessor can also be used to generate formatted datetime strings for arrays utilising the same formatting as the standard `datetime.strftime`_. @@ -161,7 +163,7 @@ for arrays utilising the same formatting as the standard `datetime.strftime`_. .. ipython:: python - ds['time'].dt.strftime('%a, %b %d %H:%M') + ds["time"].dt.strftime("%a, %b %d %H:%M") .. _resampling: @@ -173,9 +175,9 @@ Datetime components couple particularly well with grouped operations (see calculate the mean by time of day: .. ipython:: python - :okwarning: + :okwarning: - ds.groupby('time.hour').mean() + ds.groupby("time.hour").mean() For upsampling or downsampling temporal resolutions, xarray offers a :py:meth:`~xarray.Dataset.resample` method building on the core functionality @@ -187,25 +189,25 @@ same api as ``resample`` `in pandas`_. For example, we can downsample our dataset from hourly to 6-hourly: .. ipython:: python - :okwarning: + :okwarning: - ds.resample(time='6H') + ds.resample(time="6H") This will create a specialized ``Resample`` object which saves information necessary for resampling. All of the reduction methods which work with ``Resample`` objects can also be used for resampling: .. ipython:: python - :okwarning: + :okwarning: - ds.resample(time='6H').mean() + ds.resample(time="6H").mean() You can also supply an arbitrary reduction function to aggregate over each resampling group: .. ipython:: python - ds.resample(time='6H').reduce(np.mean) + ds.resample(time="6H").reduce(np.mean) For upsampling, xarray provides six methods: ``asfreq``, ``ffill``, ``bfill``, ``pad``, ``nearest`` and ``interpolate``. ``interpolate`` extends ``scipy.interpolate.interp1d`` @@ -218,7 +220,7 @@ Data that has indices outside of the given ``tolerance`` are set to ``NaN``. .. ipython:: python - ds.resample(time='1H').nearest(tolerance='1H') + ds.resample(time="1H").nearest(tolerance="1H") For more examples of using grouped operations on a time dimension, see diff --git a/doc/weather-climate.rst b/doc/weather-climate.rst index 768cf6556f9..f03dfd14c73 100644 --- a/doc/weather-climate.rst +++ b/doc/weather-climate.rst @@ -4,7 +4,7 @@ Weather and climate data ======================== .. ipython:: python - :suppress: + :suppress: import xarray as xr @@ -56,11 +56,14 @@ coordinate with dates from a no-leap calendar and a .. ipython:: python - from itertools import product - from cftime import DatetimeNoLeap - dates = [DatetimeNoLeap(year, month, 1) for year, month in - product(range(1, 3), range(1, 13))] - da = xr.DataArray(np.arange(24), coords=[dates], dims=['time'], name='foo') + from itertools import product + from cftime import DatetimeNoLeap + + dates = [ + DatetimeNoLeap(year, month, 1) + for year, month in product(range(1, 3), range(1, 13)) + ] + da = xr.DataArray(np.arange(24), coords=[dates], dims=["time"], name="foo") xarray also includes a :py:func:`~xarray.cftime_range` function, which enables creating a :py:class:`~xarray.CFTimeIndex` with regularly-spaced dates. For @@ -68,8 +71,17 @@ instance, we can create the same dates and DataArray we created above using: .. 
ipython:: python - dates = xr.cftime_range(start='0001', periods=24, freq='MS', calendar='noleap') - da = xr.DataArray(np.arange(24), coords=[dates], dims=['time'], name='foo') + dates = xr.cftime_range(start="0001", periods=24, freq="MS", calendar="noleap") + da = xr.DataArray(np.arange(24), coords=[dates], dims=["time"], name="foo") + +Mirroring pandas' method with the same name, :py:meth:`~xarray.infer_freq` allows one to +infer the sampling frequency of a :py:class:`~xarray.CFTimeIndex` or a 1-D +:py:class:`~xarray.DataArray` containing cftime objects. It also works transparently with +``np.datetime64[ns]`` and ``np.timedelta64[ns]`` data. + +.. ipython:: python + + xr.infer_freq(dates) With :py:meth:`~xarray.CFTimeIndex.strftime` we can also easily generate formatted strings from the datetime values of a :py:class:`~xarray.CFTimeIndex` directly or through the @@ -80,8 +92,8 @@ using the same formatting as the standard `datetime.strftime`_ convention . .. ipython:: python - dates.strftime('%c') - da['time'].dt.strftime('%Y%m%d') + dates.strftime("%c") + da["time"].dt.strftime("%Y%m%d") For data indexed by a :py:class:`~xarray.CFTimeIndex` xarray currently supports: @@ -90,8 +102,8 @@ For data indexed by a :py:class:`~xarray.CFTimeIndex` xarray currently supports: .. ipython:: python - da.sel(time='0001') - da.sel(time=slice('0001-05', '0002-02')) + da.sel(time="0001") + da.sel(time=slice("0001-05", "0002-02")) - Access of basic datetime components via the ``dt`` accessor (in this case just "year", "month", "day", "hour", "minute", "second", "microsecond", @@ -99,64 +111,65 @@ For data indexed by a :py:class:`~xarray.CFTimeIndex` xarray currently supports: .. ipython:: python - da.time.dt.year - da.time.dt.month - da.time.dt.season - da.time.dt.dayofyear - da.time.dt.dayofweek - da.time.dt.days_in_month + da.time.dt.year + da.time.dt.month + da.time.dt.season + da.time.dt.dayofyear + da.time.dt.dayofweek + da.time.dt.days_in_month - Rounding of datetimes to fixed frequencies via the ``dt`` accessor: .. ipython:: python - da.time.dt.ceil('3D') - da.time.dt.floor('5D') - da.time.dt.round('2D') + da.time.dt.ceil("3D") + da.time.dt.floor("5D") + da.time.dt.round("2D") - Group-by operations based on datetime accessor attributes (e.g. by month of the year): .. ipython:: python - da.groupby('time.month').sum() + da.groupby("time.month").sum() - Interpolation using :py:class:`cftime.datetime` objects: .. ipython:: python - da.interp(time=[DatetimeNoLeap(1, 1, 15), DatetimeNoLeap(1, 2, 15)]) + da.interp(time=[DatetimeNoLeap(1, 1, 15), DatetimeNoLeap(1, 2, 15)]) - Interpolation using datetime strings: .. ipython:: python - da.interp(time=['0001-01-15', '0001-02-15']) + da.interp(time=["0001-01-15", "0001-02-15"]) - Differentiation: .. ipython:: python - da.differentiate('time') + da.differentiate("time") - Serialization: .. ipython:: python - da.to_netcdf('example-no-leap.nc') - xr.open_dataset('example-no-leap.nc') + da.to_netcdf("example-no-leap.nc") + xr.open_dataset("example-no-leap.nc") .. ipython:: python :suppress: import os - os.remove('example-no-leap.nc') + + os.remove("example-no-leap.nc") - And resampling along the time dimension for data indexed by a :py:class:`~xarray.CFTimeIndex`: .. ipython:: python - da.resample(time='81T', closed='right', label='right', base=3).mean() + da.resample(time="81T", closed="right", label="right", base=3).mean() .. note:: @@ -168,13 +181,13 @@ For data indexed by a :py:class:`~xarray.CFTimeIndex` xarray currently supports: method: .. 
ipython:: python - :okwarning: + :okwarning: - modern_times = xr.cftime_range('2000', periods=24, freq='MS', calendar='noleap') - da = xr.DataArray(range(24), [('time', modern_times)]) + modern_times = xr.cftime_range("2000", periods=24, freq="MS", calendar="noleap") + da = xr.DataArray(range(24), [("time", modern_times)]) da - datetimeindex = da.indexes['time'].to_datetimeindex() - da['time'] = datetimeindex + datetimeindex = da.indexes["time"].to_datetimeindex() + da["time"] = datetimeindex However in this case one should use caution to only perform operations which do not depend on differences between dates (e.g. differentiation, diff --git a/doc/whats-new.rst b/doc/whats-new.rst index fc95e26dabd..086cddee0a0 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -4,13 +4,14 @@ What's New ========== .. ipython:: python - :suppress: + :suppress: import numpy as np import pandas as pd import xarray as xray import xarray import xarray as xr + np.random.seed(123456) .. _whats-new.0.16.0: @@ -20,16 +21,54 @@ v0.16.0 (unreleased) Breaking changes ~~~~~~~~~~~~~~~~ + +- ``groupby`` operations will restore coord dimension order. Pass ``restore_coord_dims=False`` + to revert to previous behavior. +- :meth:`DataArray.transpose` will now transpose coordinates by default. + Pass ``transpose_coords=False`` to revert to previous behaviour. + By `Maximilian Roos `_ - Alternate draw styles for :py:meth:`plot.step` must be passed using the ``drawstyle`` (or ``ds``) keyword argument, instead of the ``linestyle`` (or ``ls``) keyword argument, in line with the `upstream change in Matplotlib `_. (:pull:`3274`) By `Elliott Sales de Andrade `_ +- The old :py:func:`auto_combine` function has now been removed in + favour of the :py:func:`combine_by_coords` and + :py:func:`combine_nested` functions. This also means that + the default behaviour of :py:func:`open_mfdataset` has changed to use + ``combine='by_coords'`` as the default argument value. (:issue:`2616`, :pull:`3926`) + By `Tom Nicholas `_. +- The ``DataArray`` and ``Variable`` HTML reprs now expand the data section by + default (:issue:`4176`) + By `Stephan Hoyer `_. + +Enhancements +~~~~~~~~~~~~ +- Performance improvement of :py:meth:`DataArray.interp` and :py:func:`Dataset.interp` + For orthogonal linear- and nearest-neighbor interpolation, we do 1d-interpolation sequentially + rather than interpolating in multidimensional space. (:issue:`2223`) + By `Keisuke Fujii `_. +- :py:meth:`DataArray.reset_index` and :py:meth:`Dataset.reset_index` now keep + coordinate attributes (:pull:`4103`). By `Oriol Abril `_. New Features ~~~~~~~~~~~~ -- Added :py:meth:`DataArray.polyfit` and :py:func:`xarray.polyval` for fitting polynomials. (:issue:`3349`) +- :py:meth:`DataArray.argmin` and :py:meth:`DataArray.argmax` now support + sequences of 'dim' arguments, and if a sequence is passed return a dict + (which can be passed to :py:meth:`isel` to get the value of the minimum) of + the indices for each dimension of the minimum or maximum of a DataArray. + (:pull:`3936`) + By `John Omotani `_, thanks to `Keisuke Fujii + `_ for work in :pull:`1469`. +- Added :py:meth:`xarray.infer_freq` for extending frequency inferring to CFTime indexes and data (:pull:`4033`). + By `Pascal Bourgault `_. +- ``chunks='auto'`` is now supported in the ``chunks`` argument of + :py:meth:`Dataset.chunk`. (:issue:`4055`) + By `Andrew Williams `_ +- Added :py:func:`xarray.cov` and :py:func:`xarray.corr` (:issue:`3784`, :pull:`3550`, :pull:`4089`). + By `Andrew Williams `_ and `Robin Beer `_. 
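+  A minimal sketch of the new functions (the array names and values below are
+  illustrative only, not taken from the changelog):
+
+  .. ipython:: python
+
+      da_a = xr.DataArray(np.random.randn(4, 3), dims=("time", "space"))
+      da_b = xr.DataArray(np.random.randn(4, 3), dims=("time", "space"))
+      xr.cov(da_a, da_b, dim="time")
+      xr.corr(da_a, da_b, dim="time")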
+- Added :py:meth:`DataArray.polyfit` and :py:func:`xarray.polyval` for fitting polynomials. (:issue:`3349`, :pull:`3733`, :pull:`4099`) By `Pascal Bourgault `_. - Control over attributes of result in :py:func:`merge`, :py:func:`concat`, :py:func:`combine_by_coords` and :py:func:`combine_nested` using @@ -43,11 +82,24 @@ New Features - Limited the length of array items with long string reprs to a reasonable width (:pull:`3900`) By `Maximilian Roos `_ +- Limited the number of lines of large arrays when numpy reprs would have greater than 40. + (:pull:`3905`) + By `Maximilian Roos `_ - Implement :py:meth:`DataArray.idxmax`, :py:meth:`DataArray.idxmin`, :py:meth:`Dataset.idxmax`, :py:meth:`Dataset.idxmin`. (:issue:`60`, :pull:`3871`) By `Todd Jennings `_ +- Support dask handling for :py:meth:`DataArray.idxmax`, :py:meth:`DataArray.idxmin`, + :py:meth:`Dataset.idxmax`, :py:meth:`Dataset.idxmin`. (:pull:`3922`, :pull:`4135`) + By `Kai Mühlbauer `_ and `Pascal Bourgault `_. +- More support for unit aware arrays with pint (:pull:`3643`, :pull:`3975`) + By `Justus Magin `_. +- Support overriding existing variables in ``to_zarr()`` with ``mode='a'`` even + without ``append_dim``, as long as dimension sizes do not change. + By `Stephan Hoyer `_. - Allow plotting of boolean arrays. (:pull:`3766`) By `Marek Jacob `_ +- Enable using MultiIndex levels as cordinates in 1D and 2D plots (:issue:`3927`). + By `Mathias Hauser `_. - A ``days_in_month`` accessor for :py:class:`xarray.CFTimeIndex`, analogous to the ``days_in_month`` accessor for a :py:class:`pandas.DatetimeIndex`, which returns the days in the month each datetime in the index. Now days in month @@ -55,9 +107,32 @@ New Features the :py:class:`~core.accessor_dt.DatetimeAccessor` (:pull:`3935`). This feature requires cftime version 1.1.0 or greater. By `Spencer Clark `_. +- For the netCDF3 backend, added dtype coercions for unsigned integer types. + (:issue:`4014`, :pull:`4018`) + By `Yunus Sevinchan `_ +- :py:meth:`map_blocks` now accepts a ``template`` kwarg. This allows use cases + where the result of a computation could not be inferred automatically. + By `Deepak Cherian `_ +- :py:meth:`map_blocks` can now handle dask-backed xarray objects in ``args``. (:pull:`3818`) + By `Deepak Cherian `_ +- Add keyword ``decode_timedelta`` to :py:func:`xarray.open_dataset`, + (:py:func:`xarray.open_dataarray`, :py:func:`xarray.open_dataarray`, + :py:func:`xarray.decode_cf`) that allows to disable/enable the decoding of timedeltas + independently of time decoding (:issue:`1621`) + `Aureliana Barghini ` Bug fixes ~~~~~~~~~ +- Fix errors combining attrs in :py:func:`open_mfdataset` (:issue:`4009`, :pull:`4173`) + By `John Omotani `_ +- If groupby receives a ``DataArray`` with name=None, assign a default name (:issue:`158`) + By `Phil Butcher `_. +- Support dark mode in VS code (:issue:`4024`) + By `Keisuke Fujii `_. +- Fix bug when converting multiindexed Pandas objects to sparse xarray objects. (:issue:`4019`) + By `Deepak Cherian `_. +- ``ValueError`` is raised when ``fill_value`` is not a scalar in :py:meth:`full_like`. (:issue:`3977`) + By `Huite Bootsma `_. - Fix wrong order in converting a ``pd.Series`` with a MultiIndex to ``DataArray``. (:issue:`3951`) By `Keisuke Fujii `_. - Fix renaming of coords when one or more stacked coords is not in @@ -72,15 +147,28 @@ Bug fixes By `Deepak Cherian `_ - Fix :py:class:`~xarray.plot.FacetGrid` when ``vmin == vmax``. 
(:issue:`3734`) By `Deepak Cherian `_ +- Fix plotting when ``levels`` is a scalar and ``norm`` is provided. (:issue:`3735`) + By `Deepak Cherian `_ - Fix bug where plotting line plots with 2D coordinates depended on dimension order. (:issue:`3933`) By `Tom Nicholas `_. - Fix ``RasterioDeprecationWarning`` when using a ``vrt`` in ``open_rasterio``. (:issue:`3964`) By `Taher Chegini `_. +- Fix ``AttributeError`` on displaying a :py:class:`Variable` + in a notebook context. (:issue:`3972`, :pull:`3973`) + By `Ian Castleden `_. - Fix bug causing :py:meth:`DataArray.interpolate_na` to always drop attributes, and added `keep_attrs` argument. (:issue:`3968`) By `Tom Nicholas `_. - +- Fix bug in time parsing failing to fall back to cftime. This was causing time + variables with a time unit of `'msecs'` to fail to parse. (:pull:`3998`) + By `Ryan May `_. +- Fix weighted mean when passing boolean weights (:issue:`4074`). + By `Mathias Hauser `_. +- Fix html repr in untrusted notebooks: fallback to plain text repr. (:pull:`4053`) + By `Benoit Bovy `_. +- Fix :py:func:`open_rasterio` for ``WarpedVRT`` with specified ``src_crs``. (:pull:`4104`) + By `Dave Cole `_. Documentation ~~~~~~~~~~~~~ @@ -102,6 +190,14 @@ Documentation of ``kwargs`` in :py:meth:`Dataset.interp` and :py:meth:`DataArray.interp` for 1-d and n-d interpolation (:pull:`3956`). By `Matthias Riße `_. +- Apply ``black`` to all the code in the documentation (:pull:`4012`) + By `Justus Magin `_. +- Narrative documentation now describes :py:meth:`map_blocks`: :ref:`dask.automatic-parallelization`. + By `Deepak Cherian `_. +- Document ``.plot``, ``.dt``, ``.str`` accessors the way they are called. (:issue:`3625`, :pull:`3988`) + By `Justus Magin `_. +- Add documentation for the parameters and return values of :py:meth:`DataArray.sel`. + By `Justus Magin `_. Internal Changes ~~~~~~~~~~~~~~~~ @@ -110,12 +206,17 @@ Internal Changes - Run the ``isort`` pre-commit hook only on python source files and update the ``flake8`` version. (:issue:`3750`, :pull:`3711`) By `Justus Magin `_. +- Add `blackdoc `_ to the list of + checkers for development. (:pull:`4177`) + By `Justus Magin `_. - Add a CI job that runs the tests with every optional dependency except ``dask``. (:issue:`3794`, :pull:`3919`) By `Justus Magin `_. - Use ``async`` / ``await`` for the asynchronous distributed tests. (:issue:`3987`, :pull:`3989`) By `Justus Magin `_. +- Various internal code clean-ups (:pull:`4026`, :pull:`4038`). + By `Prajjwal Nijhara `_. .. _whats-new.0.15.1: @@ -140,7 +241,7 @@ New Features - Weighted array reductions are now supported via the new :py:meth:`DataArray.weighted` and :py:meth:`Dataset.weighted` methods. See :ref:`comput.weighted`. (:issue:`422`, :pull:`2922`). - By `Mathias Hauser `_ + By `Mathias Hauser `_. - The new jupyter notebook repr (``Dataset._repr_html_`` and ``DataArray._repr_html_``) (introduced in 0.14.1) is now on by default. To disable, use ``xarray.set_options(display_style="text")``. @@ -174,6 +275,8 @@ New Features :py:meth:`core.groupby.DatasetGroupBy.quantile`, :py:meth:`core.groupby.DataArrayGroupBy.quantile` (:issue:`3843`, :pull:`3844`) By `Aaron Spring `_. +- Add a diff summary for `testing.assert_allclose`. (:issue:`3617`, :pull:`3847`) + By `Justus Magin `_. Bug fixes ~~~~~~~~~ @@ -1958,8 +2061,8 @@ Enhancements .. 
ipython:: python - ds = xr.Dataset({'a': 1}) - np.sin(ds) + ds = xr.Dataset({"a": 1}) + np.sin(ds) This obliviates the need for the ``xarray.ufuncs`` module, which will be deprecated in the future when xarray drops support for older versions of @@ -2050,8 +2153,8 @@ Enhancements .. ipython:: python - da = xr.DataArray(np.array([True, False, np.nan], dtype=object), dims='x') - da.sum() + da = xr.DataArray(np.array([True, False, np.nan], dtype=object), dims="x") + da.sum() (:issue:`1866`) By `Keisuke Fujii `_. @@ -2205,7 +2308,7 @@ Breaking changes .. ipython:: :verbatim: - In [1]: ds.resample('24H', dim='time', how='max') + In [1]: ds.resample("24H", dim="time", how="max") Out[1]: [...] @@ -2215,7 +2318,7 @@ Breaking changes .. ipython:: :verbatim: - In [1]: ds.resample(time='24H').max() + In [1]: ds.resample(time="24H").max() Out[1]: [...] @@ -2285,9 +2388,9 @@ Enhancements In [1]: import xarray as xr - In [2]: arr = xr.DataArray([[1, 2, 3], [4, 5, 6]], dims=('x', 'y')) + In [2]: arr = xr.DataArray([[1, 2, 3], [4, 5, 6]], dims=("x", "y")) - In [3]: xr.where(arr % 2, 'even', 'odd') + In [3]: xr.where(arr % 2, "even", "odd") Out[3]: array([['even', 'odd', 'even'], @@ -2808,7 +2911,7 @@ Breaking changes .. ipython:: :verbatim: - In [1]: xr.Dataset({'foo': (('x', 'y'), [[1, 2]])}) + In [1]: xr.Dataset({"foo": (("x", "y"), [[1, 2]])}) Out[1]: Dimensions: (x: 1, y: 2) @@ -3265,10 +3368,10 @@ Enhancements .. ipython:: :verbatim: - In [1]: import xarray as xr; import numpy as np + In [1]: import xarray as xr + ...: import numpy as np - In [2]: arr = xr.DataArray(np.arange(0, 7.5, 0.5).reshape(3, 5), - dims=('x', 'y')) + In [2]: arr = xr.DataArray(np.arange(0, 7.5, 0.5).reshape(3, 5), dims=("x", "y")) In [3]: arr Out[3]: @@ -3407,7 +3510,7 @@ Breaking changes .. ipython:: :verbatim: - In [2]: xray.DataArray([4, 5, 6], dims='x', name='x') + In [2]: xray.DataArray([4, 5, 6], dims="x", name="x") Out[2]: array([4, 5, 6]) @@ -3419,7 +3522,7 @@ Breaking changes .. ipython:: :verbatim: - In [2]: xray.DataArray([4, 5, 6], dims='x', name='x') + In [2]: xray.DataArray([4, 5, 6], dims="x", name="x") Out[2]: array([4, 5, 6]) @@ -3442,13 +3545,11 @@ Enhancements .. ipython:: :verbatim: - In [7]: df = pd.DataFrame({'foo': range(3), - ...: 'x': ['a', 'b', 'b'], - ...: 'y': [0, 0, 1]}) + In [7]: df = pd.DataFrame({"foo": range(3), "x": ["a", "b", "b"], "y": [0, 0, 1]}) - In [8]: s = df.set_index(['x', 'y'])['foo'] + In [8]: s = df.set_index(["x", "y"])["foo"] - In [12]: arr = xray.DataArray(s, dims='z') + In [12]: arr = xray.DataArray(s, dims="z") In [13]: arr Out[13]: @@ -3457,13 +3558,13 @@ Enhancements Coordinates: * z (z) object ('a', 0) ('b', 0) ('b', 1) - In [19]: arr.indexes['z'] + In [19]: arr.indexes["z"] Out[19]: MultiIndex(levels=[[u'a', u'b'], [0, 1]], labels=[[0, 1, 1], [0, 0, 1]], names=[u'x', u'y']) - In [14]: arr.unstack('z') + In [14]: arr.unstack("z") Out[14]: array([[ 0., nan], @@ -3472,7 +3573,7 @@ Enhancements * x (x) object 'a' 'b' * y (y) int64 0 1 - In [26]: arr.unstack('z').stack(z=('x', 'y')) + In [26]: arr.unstack("z").stack(z=("x", "y")) Out[26]: array([ 0., nan, 1., 2.]) @@ -3500,9 +3601,9 @@ Enhancements for shifting/rotating datasets or arrays along a dimension: .. ipython:: python - :okwarning: + :okwarning: - array = xray.DataArray([5, 6, 7, 8], dims='x') + array = xray.DataArray([5, 6, 7, 8], dims="x") array.shift(x=2) array.roll(x=2) @@ -3517,8 +3618,8 @@ Enhancements .. 
ipython:: python - a = xray.DataArray([1, 2, 3], dims='x') - b = xray.DataArray([5, 6], dims='y') + a = xray.DataArray([1, 2, 3], dims="x") + b = xray.DataArray([5, 6], dims="y") a b a2, b2 = xray.broadcast(a, b) @@ -3588,9 +3689,9 @@ Enhancements .. ipython:: :verbatim: - In [5]: array = xray.DataArray([1, 2, 3], dims='x') + In [5]: array = xray.DataArray([1, 2, 3], dims="x") - In [6]: array.reindex(x=[0.9, 1.5], method='nearest', tolerance=0.2) + In [6]: array.reindex(x=[0.9, 1.5], method="nearest", tolerance=0.2) Out[6]: array([ 2., nan]) @@ -3670,10 +3771,11 @@ Enhancements .. ipython:: :verbatim: - In [1]: da = xray.DataArray(np.arange(56).reshape((7, 8)), - ...: coords={'x': list('abcdefg'), - ...: 'y': 10 * np.arange(8)}, - ...: dims=['x', 'y']) + In [1]: da = xray.DataArray( + ...: np.arange(56).reshape((7, 8)), + ...: coords={"x": list("abcdefg"), "y": 10 * np.arange(8)}, + ...: dims=["x", "y"], + ...: ) In [2]: da Out[2]: @@ -3690,7 +3792,7 @@ Enhancements * x (x) |S1 'a' 'b' 'c' 'd' 'e' 'f' 'g' # we can index by position along each dimension - In [3]: da.isel_points(x=[0, 1, 6], y=[0, 1, 0], dim='points') + In [3]: da.isel_points(x=[0, 1, 6], y=[0, 1, 0], dim="points") Out[3]: array([ 0, 9, 48]) @@ -3700,7 +3802,7 @@ Enhancements * points (points) int64 0 1 2 # or equivalently by label - In [9]: da.sel_points(x=['a', 'b', 'g'], y=[0, 10, 0], dim='points') + In [9]: da.sel_points(x=["a", "b", "g"], y=[0, 10, 0], dim="points") Out[9]: array([ 0, 9, 48]) @@ -3714,11 +3816,11 @@ Enhancements .. ipython:: python - ds = xray.Dataset(coords={'x': range(100), 'y': range(100)}) - ds['distance'] = np.sqrt(ds.x ** 2 + ds.y ** 2) + ds = xray.Dataset(coords={"x": range(100), "y": range(100)}) + ds["distance"] = np.sqrt(ds.x ** 2 + ds.y ** 2) - @savefig where_example.png width=4in height=4in - ds.distance.where(ds.distance < 100).plot() + @savefig where_example.png width=4in height=4in + ds.distance.where(ds.distance < 100).plot() - Added new methods ``xray.DataArray.diff`` and ``xray.Dataset.diff`` for finite difference calculations along a given axis. @@ -3728,9 +3830,9 @@ Enhancements .. ipython:: python - da = xray.DataArray(np.random.random_sample(size=(5, 4))) - da.where(da < 0.5) - da.where(da < 0.5).to_masked_array(copy=True) + da = xray.DataArray(np.random.random_sample(size=(5, 4))) + da.where(da < 0.5) + da.where(da < 0.5).to_masked_array(copy=True) - Added new flag "drop_variables" to ``xray.open_dataset`` for excluding variables from being parsed. This may be useful to drop @@ -3788,9 +3890,9 @@ Enhancements .. ipython:: :verbatim: - In [1]: years, datasets = zip(*ds.groupby('time.year')) + In [1]: years, datasets = zip(*ds.groupby("time.year")) - In [2]: paths = ['%s.nc' % y for y in years] + In [2]: paths = ["%s.nc" % y for y in years] In [3]: xray.save_mfdataset(datasets, paths) @@ -3863,9 +3965,9 @@ Backwards incompatible changes .. ipython:: :verbatim: - In [1]: ds = xray.Dataset({'x': 0}) + In [1]: ds = xray.Dataset({"x": 0}) - In [2]: xray.concat([ds, ds], dim='y') + In [2]: xray.concat([ds, ds], dim="y") Out[2]: Dimensions: () @@ -3877,13 +3979,13 @@ Backwards incompatible changes Now, the default always concatenates data variables: .. ipython:: python - :suppress: + :suppress: - ds = xray.Dataset({'x': 0}) + ds = xray.Dataset({"x": 0}) .. ipython:: python - xray.concat([ds, ds], dim='y') + xray.concat([ds, ds], dim="y") To obtain the old behavior, supply the argument ``concat_over=[]``. @@ -3896,17 +3998,20 @@ Enhancements .. 
ipython:: python - ds = xray.Dataset({'a': 1, 'b': ('x', [1, 2, 3])}, - coords={'c': 42}, attrs={'Conventions': 'None'}) + ds = xray.Dataset( + {"a": 1, "b": ("x", [1, 2, 3])}, + coords={"c": 42}, + attrs={"Conventions": "None"}, + ) ds.to_array() - ds.to_array().to_dataset(dim='variable') + ds.to_array().to_dataset(dim="variable") - New ``xray.Dataset.fillna`` method to fill missing values, modeled off the pandas method of the same name: .. ipython:: python - array = xray.DataArray([np.nan, 1, np.nan, 3], dims='x') + array = xray.DataArray([np.nan, 1, np.nan, 3], dims="x") array.fillna(0) ``fillna`` works on both ``Dataset`` and ``DataArray`` objects, and uses @@ -3919,9 +4024,9 @@ Enhancements .. ipython:: python - ds = xray.Dataset({'y': ('x', [1, 2, 3])}) - ds.assign(z = lambda ds: ds.y ** 2) - ds.assign_coords(z = ('x', ['a', 'b', 'c'])) + ds = xray.Dataset({"y": ("x", [1, 2, 3])}) + ds.assign(z=lambda ds: ds.y ** 2) + ds.assign_coords(z=("x", ["a", "b", "c"])) These methods return a new Dataset (or DataArray) with updated data or coordinate variables. @@ -3934,7 +4039,7 @@ Enhancements .. ipython:: :verbatim: - In [12]: ds.sel(x=1.1, method='nearest') + In [12]: ds.sel(x=1.1, method="nearest") Out[12]: Dimensions: () @@ -3943,7 +4048,7 @@ Enhancements Data variables: y int64 2 - In [13]: ds.sel(x=[1.1, 2.1], method='pad') + In [13]: ds.sel(x=[1.1, 2.1], method="pad") Out[13]: Dimensions: (x: 2) @@ -3969,7 +4074,7 @@ Enhancements .. ipython:: python - ds = xray.Dataset({'x': np.arange(1000)}) + ds = xray.Dataset({"x": np.arange(1000)}) with xray.set_options(display_width=40): print(ds) @@ -4007,42 +4112,42 @@ Enhancements need to supply the time dimension explicitly: .. ipython:: python - :verbatim: + :verbatim: - time = pd.date_range('2000-01-01', freq='6H', periods=10) - array = xray.DataArray(np.arange(10), [('time', time)]) - array.resample('1D', dim='time') + time = pd.date_range("2000-01-01", freq="6H", periods=10) + array = xray.DataArray(np.arange(10), [("time", time)]) + array.resample("1D", dim="time") You can specify how to do the resampling with the ``how`` argument and other options such as ``closed`` and ``label`` let you control labeling: .. ipython:: python - :verbatim: + :verbatim: - array.resample('1D', dim='time', how='sum', label='right') + array.resample("1D", dim="time", how="sum", label="right") If the desired temporal resolution is higher than the original data (upsampling), xray will insert missing values: .. ipython:: python - :verbatim: + :verbatim: - array.resample('3H', 'time') + array.resample("3H", "time") - ``first`` and ``last`` methods on groupby objects let you take the first or last examples from each group along the grouped axis: .. ipython:: python - :verbatim: + :verbatim: - array.groupby('time.day').first() + array.groupby("time.day").first() These methods combine well with ``resample``: .. ipython:: python - :verbatim: + :verbatim: - array.resample('1D', dim='time', how='first') + array.resample("1D", dim="time", how="first") - ``xray.Dataset.swap_dims`` allows for easily swapping one dimension @@ -4050,9 +4155,9 @@ Enhancements .. ipython:: python - ds = xray.Dataset({'x': range(3), 'y': ('x', list('abc'))}) - ds - ds.swap_dims({'x': 'y'}) + ds = xray.Dataset({"x": range(3), "y": ("x", list("abc"))}) + ds + ds.swap_dims({"x": "y"}) This was possible in earlier versions of xray, but required some contortions. - ``xray.open_dataset`` and ``xray.Dataset.to_netcdf`` now @@ -4098,8 +4203,8 @@ Breaking changes .. 
ipython:: python - lhs = xray.DataArray([1, 2, 3], [('x', [0, 1, 2])]) - rhs = xray.DataArray([2, 3, 4], [('x', [1, 2, 3])]) + lhs = xray.DataArray([1, 2, 3], [("x", [0, 1, 2])]) + rhs = xray.DataArray([2, 3, 4], [("x", [1, 2, 3])]) lhs + rhs :ref:`For dataset construction and merging`, we align based on the @@ -4107,14 +4212,14 @@ Breaking changes .. ipython:: python - xray.Dataset({'foo': lhs, 'bar': rhs}) + xray.Dataset({"foo": lhs, "bar": rhs}) :ref:`For update and __setitem__`, we align based on the **original** object: .. ipython:: python - lhs.coords['rhs'] = rhs + lhs.coords["rhs"] = rhs lhs - Aggregations like ``mean`` or ``median`` now skip missing values by default: @@ -4137,8 +4242,8 @@ Breaking changes .. ipython:: python - a = xray.DataArray([1, 2], coords={'c': 0}, dims='x') - b = xray.DataArray([1, 2], coords={'c': ('x', [0, 0])}, dims='x') + a = xray.DataArray([1, 2], coords={"c": 0}, dims="x") + b = xray.DataArray([1, 2], coords={"c": ("x", [0, 0])}, dims="x") (a + b).coords This functionality can be controlled through the ``compat`` option, which @@ -4149,9 +4254,10 @@ Breaking changes .. ipython:: python - time = xray.DataArray(pd.date_range('2000-01-01', periods=365), - dims='time', name='time') - counts = time.groupby('time.month').count() + time = xray.DataArray( + pd.date_range("2000-01-01", periods=365), dims="time", name="time" + ) + counts = time.groupby("time.month").count() counts.sel(month=2) Previously, you would need to use something like @@ -4161,8 +4267,8 @@ Breaking changes .. ipython:: python - ds = xray.Dataset({'t': pd.date_range('2000-01-01', periods=12, freq='M')}) - ds['t.season'] + ds = xray.Dataset({"t": pd.date_range("2000-01-01", periods=12, freq="M")}) + ds["t.season"] Previously, it returned numbered seasons 1 through 4. - We have updated our use of the terms of "coordinates" and "variables". What @@ -4185,8 +4291,8 @@ Enhancements .. ipython:: python - data = xray.DataArray([1, 2, 3], [('x', range(3))]) - data.reindex(x=[0.5, 1, 1.5, 2, 2.5], method='pad') + data = xray.DataArray([1, 2, 3], [("x", range(3))]) + data.reindex(x=[0.5, 1, 1.5, 2, 2.5], method="pad") This will be especially useful once pandas 0.16 is released, at which point xray will immediately support reindexing with @@ -4205,15 +4311,15 @@ Enhancements makes it easy to drop explicitly listed variables or index labels: .. ipython:: python - :okwarning: + :okwarning: # drop variables - ds = xray.Dataset({'x': 0, 'y': 1}) - ds.drop('x') + ds = xray.Dataset({"x": 0, "y": 1}) + ds.drop("x") # drop index labels - arr = xray.DataArray([1, 2, 3], coords=[('x', list('abc'))]) - arr.drop(['a', 'c'], dim='x') + arr = xray.DataArray([1, 2, 3], coords=[("x", list("abc"))]) + arr.drop(["a", "c"], dim="x") - ``xray.Dataset.broadcast_equals`` has been added to correspond to the new ``compat`` option. @@ -4281,7 +4387,8 @@ Backwards incompatible changes .. ipython:: python from datetime import datetime - xray.Dataset({'t': [datetime(2000, 1, 1)]}) + + xray.Dataset({"t": [datetime(2000, 1, 1)]}) - xray now has support (including serialization to netCDF) for :py:class:`~pandas.TimedeltaIndex`. :py:class:`datetime.timedelta` objects @@ -4297,8 +4404,8 @@ Enhancements .. ipython:: python - ds = xray.Dataset({'tmin': ([], 25, {'units': 'celsius'})}) - ds.tmin.units + ds = xray.Dataset({"tmin": ([], 25, {"units": "celsius"})}) + ds.tmin.units Tab-completion for these variables should work in editors such as IPython. 
However, setting variables or attributes in this fashion is not yet @@ -4308,7 +4415,7 @@ Enhancements .. ipython:: python - array = xray.DataArray(np.zeros(5), dims=['x']) + array = xray.DataArray(np.zeros(5), dims=["x"]) array[dict(x=slice(3))] = 1 array diff --git a/xarray/__init__.py b/xarray/__init__.py index 0fead57e5fb..3886edc60e6 100644 --- a/xarray/__init__.py +++ b/xarray/__init__.py @@ -13,11 +13,12 @@ from .backends.zarr import open_zarr from .coding.cftime_offsets import cftime_range from .coding.cftimeindex import CFTimeIndex +from .coding.frequencies import infer_freq from .conventions import SerializationWarning, decode_cf from .core.alignment import align, broadcast -from .core.combine import auto_combine, combine_by_coords, combine_nested +from .core.combine import combine_by_coords, combine_nested from .core.common import ALL_DIMS, full_like, ones_like, zeros_like -from .core.computation import apply_ufunc, dot, polyval, where +from .core.computation import apply_ufunc, corr, cov, dot, polyval, where from .core.concat import concat from .core.dataarray import DataArray from .core.dataset import Dataset @@ -46,7 +47,6 @@ "align", "apply_ufunc", "as_variable", - "auto_combine", "broadcast", "cftime_range", "combine_by_coords", @@ -54,7 +54,10 @@ "concat", "decode_cf", "dot", + "cov", + "corr", "full_like", + "infer_freq", "load_dataarray", "load_dataset", "map_blocks", diff --git a/xarray/backends/api.py b/xarray/backends/api.py index 7e81870d653..610731568df 100644 --- a/xarray/backends/api.py +++ b/xarray/backends/api.py @@ -5,7 +5,6 @@ from io import BytesIO from numbers import Number from pathlib import Path -from textwrap import dedent from typing import ( TYPE_CHECKING, Callable, @@ -24,7 +23,6 @@ from ..core.combine import ( _infer_concat_order_from_positions, _nested_combine, - auto_combine, combine_by_coords, ) from ..core.dataarray import DataArray @@ -304,6 +302,7 @@ def open_dataset( drop_variables=None, backend_kwargs=None, use_cftime=None, + decode_timedelta=None, ): """Open and decode a dataset from a file or file-like object. @@ -384,9 +383,11 @@ def open_dataset( represented using ``np.datetime64[ns]`` objects. If False, always decode times to ``np.datetime64[ns]`` objects; if this is not possible raise an error. - overwrite_encoded_chunks: bool, optional - Whether to drop the zarr chunks encoded for each variable when a - dataset is loaded with specified chunk sizes (default: False) + decode_timedelta : bool, optional + If True, decode variables and coordinates with time units in + {'days', 'hours', 'minutes', 'seconds', 'milliseconds', 'microseconds'} + into timedelta objects. If False, leave them encoded as numbers. + If None (default), assume the same value of decode_time. Returns @@ -442,6 +443,7 @@ def open_dataset( decode_times = False concat_characters = False decode_coords = False + decode_timedelta = False if cache is None: cache = chunks is None @@ -458,6 +460,7 @@ def maybe_decode_store(store, lock=False): decode_coords=decode_coords, drop_variables=drop_variables, use_cftime=use_cftime, + decode_timedelta=decode_timedelta, ) _protect_dataset_variables_inplace(ds, cache) @@ -470,16 +473,14 @@ def maybe_decode_store(store, lock=False): if isinstance(filename_or_obj, AbstractDataStore): store = filename_or_obj - if isinstance(filename_or_obj, MutableMapping): - if engine == 'zarr': - # on ZarrStore, mode='r', synchronizer=None, group=None, - # consolidated=False. 
- overwrite_encoded_chunks = backend_kwargs.pop("overwrite_encoded_chunks", None) - store = backends.ZarrStore.open_group( - filename_or_obj, - group=group, - **backend_kwargs - ) + if isinstance(filename_or_obj, MutableMapping) and engine == "zarr": + # on ZarrStore, mode='r', synchronizer=None, group=None, + # consolidated=False. + _backend_kwargs = backend_kwargs.copy() + overwrite_encoded_chunks = _backend_kwargs.pop("overwrite_encoded_chunks", None) + store = backends.ZarrStore.open_group( + filename_or_obj, group=group, **_backend_kwargs + ) elif isinstance(filename_or_obj, str): filename_or_obj = _normalize_path(filename_or_obj) @@ -508,14 +509,15 @@ def maybe_decode_store(store, lock=False): store = backends.CfGribDataStore( filename_or_obj, lock=lock, **backend_kwargs ) - elif engine == 'zarr': + elif engine == "zarr": # on ZarrStore, mode='r', synchronizer=None, group=None, # consolidated=False. - overwrite_encoded_chunks = backend_kwargs.pop("overwrite_encoded_chunks", None) + _backend_kwargs = backend_kwargs.copy() + overwrite_encoded_chunks = _backend_kwargs.pop( + "overwrite_encoded_chunks", None + ) store = backends.ZarrStore.open_group( - filename_or_obj, - group=group, - **backend_kwargs + filename_or_obj, group=group, **_backend_kwargs ) else: if engine not in [None, "scipy", "h5netcdf"]: @@ -541,7 +543,8 @@ def maybe_decode_store(store, lock=False): if chunks is not None: from dask.base import tokenize - if engine != 'zarr': + + if engine != "zarr": # if passed an actual file path, augment the token with # the file modification time @@ -579,7 +582,10 @@ def maybe_decode_store(store, lock=False): if isinstance(chunks, int): chunks = dict.fromkeys(ds.dims, chunks) - variables = {k: backends.ZarrStore.open_group.maybe_chunk(k, v, chunks) for k, v in ds.variables.items()} + variables = { + k: store.maybe_chunk(k, v, chunks, overwrite_encoded_chunks) + for k, v in ds.variables.items() + } ds2 = ds._replace_vars_and_dims(variables) return ds2 else: @@ -603,6 +609,7 @@ def open_dataarray( drop_variables=None, backend_kwargs=None, use_cftime=None, + decode_timedelta=None, ): """Open an DataArray from a file or file-like object containing a single data variable. @@ -682,6 +689,11 @@ def open_dataarray( represented using ``np.datetime64[ns]`` objects. If False, always decode times to ``np.datetime64[ns]`` objects; if this is not possible raise an error. + decode_timedelta : bool, optional + If True, decode variables and coordinates with time units in + {'days', 'hours', 'minutes', 'seconds', 'milliseconds', 'microseconds'} + into timedelta objects. If False, leave them encoded as numbers. + If None (default), assume the same value of decode_time. Notes ----- @@ -713,6 +725,7 @@ def open_dataarray( drop_variables=drop_variables, backend_kwargs=backend_kwargs, use_cftime=use_cftime, + decode_timedelta=decode_timedelta, ) if len(dataset.data_vars) != 1: @@ -752,14 +765,14 @@ def close(self): def open_mfdataset( paths, chunks=None, - concat_dim="_not_supplied", + concat_dim=None, compat="no_conflicts", preprocess=None, engine=None, lock=None, data_vars="all", coords="different", - combine="_old_auto", + combine="by_coords", autoclose=None, parallel=False, join="outer", @@ -772,9 +785,8 @@ def open_mfdataset( the datasets into one before returning the result, and if combine='nested' then ``combine_nested`` is used. 
The filepaths must be structured according to which combining function is used, the details of which are given in the documentation for - ``combine_by_coords`` and ``combine_nested``. By default the old (now deprecated) - ``auto_combine`` will be used, please specify either ``combine='by_coords'`` or - ``combine='nested'`` in future. Requires dask to be installed. See documentation for + ``combine_by_coords`` and ``combine_nested``. By default ``combine='by_coords'`` + will be used. Requires dask to be installed. See documentation for details on dask [1]_. Global attributes from the ``attrs_file`` are used for the combined dataset. @@ -784,7 +796,7 @@ def open_mfdataset( Either a string glob in the form ``"path/to/my/files/*.nc"`` or an explicit list of files to open. Paths can be given as strings or as pathlib Paths. If concatenation along more than one dimension is desired, then ``paths`` must be a - nested list-of-lists (see ``manual_combine`` for details). (A string glob will + nested list-of-lists (see ``combine_nested`` for details). (A string glob will be expanded to a 1-dimensional list.) chunks : int or dict, optional Dictionary with keys given by dimension names and values given by chunk sizes. @@ -794,15 +806,16 @@ def open_mfdataset( see the full documentation for more details [2]_. concat_dim : str, or list of str, DataArray, Index or None, optional Dimensions to concatenate files along. You only need to provide this argument - if any of the dimensions along which you want to concatenate is not a dimension - in the original datasets, e.g., if you want to stack a collection of 2D arrays - along a third dimension. Set ``concat_dim=[..., None, ...]`` explicitly to - disable concatenation along a particular dimension. + if ``combine='by_coords'``, and if any of the dimensions along which you want to + concatenate is not a dimension in the original datasets, e.g., if you want to + stack a collection of 2D arrays along a third dimension. Set + ``concat_dim=[..., None, ...]`` explicitly to disable concatenation along a + particular dimension. Default is None, which for a 1D list of filepaths is + equivalent to opening the files separately and then merging them with + ``xarray.merge``. combine : {'by_coords', 'nested'}, optional Whether ``xarray.combine_by_coords`` or ``xarray.combine_nested`` is used to - combine all the data. If this argument is not provided, `xarray.auto_combine` is - used, but in the future this behavior will switch to use - `xarray.combine_by_coords` by default. + combine all the data. Default is to use ``xarray.combine_by_coords``. 
compat : {'identical', 'equals', 'broadcast_equals', 'no_conflicts', 'override'}, optional String indicating how to compare variables of the same name for @@ -895,7 +908,6 @@ def open_mfdataset( -------- combine_by_coords combine_nested - auto_combine open_dataset References @@ -923,11 +935,8 @@ def open_mfdataset( # If combine='nested' then this creates a flat list which is easier to # iterate over, while saving the originally-supplied structure as "ids" if combine == "nested": - if str(concat_dim) == "_not_supplied": - raise ValueError("Must supply concat_dim when using " "combine='nested'") - else: - if isinstance(concat_dim, (str, DataArray)) or concat_dim is None: - concat_dim = [concat_dim] + if isinstance(concat_dim, (str, DataArray)) or concat_dim is None: + concat_dim = [concat_dim] combined_ids_paths = _infer_concat_order_from_positions(paths) ids, paths = (list(combined_ids_paths.keys()), list(combined_ids_paths.values())) @@ -959,30 +968,7 @@ def open_mfdataset( # Combine all datasets, closing them in case of a ValueError try: - if combine == "_old_auto": - # Use the old auto_combine for now - # Remove this after deprecation cycle from #2616 is complete - basic_msg = dedent( - """\ - In xarray version 0.15 the default behaviour of `open_mfdataset` - will change. To retain the existing behavior, pass - combine='nested'. To use future default behavior, pass - combine='by_coords'. See - http://xarray.pydata.org/en/stable/combining.html#combining-multi - """ - ) - warnings.warn(basic_msg, FutureWarning, stacklevel=2) - - combined = auto_combine( - datasets, - concat_dim=concat_dim, - compat=compat, - data_vars=data_vars, - coords=coords, - join=join, - from_openmfds=True, - ) - elif combine == "nested": + if combine == "nested": # Combined nested list by successive concat and merge operations # along each dimension, using structure given by "ids" combined = _nested_combine( @@ -993,12 +979,18 @@ def open_mfdataset( coords=coords, ids=ids, join=join, + combine_attrs="drop", ) elif combine == "by_coords": # Redo ordering from coordinates, ignoring how they were ordered # previously combined = combine_by_coords( - datasets, compat=compat, data_vars=data_vars, coords=coords, join=join + datasets, + compat=compat, + data_vars=data_vars, + coords=coords, + join=join, + combine_attrs="drop", ) else: raise ValueError( @@ -1321,18 +1313,35 @@ def _validate_append_dim_and_encoding( return if append_dim: if append_dim not in ds.dims: - raise ValueError(f"{append_dim} not a valid dimension in the Dataset") - for data_var in ds_to_append: - if data_var in ds: - if append_dim is None: + raise ValueError( + f"append_dim={append_dim!r} does not match any existing " + f"dataset dimensions {ds.dims}" + ) + for var_name in ds_to_append: + if var_name in ds: + if ds_to_append[var_name].dims != ds[var_name].dims: + raise ValueError( + f"variable {var_name!r} already exists with different " + f"dimension names {ds[var_name].dims} != " + f"{ds_to_append[var_name].dims}, but changing variable " + "dimensions is not supported by to_zarr()." + ) + existing_sizes = { + k: v for k, v in ds[var_name].sizes.items() if k != append_dim + } + new_sizes = { + k: v for k, v in ds_to_append[var_name].sizes.items() if k != append_dim + } + if existing_sizes != new_sizes: raise ValueError( - "variable '{}' already exists, but append_dim " - "was not set".format(data_var) + f"variable {var_name!r} already exists with different " + "dimension sizes: {existing_sizes} != {new_sizes}. 
" + "to_zarr() only supports changing dimension sizes when " + f"explicitly appending, but append_dim={append_dim!r}." ) - if data_var in encoding.keys(): + if var_name in encoding.keys(): raise ValueError( - "variable '{}' already exists, but encoding was" - "provided".format(data_var) + f"variable {var_name!r} already exists, but encoding was provided" ) diff --git a/xarray/backends/common.py b/xarray/backends/common.py index fa3ee19f542..63c4c956f86 100644 --- a/xarray/backends/common.py +++ b/xarray/backends/common.py @@ -241,7 +241,7 @@ def encode_attribute(self, a): """encode one attribute""" return a - def set_dimension(self, d, l): # pragma: no cover + def set_dimension(self, dim, length): # pragma: no cover raise NotImplementedError() def set_attribute(self, k, v): # pragma: no cover diff --git a/xarray/backends/memory.py b/xarray/backends/memory.py index bee6521bce2..17095d09651 100644 --- a/xarray/backends/memory.py +++ b/xarray/backends/memory.py @@ -40,6 +40,6 @@ def set_attribute(self, k, v): # copy to imitate writing to disk. self._attributes[k] = copy.deepcopy(v) - def set_dimension(self, d, l, unlimited_dims=None): + def set_dimension(self, dim, length, unlimited_dims=None): # in this model, dimensions are accounted for in the variables pass diff --git a/xarray/backends/netcdf3.py b/xarray/backends/netcdf3.py index c9c4baf9b01..51d7fce22a0 100644 --- a/xarray/backends/netcdf3.py +++ b/xarray/backends/netcdf3.py @@ -28,7 +28,14 @@ # These data-types aren't supported by netCDF3, so they are automatically # coerced instead as indicated by the "coerce_nc3_dtype" function -_nc3_dtype_coercions = {"int64": "int32", "bool": "int8"} +_nc3_dtype_coercions = { + "int64": "int32", + "uint64": "int32", + "uint32": "int32", + "uint16": "int16", + "uint8": "int8", + "bool": "int8", +} # encode all strings as UTF-8 STRING_ENCODING = "utf-8" @@ -37,12 +44,17 @@ def coerce_nc3_dtype(arr): """Coerce an array to a data type that can be stored in a netCDF-3 file - This function performs the following dtype conversions: - int64 -> int32 - bool -> int8 - - Data is checked for equality, or equivalence (non-NaN values) with - `np.allclose` with the default keyword arguments. + This function performs the dtype conversions as specified by the + ``_nc3_dtype_coercions`` mapping: + int64 -> int32 + uint64 -> int32 + uint32 -> int32 + uint16 -> int16 + uint8 -> int8 + bool -> int8 + + Data is checked for equality, or equivalence (non-NaN values) using the + ``(cast_array == original_array).all()``. 
""" dtype = str(arr.dtype) if dtype in _nc3_dtype_coercions: diff --git a/xarray/backends/rasterio_.py b/xarray/backends/rasterio_.py index 77beffd09b1..661d5b5c6fc 100644 --- a/xarray/backends/rasterio_.py +++ b/xarray/backends/rasterio_.py @@ -221,14 +221,17 @@ def open_rasterio(filename, parse_coordinates=None, chunks=None, cache=None, loc vrt = filename filename = vrt.src_dataset.name vrt_params = dict( + src_crs=vrt.src_crs.to_string(), crs=vrt.crs.to_string(), resampling=vrt.resampling, + tolerance=vrt.tolerance, src_nodata=vrt.src_nodata, nodata=vrt.nodata, - tolerance=vrt.tolerance, - transform=vrt.transform, width=vrt.width, height=vrt.height, + src_transform=vrt.src_transform, + transform=vrt.transform, + dtype=vrt.working_dtype, warp_extras=vrt.warp_extras, ) diff --git a/xarray/backends/zarr.py b/xarray/backends/zarr.py index c6e4d1b362a..92c5893fb3f 100644 --- a/xarray/backends/zarr.py +++ b/xarray/backends/zarr.py @@ -358,8 +358,7 @@ def encode_variable(self, variable): def encode_attribute(self, a): return encode_zarr_attr_value(a) - - def get_chunk(name, var, chunks): + def get_chunk(self, name, var, chunks): chunk_spec = dict(zip(var.dims, var.encoding.get("chunks"))) # Coordinate labels aren't chunked @@ -388,15 +387,17 @@ def get_chunk(name, var, chunks): chunk_spec[dim] = chunks[dim] return chunk_spec - def maybe_chunk(name, var, chunks): - chunk_spec = get_chunk(name, var, chunks) + def maybe_chunk(self, name, var, chunks, overwrite_encoded_chunks): + chunk_spec = self.get_chunk(name, var, chunks) if (var.ndim > 0) and (chunk_spec is not None): + from dask.base import tokenize + # does this cause any data to be read? token2 = tokenize(name, var._data) name2 = "zarr-%s" % token2 var = var.chunk(chunk_spec, name=name2, lock=None) - if overwrite_encoded_chunks and var.chunks is not None: + if open_kwargs["overwrite_encoded_chunks"] and var.chunks is not None: var.encoding["chunks"] = tuple(x[0] for x in var.chunk) return var @@ -489,18 +490,23 @@ def set_variables(self, variables, check_encoding_set, writer, unlimited_dims=No fill_value = attrs.pop("_FillValue", None) if v.encoding == {"_FillValue": None} and fill_value is None: v.encoding = {} - if name in self.ds: + + if self.append_dim is not None and self.append_dim in dims: + # resize existing variable zarr_array = self.ds[name] - if self.append_dim in dims: - # this is the DataArray that has append_dim as a - # dimension - append_axis = dims.index(self.append_dim) - new_shape = list(zarr_array.shape) - new_shape[append_axis] += v.shape[append_axis] - new_region = [slice(None)] * len(new_shape) - new_region[append_axis] = slice(zarr_array.shape[append_axis], None) - zarr_array.resize(new_shape) - writer.add(v.data, zarr_array, region=tuple(new_region)) + append_axis = dims.index(self.append_dim) + + new_region = [slice(None)] * len(dims) + new_region[append_axis] = slice(zarr_array.shape[append_axis], None) + region = tuple(new_region) + + new_shape = list(zarr_array.shape) + new_shape[append_axis] += v.shape[append_axis] + zarr_array.resize(new_shape) + elif name in self.ds: + # override existing variable + zarr_array = self.ds[name] + region = None else: # new variable encoding = extract_zarr_variable_encoding( @@ -518,7 +524,9 @@ def set_variables(self, variables, check_encoding_set, writer, unlimited_dims=No name, shape=shape, dtype=dtype, fill_value=fill_value, **encoding ) zarr_array.attrs.put(encoded_attrs) - writer.add(v.data, zarr_array) + region = None + + writer.add(v.data, zarr_array, region=region) def 
close(self): if self._consolidate_on_close: @@ -540,6 +548,7 @@ def open_zarr( drop_variables=None, consolidated=False, overwrite_encoded_chunks=False, + decode_timedelta=None, **kwargs, ): """Load and decode a dataset from a Zarr store. @@ -599,6 +608,11 @@ def open_zarr( consolidated : bool, optional Whether to open the store using zarr's consolidated metadata capability. Only works for stores that have already been consolidated. + decode_timedelta : bool, optional + If True, decode variables and coordinates with time units in + {'days', 'hours', 'minutes', 'seconds', 'milliseconds', 'microseconds'} + into timedelta objects. If False, leave them encoded as numbers. + If None (default), assume the same value of decode_time. Returns ------- diff --git a/xarray/coding/cftimeindex.py b/xarray/coding/cftimeindex.py index 6fc28d213dd..2a7eaa99edb 100644 --- a/xarray/coding/cftimeindex.py +++ b/xarray/coding/cftimeindex.py @@ -578,7 +578,8 @@ def asi8(self): [ _total_microseconds(exact_cftime_datetime_difference(epoch, date)) for date in self.values - ] + ], + dtype=np.int64, ) def _round_via_method(self, freq, method): diff --git a/xarray/coding/frequencies.py b/xarray/coding/frequencies.py new file mode 100644 index 00000000000..86f84ba5fbd --- /dev/null +++ b/xarray/coding/frequencies.py @@ -0,0 +1,272 @@ +"""FrequencyInferer analog for cftime.datetime objects""" +# The infer_freq method and the _CFTimeFrequencyInferer +# subclass defined here were copied and adapted for +# use with cftime.datetime objects based on the source code in +# pandas.tseries.Frequencies._FrequencyInferer + +# For reference, here is a copy of the pandas copyright notice: + +# (c) 2011-2012, Lambda Foundry, Inc. and PyData Development Team +# All rights reserved. + +# Copyright (c) 2008-2011 AQR Capital Management, LLC +# All rights reserved. + +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: + +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. + +# * Redistributions in binary form must reproduce the above +# copyright notice, this list of conditions and the following +# disclaimer in the documentation and/or other materials provided +# with the distribution. + +# * Neither the name of the copyright holder nor the names of any +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. + +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +import numpy as np +import pandas as pd + +from ..core.common import _contains_datetime_like_objects +from .cftime_offsets import _MONTH_ABBREVIATIONS +from .cftimeindex import CFTimeIndex + +_ONE_MICRO = 1 +_ONE_MILLI = _ONE_MICRO * 1000 +_ONE_SECOND = _ONE_MILLI * 1000 +_ONE_MINUTE = 60 * _ONE_SECOND +_ONE_HOUR = 60 * _ONE_MINUTE +_ONE_DAY = 24 * _ONE_HOUR + + +def infer_freq(index): + """ + Infer the most likely frequency given the input index. + + Parameters + ---------- + index : CFTimeIndex, DataArray, pd.DatetimeIndex, pd.TimedeltaIndex, pd.Series + If not passed a CFTimeIndex, this simply calls `pandas.infer_freq`. + If passed a Series or a DataArray will use the values of the series (NOT THE INDEX). + + Returns + ------- + str or None + None if no discernible frequency. + + Raises + ------ + TypeError + If the index is not datetime-like. + ValueError + If there are fewer than three values or the index is not 1D. + """ + from xarray.core.dataarray import DataArray + + if isinstance(index, (DataArray, pd.Series)): + if index.ndim != 1: + raise ValueError("'index' must be 1D") + elif not _contains_datetime_like_objects(DataArray(index)): + raise ValueError("'index' must contain datetime-like objects") + dtype = np.asarray(index).dtype + if dtype == "datetime64[ns]": + index = pd.DatetimeIndex(index.values) + elif dtype == "timedelta64[ns]": + index = pd.TimedeltaIndex(index.values) + else: + index = CFTimeIndex(index.values) + + if isinstance(index, CFTimeIndex): + inferer = _CFTimeFrequencyInferer(index) + return inferer.get_freq() + + return pd.infer_freq(index) + + +class _CFTimeFrequencyInferer: # (pd.tseries.frequencies._FrequencyInferer): + def __init__(self, index): + self.index = index + self.values = index.asi8 + + if len(index) < 3: + raise ValueError("Need at least 3 dates to infer frequency") + + self.is_monotonic = ( + self.index.is_monotonic_decreasing or self.index.is_monotonic_increasing + ) + + self._deltas = None + self._year_deltas = None + self._month_deltas = None + + def get_freq(self): + """Find the appropriate frequency string to describe the inferred frequency of self.index + + Adapted from `pandas.tsseries.frequencies._FrequencyInferer.get_freq` for CFTimeIndexes. 
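A small usage sketch of the new `infer_freq` entry point defined above: a `CFTimeIndex` is handled by the `_CFTimeFrequencyInferer` below, while datetime64/timedelta64 inputs are delegated to `pandas.infer_freq` (the first example assumes `cftime` is installed):

```python
import pandas as pd
import xarray as xr
from xarray.coding.frequencies import infer_freq

# CFTimeIndex -> handled by the cftime-aware inferer
idx = xr.cftime_range("2000-01-01", periods=4, freq="D", calendar="noleap")
print(infer_freq(idx))  # "D"

# datetime64 values -> delegated to pandas.infer_freq
print(infer_freq(pd.date_range("2000-01-01", periods=4, freq="6H")))  # "6H"
```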
+ + Returns + ------- + str or None + """ + if not self.is_monotonic or not self.index.is_unique: + return None + + delta = self.deltas[0] # Smallest delta + if _is_multiple(delta, _ONE_DAY): + return self._infer_daily_rule() + # There is no possible intraday frequency with a non-unique delta + # Different from pandas: we don't need to manage DST and business offsets in cftime + elif not len(self.deltas) == 1: + return None + + if _is_multiple(delta, _ONE_HOUR): + return _maybe_add_count("H", delta / _ONE_HOUR) + elif _is_multiple(delta, _ONE_MINUTE): + return _maybe_add_count("T", delta / _ONE_MINUTE) + elif _is_multiple(delta, _ONE_SECOND): + return _maybe_add_count("S", delta / _ONE_SECOND) + elif _is_multiple(delta, _ONE_MILLI): + return _maybe_add_count("L", delta / _ONE_MILLI) + else: + return _maybe_add_count("U", delta / _ONE_MICRO) + + def _infer_daily_rule(self): + annual_rule = self._get_annual_rule() + if annual_rule: + nyears = self.year_deltas[0] + month = _MONTH_ABBREVIATIONS[self.index[0].month] + alias = f"{annual_rule}-{month}" + return _maybe_add_count(alias, nyears) + + quartely_rule = self._get_quartely_rule() + if quartely_rule: + nquarters = self.month_deltas[0] / 3 + mod_dict = {0: 12, 2: 11, 1: 10} + month = _MONTH_ABBREVIATIONS[mod_dict[self.index[0].month % 3]] + alias = f"{quartely_rule}-{month}" + return _maybe_add_count(alias, nquarters) + + monthly_rule = self._get_monthly_rule() + if monthly_rule: + return _maybe_add_count(monthly_rule, self.month_deltas[0]) + + if len(self.deltas) == 1: + # Daily as there is no "Weekly" offsets with CFTime + days = self.deltas[0] / _ONE_DAY + return _maybe_add_count("D", days) + + # CFTime has no business freq and no "week of month" (WOM) + return None + + def _get_annual_rule(self): + if len(self.year_deltas) > 1: + return None + + if len(np.unique(self.index.month)) > 1: + return None + + return {"cs": "AS", "ce": "A"}.get(month_anchor_check(self.index)) + + def _get_quartely_rule(self): + if len(self.month_deltas) > 1: + return None + + if not self.month_deltas[0] % 3 == 0: + return None + + return {"cs": "QS", "ce": "Q"}.get(month_anchor_check(self.index)) + + def _get_monthly_rule(self): + if len(self.month_deltas) > 1: + return None + + return {"cs": "MS", "ce": "M"}.get(month_anchor_check(self.index)) + + @property + def deltas(self): + """Sorted unique timedeltas as microseconds.""" + if self._deltas is None: + self._deltas = _unique_deltas(self.values) + return self._deltas + + @property + def year_deltas(self): + """Sorted unique year deltas.""" + if self._year_deltas is None: + self._year_deltas = _unique_deltas(self.index.year) + return self._year_deltas + + @property + def month_deltas(self): + """Sorted unique month deltas.""" + if self._month_deltas is None: + self._month_deltas = _unique_deltas(self.index.year * 12 + self.index.month) + return self._month_deltas + + +def _unique_deltas(arr): + """Sorted unique deltas of numpy array""" + return np.sort(np.unique(np.diff(arr))) + + +def _is_multiple(us, mult: int): + """Whether us is a multiple of mult""" + return us % mult == 0 + + +def _maybe_add_count(base: str, count: float): + """If count is greater than 1, add it to the base offset string""" + if count != 1: + assert count == int(count) + count = int(count) + return f"{count}{base}" + else: + return base + + +def month_anchor_check(dates): + """Return the monthly offset string. 
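The rule methods above translate the unique deltas into pandas-style offset aliases, including multiples and month anchors. A few illustrative outputs (a hedged sketch; assumes `cftime` is installed):

```python
import xarray as xr
from xarray.coding.frequencies import infer_freq

print(infer_freq(xr.cftime_range("2000-01-01", periods=3, freq="2D")))      # "2D"
print(infer_freq(xr.cftime_range("2000-01-01", periods=4, freq="MS")))      # "MS" (month starts)
print(infer_freq(xr.cftime_range("2000-06-01", periods=3, freq="AS-JUN")))  # "AS-JUN"
```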
+ + Return "cs" if all dates are the first days of the month, + "ce" if all dates are the last day of the month, + None otherwise. + + Replicated pandas._libs.tslibs.resolution.month_position_check + but without business offset handling. + """ + calendar_end = True + calendar_start = True + + for date in dates: + if calendar_start: + calendar_start &= date.day == 1 + + if calendar_end: + cal = date.day == date.daysinmonth + if calendar_end: + calendar_end &= cal + elif not calendar_start: + break + + if calendar_end: + return "ce" + elif calendar_start: + return "cs" + else: + return None diff --git a/xarray/coding/times.py b/xarray/coding/times.py index 965ddd8f043..77b2d2c7937 100644 --- a/xarray/coding/times.py +++ b/xarray/coding/times.py @@ -80,8 +80,9 @@ def _decode_cf_datetime_dtype(data, units, calendar, use_cftime): "the default calendar" if calendar is None else "calendar %r" % calendar ) msg = ( - "unable to decode time units %r with %s. Try " - "opening your dataset with decode_times=False." % (units, calendar_msg) + f"unable to decode time units {units!r} with {calendar_msg!r}. Try " + "opening your dataset with decode_times=False or installing cftime " + "if it is not installed." ) raise ValueError(msg) else: @@ -155,9 +156,9 @@ def decode_cf_datetime(num_dates, units, calendar=None, use_cftime=None): if use_cftime is None: try: dates = _decode_datetime_with_pandas(flat_num_dates, units, calendar) - except (OutOfBoundsDatetime, OverflowError): + except (KeyError, OutOfBoundsDatetime, OverflowError): dates = _decode_datetime_with_cftime( - flat_num_dates.astype(np.float), units, calendar + flat_num_dates.astype(float), units, calendar ) if ( @@ -178,7 +179,7 @@ def decode_cf_datetime(num_dates, units, calendar=None, use_cftime=None): dates = cftime_to_nptime(dates) elif use_cftime: dates = _decode_datetime_with_cftime( - flat_num_dates.astype(np.float), units, calendar + flat_num_dates.astype(float), units, calendar ) else: dates = _decode_datetime_with_pandas(flat_num_dates, units, calendar) diff --git a/xarray/conventions.py b/xarray/conventions.py index df24d0d3d8d..fc0572944f3 100644 --- a/xarray/conventions.py +++ b/xarray/conventions.py @@ -116,7 +116,7 @@ def maybe_default_fill_value(var): def maybe_encode_bools(var): if ( - (var.dtype == np.bool) + (var.dtype == bool) and ("dtype" not in var.encoding) and ("dtype" not in var.attrs) ): @@ -266,6 +266,7 @@ def decode_cf_variable( decode_endianness=True, stack_char_dim=True, use_cftime=None, + decode_timedelta=None, ): """ Decodes a variable which may hold CF encoded information. @@ -315,6 +316,9 @@ def decode_cf_variable( var = as_variable(var) original_dtype = var.dtype + if decode_timedelta is None: + decode_timedelta = decode_times + if concat_characters: if stack_char_dim: var = strings.CharacterArrayCoder().decode(var, name=name) @@ -328,12 +332,10 @@ def decode_cf_variable( ]: var = coder.decode(var, name=name) + if decode_timedelta: + var = times.CFTimedeltaCoder().decode(var, name=name) if decode_times: - for coder in [ - times.CFTimedeltaCoder(), - times.CFDatetimeCoder(use_cftime=use_cftime), - ]: - var = coder.decode(var, name=name) + var = times.CFDatetimeCoder(use_cftime=use_cftime).decode(var, name=name) dimensions, data, attributes, encoding = variables.unpack_for_decoding(var) # TODO(shoyer): convert everything below to use coders @@ -442,6 +444,7 @@ def decode_cf_variables( decode_coords=True, drop_variables=None, use_cftime=None, + decode_timedelta=None, ): """ Decode several CF encoded variables. 
@@ -492,6 +495,7 @@ def stackable(dim): decode_times=decode_times, stack_char_dim=stack_char_dim, use_cftime=use_cftime, + decode_timedelta=decode_timedelta, ) if decode_coords: var_attrs = new_vars[k].attrs @@ -518,6 +522,7 @@ def decode_cf( decode_coords=True, drop_variables=None, use_cftime=None, + decode_timedelta=None, ): """Decode the given Dataset or Datastore according to CF conventions into a new Dataset. @@ -552,6 +557,11 @@ def decode_cf( represented using ``np.datetime64[ns]`` objects. If False, always decode times to ``np.datetime64[ns]`` objects; if this is not possible raise an error. + decode_timedelta : bool, optional + If True, decode variables and coordinates with time units in + {'days', 'hours', 'minutes', 'seconds', 'milliseconds', 'microseconds'} + into timedelta objects. If False, leave them encoded as numbers. + If None (default), assume the same value of decode_time. Returns ------- @@ -583,6 +593,7 @@ def decode_cf( decode_coords, drop_variables=drop_variables, use_cftime=use_cftime, + decode_timedelta=decode_timedelta, ) ds = Dataset(vars, attrs=attrs) ds = ds.set_coords(coord_names.union(extra_coords).intersection(vars)) diff --git a/xarray/convert.py b/xarray/convert.py index 4974a55d8e2..0c86b090f34 100644 --- a/xarray/convert.py +++ b/xarray/convert.py @@ -229,11 +229,11 @@ def _iris_cell_methods_to_str(cell_methods_obj): """ cell_methods = [] for cell_method in cell_methods_obj: - names = "".join([f"{n}: " for n in cell_method.coord_names]) + names = "".join(f"{n}: " for n in cell_method.coord_names) intervals = " ".join( - [f"interval: {interval}" for interval in cell_method.intervals] + f"interval: {interval}" for interval in cell_method.intervals ) - comments = " ".join([f"comment: {comment}" for comment in cell_method.comments]) + comments = " ".join(f"comment: {comment}" for comment in cell_method.comments) extra = " ".join([intervals, comments]).strip() if extra: extra = f" ({extra})" diff --git a/xarray/core/accessor_dt.py b/xarray/core/accessor_dt.py index 2977596036c..630aaee142f 100644 --- a/xarray/core/accessor_dt.py +++ b/xarray/core/accessor_dt.py @@ -240,12 +240,6 @@ class DatetimeAccessor(Properties): Fields can be accessed through the `.dt` attribute for applicable DataArrays. - Notes - ------ - Note that these fields are not calendar-aware; if your datetimes are encoded - with a non-Gregorian calendar (e.g. a 360-day calendar) using cftime, - then some fields like `dayofyear` may not be accurate. - Examples --------- >>> import xarray as xr diff --git a/xarray/core/combine.py b/xarray/core/combine.py index 1f990457798..58bd7178fa2 100644 --- a/xarray/core/combine.py +++ b/xarray/core/combine.py @@ -1,7 +1,5 @@ import itertools -import warnings from collections import Counter -from textwrap import dedent import pandas as pd @@ -762,272 +760,3 @@ def combine_by_coords( join=join, combine_attrs=combine_attrs, ) - - -# Everything beyond here is only needed until the deprecation cycle in #2616 -# is completed - - -_CONCAT_DIM_DEFAULT = "__infer_concat_dim__" - - -def auto_combine( - datasets, - concat_dim="_not_supplied", - compat="no_conflicts", - data_vars="all", - coords="different", - fill_value=dtypes.NA, - join="outer", - from_openmfds=False, -): - """ - Attempt to auto-magically combine the given datasets into one. - - This entire function is deprecated in favour of ``combine_nested`` and - ``combine_by_coords``. 
- - This method attempts to combine a list of datasets into a single entity by - inspecting metadata and using a combination of concat and merge. - It does not concatenate along more than one dimension or sort data under - any circumstances. It does align coordinates, but different variables on - datasets can cause it to fail under some scenarios. In complex cases, you - may need to clean up your data and use ``concat``/``merge`` explicitly. - ``auto_combine`` works well if you have N years of data and M data - variables, and each combination of a distinct time period and set of data - variables is saved its own dataset. - - Parameters - ---------- - datasets : sequence of xarray.Dataset - Dataset objects to merge. - concat_dim : str or DataArray or Index, optional - Dimension along which to concatenate variables, as used by - :py:func:`xarray.concat`. You only need to provide this argument if - the dimension along which you want to concatenate is not a dimension - in the original datasets, e.g., if you want to stack a collection of - 2D arrays along a third dimension. - By default, xarray attempts to infer this argument by examining - component files. Set ``concat_dim=None`` explicitly to disable - concatenation. - compat : {'identical', 'equals', 'broadcast_equals', - 'no_conflicts', 'override'}, optional - String indicating how to compare variables of the same name for - potential conflicts: - - - 'broadcast_equals': all values must be equal when variables are - broadcast against each other to ensure common dimensions. - - 'equals': all values and dimensions must be the same. - - 'identical': all values, dimensions and attributes must be the - same. - - 'no_conflicts': only values which are not null in both datasets - must be equal. The returned dataset then contains the combination - of all non-null values. - - 'override': skip comparing and pick variable from first dataset - data_vars : {'minimal', 'different', 'all' or list of str}, optional - Details are in the documentation of concat - coords : {'minimal', 'different', 'all' o list of str}, optional - Details are in the documentation of concat - fill_value : scalar, optional - Value to use for newly missing values - join : {'outer', 'inner', 'left', 'right', 'exact'}, optional - String indicating how to combine differing indexes - (excluding concat_dim) in objects - - - 'outer': use the union of object indexes - - 'inner': use the intersection of object indexes - - 'left': use indexes from the first object with each dimension - - 'right': use indexes from the last object with each dimension - - 'exact': instead of aligning, raise `ValueError` when indexes to be - aligned are not equal - - 'override': if indexes are of same size, rewrite indexes to be - those of the first object with that dimension. Indexes for the same - dimension must have the same size in all objects. - - Returns - ------- - combined : xarray.Dataset - - See also - -------- - concat - Dataset.merge - """ - - if not from_openmfds: - basic_msg = dedent( - """\ - In xarray version 0.15 `auto_combine` will be deprecated. See - http://xarray.pydata.org/en/stable/combining.html#combining-multi""" - ) - warnings.warn(basic_msg, FutureWarning, stacklevel=2) - - if concat_dim == "_not_supplied": - concat_dim = _CONCAT_DIM_DEFAULT - message = "" - else: - message = dedent( - """\ - Also `open_mfdataset` will no longer accept a `concat_dim` argument. 
- To get equivalent behaviour from now on please use the new - `combine_nested` function instead (or the `combine='nested'` option to - `open_mfdataset`).""" - ) - - if _dimension_coords_exist(datasets): - message += dedent( - """\ - The datasets supplied have global dimension coordinates. You may want - to use the new `combine_by_coords` function (or the - `combine='by_coords'` option to `open_mfdataset`) to order the datasets - before concatenation. Alternatively, to continue concatenating based - on the order the datasets are supplied in future, please use the new - `combine_nested` function (or the `combine='nested'` option to - open_mfdataset).""" - ) - else: - message += dedent( - """\ - The datasets supplied do not have global dimension coordinates. In - future, to continue concatenating without supplying dimension - coordinates, please use the new `combine_nested` function (or the - `combine='nested'` option to open_mfdataset.""" - ) - - if _requires_concat_and_merge(datasets): - manual_dims = [concat_dim].append(None) - message += dedent( - """\ - The datasets supplied require both concatenation and merging. From - xarray version 0.15 this will operation will require either using the - new `combine_nested` function (or the `combine='nested'` option to - open_mfdataset), with a nested list structure such that you can combine - along the dimensions {}. Alternatively if your datasets have global - dimension coordinates then you can use the new `combine_by_coords` - function.""".format( - manual_dims - ) - ) - - warnings.warn(message, FutureWarning, stacklevel=2) - - return _old_auto_combine( - datasets, - concat_dim=concat_dim, - compat=compat, - data_vars=data_vars, - coords=coords, - fill_value=fill_value, - join=join, - ) - - -def _dimension_coords_exist(datasets): - """ - Check if the datasets have consistent global dimension coordinates - which would in future be used by `auto_combine` for concatenation ordering. - """ - - # Group by data vars - sorted_datasets = sorted(datasets, key=vars_as_keys) - grouped_by_vars = itertools.groupby(sorted_datasets, key=vars_as_keys) - - # Simulates performing the multidimensional combine on each group of data - # variables before merging back together - try: - for vars, datasets_with_same_vars in grouped_by_vars: - _infer_concat_order_from_coords(list(datasets_with_same_vars)) - return True - except ValueError: - # ValueError means datasets don't have global dimension coordinates - # Or something else went wrong in trying to determine them - return False - - -def _requires_concat_and_merge(datasets): - """ - Check if the datasets require the use of both xarray.concat and - xarray.merge, which in future might require the user to use - `manual_combine` instead. 
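Since `auto_combine` and its helpers are removed here, the messages above point at the two replacements. A minimal sketch of both (dataset contents are illustrative):

```python
import xarray as xr

ds0 = xr.Dataset({"a": ("x", [1, 2])}, coords={"x": [0, 1]})
ds1 = xr.Dataset({"a": ("x", [3, 4])}, coords={"x": [2, 3]})

# global dimension coordinates -> order is inferred from the coordinate values
combined = xr.combine_by_coords([ds0, ds1])

# no usable coordinates -> state the order explicitly with a (nested) list
combined_nested = xr.combine_nested([ds0, ds1], concat_dim="x")

assert combined.sizes["x"] == combined_nested.sizes["x"] == 4
```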
- """ - # Group by data vars - sorted_datasets = sorted(datasets, key=vars_as_keys) - grouped_by_vars = itertools.groupby(sorted_datasets, key=vars_as_keys) - - return len(list(grouped_by_vars)) > 1 - - -def _old_auto_combine( - datasets, - concat_dim=_CONCAT_DIM_DEFAULT, - compat="no_conflicts", - data_vars="all", - coords="different", - fill_value=dtypes.NA, - join="outer", -): - if concat_dim is not None: - dim = None if concat_dim is _CONCAT_DIM_DEFAULT else concat_dim - - sorted_datasets = sorted(datasets, key=vars_as_keys) - grouped = itertools.groupby(sorted_datasets, key=vars_as_keys) - - concatenated = [ - _auto_concat( - list(datasets), - dim=dim, - data_vars=data_vars, - coords=coords, - compat=compat, - fill_value=fill_value, - join=join, - ) - for vars, datasets in grouped - ] - else: - concatenated = datasets - merged = merge(concatenated, compat=compat, fill_value=fill_value, join=join) - return merged - - -def _auto_concat( - datasets, - dim=None, - data_vars="all", - coords="different", - fill_value=dtypes.NA, - join="outer", - compat="no_conflicts", -): - if len(datasets) == 1 and dim is None: - # There is nothing more to combine, so kick out early. - return datasets[0] - else: - if dim is None: - ds0 = datasets[0] - ds1 = datasets[1] - concat_dims = set(ds0.dims) - if ds0.dims != ds1.dims: - dim_tuples = set(ds0.dims.items()) - set(ds1.dims.items()) - concat_dims = {i for i, _ in dim_tuples} - if len(concat_dims) > 1: - concat_dims = {d for d in concat_dims if not ds0[d].equals(ds1[d])} - if len(concat_dims) > 1: - raise ValueError( - "too many different dimensions to " "concatenate: %s" % concat_dims - ) - elif len(concat_dims) == 0: - raise ValueError( - "cannot infer dimension to concatenate: " - "supply the ``concat_dim`` argument " - "explicitly" - ) - (dim,) = concat_dims - return concat( - datasets, - dim=dim, - data_vars=data_vars, - coords=coords, - fill_value=fill_value, - compat=compat, - ) diff --git a/xarray/core/common.py b/xarray/core/common.py index 8f6d57e9f12..f759f4c32dd 100644 --- a/xarray/core/common.py +++ b/xarray/core/common.py @@ -25,7 +25,7 @@ from .options import OPTIONS, _get_keep_attrs from .pycompat import dask_array_type from .rolling_exp import RollingExp -from .utils import Frozen, either_dict_or_kwargs +from .utils import Frozen, either_dict_or_kwargs, is_scalar # Used as a sentinel value to indicate a all dimensions ALL_DIMS = ... @@ -447,7 +447,7 @@ def assign_coords(self, coords=None, **coords_kwargs): New coordinate can also be attached to an existing dimension: >>> lon_2 = np.array([300, 289, 0, 1]) - >>> da.assign_coords(lon_2=('lon', lon_2)) + >>> da.assign_coords(lon_2=("lon", lon_2)) array([0.28298 , 0.667347, 0.657938, 0.177683]) Coordinates: @@ -456,7 +456,7 @@ def assign_coords(self, coords=None, **coords_kwargs): Note that the same result can also be obtained with a dict e.g. - >>> _ = da.assign_coords({"lon_2": ('lon', lon_2)}) + >>> _ = da.assign_coords({"lon_2": ("lon", lon_2)}) Notes ----- @@ -1397,6 +1397,9 @@ def full_like(other, fill_value, dtype: DTypeLike = None): from .dataset import Dataset from .variable import Variable + if not is_scalar(fill_value): + raise ValueError(f"fill_value must be scalar. 
Received {fill_value} instead.") + if isinstance(other, Dataset): data_vars = { k: _full_like_variable(v, fill_value, dtype) @@ -1478,7 +1481,7 @@ def zeros_like(other, dtype: DTypeLike = None): * lat (lat) int64 1 2 * lon (lon) int64 0 1 2 - >>> xr.zeros_like(x, dtype=np.float) + >>> xr.zeros_like(x, dtype=float) array([[0., 0., 0.], [0., 0., 0.]]) diff --git a/xarray/core/computation.py b/xarray/core/computation.py index 6cf4178b5bf..d8a0c53e817 100644 --- a/xarray/core/computation.py +++ b/xarray/core/computation.py @@ -24,9 +24,8 @@ import numpy as np from . import dtypes, duck_array_ops, utils -from .alignment import deep_align +from .alignment import align, deep_align from .merge import merge_coordinates_without_align -from .nanops import dask_array from .options import OPTIONS from .pycompat import dask_array_type from .utils import is_dict_like @@ -1070,6 +1069,200 @@ def earth_mover_distance(first_samples, return apply_array_ufunc(func, *args, dask=dask) +def cov(da_a, da_b, dim=None, ddof=1): + """ + Compute covariance between two DataArray objects along a shared dimension. + + Parameters + ---------- + da_a: DataArray object + Array to compute. + da_b: DataArray object + Array to compute. + dim : str, optional + The dimension along which the covariance will be computed + ddof: int, optional + If ddof=1, covariance is normalized by N-1, giving an unbiased estimate, + else normalization is by N. + + Returns + ------- + covariance: DataArray + + See also + -------- + pandas.Series.cov: corresponding pandas function + xr.corr: respective function to calculate correlation + + Examples + -------- + >>> da_a = DataArray( + ... np.array([[1, 2, 3], [0.1, 0.2, 0.3], [3.2, 0.6, 1.8]]), + ... dims=("space", "time"), + ... coords=[ + ... ("space", ["IA", "IL", "IN"]), + ... ("time", pd.date_range("2000-01-01", freq="1D", periods=3)), + ... ], + ... ) + >>> da_a + + array([[1. , 2. , 3. ], + [0.1, 0.2, 0.3], + [3.2, 0.6, 1.8]]) + Coordinates: + * space (space) >> da_b = DataArray( + ... np.array([[0.2, 0.4, 0.6], [15, 10, 5], [3.2, 0.6, 1.8]]), + ... dims=("space", "time"), + ... coords=[ + ... ("space", ["IA", "IL", "IN"]), + ... ("time", pd.date_range("2000-01-01", freq="1D", periods=3)), + ... ], + ... ) + >>> da_b + + array([[ 0.2, 0.4, 0.6], + [15. , 10. , 5. ], + [ 3.2, 0.6, 1.8]]) + Coordinates: + * space (space) >> xr.cov(da_a, da_b) + + array(-3.53055556) + >>> xr.cov(da_a, da_b, dim="time") + + array([ 0.2, -0.5, 1.69333333]) + Coordinates: + * space (space) >> da_a = DataArray( + ... np.array([[1, 2, 3], [0.1, 0.2, 0.3], [3.2, 0.6, 1.8]]), + ... dims=("space", "time"), + ... coords=[ + ... ("space", ["IA", "IL", "IN"]), + ... ("time", pd.date_range("2000-01-01", freq="1D", periods=3)), + ... ], + ... ) + >>> da_a + + array([[1. , 2. , 3. ], + [0.1, 0.2, 0.3], + [3.2, 0.6, 1.8]]) + Coordinates: + * space (space) >> da_b = DataArray( + ... np.array([[0.2, 0.4, 0.6], [15, 10, 5], [3.2, 0.6, 1.8]]), + ... dims=("space", "time"), + ... coords=[ + ... ("space", ["IA", "IL", "IN"]), + ... ("time", pd.date_range("2000-01-01", freq="1D", periods=3)), + ... ], + ... ) + >>> da_b + + array([[ 0.2, 0.4, 0.6], + [15. , 10. , 5. ], + [ 3.2, 0.6, 1.8]]) + Coordinates: + * space (space) >> xr.corr(da_a, da_b) + + array(-0.57087777) + >>> xr.corr(da_a, da_b, dim="time") + + array([ 1., -1., 1.]) + Coordinates: + * space (space) ...c' # Note: input_core_dims are always moved to the last position subscripts_list = [ - "..." + "".join([dim_map[d] for d in ds]) for ds in input_core_dims + "..." 
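For orientation, the two new reductions above are related in the usual way: the Pearson correlation is the covariance normalised by both standard deviations, provided the same (population, `ddof=0`) normalisation is used throughout. A hedged check for fully observed, already-aligned inputs:

```python
import numpy as np
import xarray as xr

rng = np.random.default_rng(0)
a = xr.DataArray(rng.normal(size=100), dims="time")
b = xr.DataArray(rng.normal(size=100), dims="time")

manual = xr.cov(a, b, ddof=0) / (a.std() * b.std())  # DataArray.std defaults to ddof=0
np.testing.assert_allclose(xr.corr(a, b), manual)
```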
+ "".join(dim_map[d] for d in ds) for ds in input_core_dims ] subscripts = ",".join(subscripts_list) - subscripts += "->..." + "".join([dim_map[d] for d in output_core_dims[0]]) + subscripts += "->..." + "".join(dim_map[d] for d in output_core_dims[0]) join = OPTIONS["arithmetic_join"] # using "inner" emulates `(a * b).sum()` for all joins (except "exact") @@ -1329,7 +1522,7 @@ def polyval(coord, coeffs, degree_dim="degree"): from .dataarray import DataArray from .missing import get_clean_interp_index - x = get_clean_interp_index(coord, coord.name) + x = get_clean_interp_index(coord, coord.name, strict=False) deg_coord = coeffs[degree_dim] @@ -1380,24 +1573,24 @@ def _calc_idxminmax( # This will run argmin or argmax. indx = func(array, dim=dim, axis=None, keep_attrs=keep_attrs, skipna=skipna) - # Get the coordinate we want. - coordarray = array[dim] - # Handle dask arrays. - if isinstance(array, dask_array_type): - res = dask_array.map_blocks(coordarray, indx, dtype=indx.dtype) + if isinstance(array.data, dask_array_type): + import dask.array + + chunks = dict(zip(array.dims, array.chunks)) + dask_coord = dask.array.from_array(array[dim].data, chunks=chunks[dim]) + res = indx.copy(data=dask_coord[indx.data.ravel()].reshape(indx.shape)) + # we need to attach back the dim name + res.name = dim else: - res = coordarray[ - indx, - ] + res = array[dim][(indx,)] + # The dim is gone but we need to remove the corresponding coordinate. + del res.coords[dim] if skipna or (skipna is None and array.dtype.kind in na_dtypes): # Put the NaN values back in after removing them res = res.where(~allna, fill_value) - # The dim is gone but we need to remove the corresponding coordinate. - del res.coords[dim] - # Copy attributes from argmin/argmax, if any res.attrs = indx.attrs diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py index ffa05ca64f0..0ce76a5e23a 100644 --- a/xarray/core/dataarray.py +++ b/xarray/core/dataarray.py @@ -1,6 +1,5 @@ import datetime import functools -import warnings from numbers import Number from typing import ( TYPE_CHECKING, @@ -54,7 +53,7 @@ from .formatting import format_item from .indexes import Indexes, default_indexes, propagate_indexes from .indexing import is_fancy_indexer -from .merge import PANDAS_TYPES, _extract_indexes_from_coords +from .merge import PANDAS_TYPES, MergeError, _extract_indexes_from_coords from .options import OPTIONS from .utils import Default, ReprObject, _check_inplace, _default, either_dict_or_kwargs from .variable import ( @@ -261,7 +260,7 @@ class DataArray(AbstractArray, DataWithCoords): _resample_cls = resample.DataArrayResample _weighted_cls = weighted.DataArrayWeighted - dt = property(CombinedDatetimelikeAccessor) + dt = utils.UncachedAccessor(CombinedDatetimelikeAccessor) def __init__( self, @@ -1077,6 +1076,19 @@ def sel( """Return a new DataArray whose data is given by selecting index labels along the specified dimension(s). + In contrast to `DataArray.isel`, indexers for this method should use + labels instead of integers. + + Under the hood, this method is powered by using pandas's powerful Index + objects. This makes label based indexing essentially just as fast as + using integer indexing. + + It also means this method uses pandas's (well documented) logic for + indexing. This means you can use string shortcuts for datetime indexes + (e.g., '2000-01' to select all values in January 2000). It also means + that slices are treated as inclusive of both the start and stop values, + unlike normal Python indexing. + .. 
warning:: Do not try to assign values when using any of the indexing methods @@ -1089,6 +1101,45 @@ def sel( Assigning values with the chained indexing using ``.sel`` or ``.isel`` fails silently. + Parameters + ---------- + indexers : dict, optional + A dict with keys matching dimensions and values given + by scalars, slices or arrays of tick labels. For dimensions with + multi-index, the indexer may also be a dict-like object with keys + matching index level names. + If DataArrays are passed as indexers, xarray-style indexing will be + carried out. See :ref:`indexing` for the details. + One of indexers or indexers_kwargs must be provided. + method : {None, 'nearest', 'pad'/'ffill', 'backfill'/'bfill'}, optional + Method to use for inexact matches: + + * None (default): only exact matches + * pad / ffill: propagate last valid index value forward + * backfill / bfill: propagate next valid index value backward + * nearest: use nearest valid index value + tolerance : optional + Maximum distance between original and new labels for inexact + matches. The values of the index at the matching locations must + satisfy the equation ``abs(index[indexer] - target) <= tolerance``. + drop : bool, optional + If ``drop=True``, drop coordinates variables in `indexers` instead + of making them scalar. + **indexers_kwargs : {dim: indexer, ...}, optional + The keyword arguments form of ``indexers``. + One of indexers or indexers_kwargs must be provided. + + Returns + ------- + obj : DataArray + A new DataArray with the same contents as this DataArray, except the + data and each dimension is indexed by the appropriate indexers. + If indexer DataArrays have coordinates that do not conflict with + this object, then these coordinates will be attached. + In general, each array's data will be a view of the array's data + in this DataArray, unless vectorized indexing was triggered by using + an array indexer, in which case the data will be a copy. + See Also -------- Dataset.sel @@ -1915,7 +1966,7 @@ def to_unstacked_dataset(self, dim, level=0): # unstacked dataset return Dataset(data_dict) - def transpose(self, *dims: Hashable, transpose_coords: bool = None) -> "DataArray": + def transpose(self, *dims: Hashable, transpose_coords: bool = True) -> "DataArray": """Return a new DataArray object with transposed dimensions. Parameters @@ -1923,7 +1974,7 @@ def transpose(self, *dims: Hashable, transpose_coords: bool = None) -> "DataArra *dims : hashable, optional By default, reverse the dimensions. Otherwise, reorder the dimensions to this order. - transpose_coords : boolean, optional + transpose_coords : boolean, default True If True, also transpose the coordinates of this DataArray. Returns @@ -1952,15 +2003,6 @@ def transpose(self, *dims: Hashable, transpose_coords: bool = None) -> "DataArra coords[name] = coord.variable.transpose(*coord_dims) return self._replace(variable, coords) else: - if transpose_coords is None and any(self[c].ndim > 1 for c in self.coords): - warnings.warn( - "This DataArray contains multi-dimensional " - "coordinates. In the future, these coordinates " - "will be transposed as well unless you specify " - "transpose_coords=False.", - FutureWarning, - stacklevel=2, - ) return self._replace(variable) @property @@ -2671,8 +2713,15 @@ def func(self, other): # don't support automatic alignment with in-place arithmetic. 
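The clearer `MergeError` raised above appears whenever an in-place operation would need automatic alignment. A sketch of the failure and the two suggested workarounds:

```python
import xarray as xr

a = xr.DataArray([1, 2, 3], dims="x", coords={"x": [0, 1, 2]})
b = xr.DataArray([10, 20, 30], dims="x", coords={"x": [1, 2, 3]})

try:
    a += b                      # conflicting "x" indexes cannot be aligned in place
except xr.MergeError as err:
    print(err)

a_aligned, b_aligned = xr.align(a, b, join="inner")
a_aligned += b_aligned          # works: indexes already match

c = a + b                       # or use the out-of-place operation, which aligns
```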
other_coords = getattr(other, "coords", None) other_variable = getattr(other, "variable", other) - with self.coords._merge_inplace(other_coords): - f(self.variable, other_variable) + try: + with self.coords._merge_inplace(other_coords): + f(self.variable, other_variable) + except MergeError as exc: + raise MergeError( + "Automatic alignment is not supported for in-place operations.\n" + "Consider aligning the indices manually or using a not-in-place operation.\n" + "See https://github.com/pydata/xarray/issues/3910 for more explanations." + ) from exc return self return func @@ -2680,24 +2729,7 @@ def func(self, other): def _copy_attrs_from(self, other: Union["DataArray", Dataset, Variable]) -> None: self.attrs = other.attrs - @property - def plot(self) -> _PlotMethods: - """ - Access plotting functions for DataArray's - - >>> d = xr.DataArray([[1, 2], [3, 4]]) - - For convenience just call this directly - - >>> d.plot() - - Or use it as a namespace to use xarray.plot functions as - DataArray methods - - >>> d.plot.imshow() # equivalent to xarray.plot.imshow(d) - - """ - return _PlotMethods(self) + plot = utils.UncachedAccessor(_PlotMethods) def _title_for_slice(self, truncate: int = 50) -> str: """ @@ -3260,57 +3292,107 @@ def map_blocks( func: "Callable[..., T_DSorDA]", args: Sequence[Any] = (), kwargs: Mapping[str, Any] = None, + template: Union["DataArray", "Dataset"] = None, ) -> "T_DSorDA": """ - Apply a function to each chunk of this DataArray. This method is experimental - and its signature may change. + Apply a function to each block of this DataArray. + + .. warning:: + This method is experimental and its signature may change. Parameters ---------- func: callable - User-provided function that accepts a DataArray as its first parameter. The - function will receive a subset of this DataArray, corresponding to one chunk - along each chunked dimension. ``func`` will be executed as - ``func(obj_subset, *args, **kwargs)``. - - The function will be first run on mocked-up data, that looks like this array - but has sizes 0, to determine properties of the returned object such as - dtype, variable names, new dimensions and new indexes (if any). + User-provided function that accepts a DataArray as its first + parameter. The function will receive a subset or 'block' of this DataArray (see below), + corresponding to one chunk along each chunked dimension. ``func`` will be + executed as ``func(subset_dataarray, *subset_args, **kwargs)``. This function must return either a single DataArray or a single Dataset. - This function cannot change size of existing dimensions, or add new chunked - dimensions. + This function cannot add a new chunked dimension. + + obj: DataArray, Dataset + Passed to the function as its first argument, one block at a time. args: Sequence - Passed verbatim to func after unpacking, after the sliced DataArray. xarray - objects, if any, will not be split by chunks. Passing dask collections is - not allowed. + Passed to func after unpacking and subsetting any xarray objects by blocks. + xarray objects in args must be aligned with obj, otherwise an error is raised. kwargs: Mapping Passed verbatim to func after unpacking. xarray objects, if any, will not be - split by chunks. Passing dask collections is not allowed. + subset to blocks. Passing dask collections in kwargs is not allowed. + template: (optional) DataArray, Dataset + xarray object representing the final result after compute is called. 
If not provided, + the function will be first run on mocked-up data, that looks like ``obj`` but + has sizes 0, to determine properties of the returned object such as dtype, + variable names, attributes, new dimensions and new indexes (if any). + ``template`` must be provided if the function changes the size of existing dimensions. + When provided, ``attrs`` on variables in `template` are copied over to the result. Any + ``attrs`` set by ``func`` will be ignored. + Returns ------- - A single DataArray or Dataset with dask backend, reassembled from the outputs of - the function. + A single DataArray or Dataset with dask backend, reassembled from the outputs of the + function. Notes ----- - This method is designed for when one needs to manipulate a whole xarray object - within each chunk. In the more common case where one can work on numpy arrays, - it is recommended to use apply_ufunc. + This function is designed for when ``func`` needs to manipulate a whole xarray object + subset to each block. In the more common case where ``func`` can work on numpy arrays, it is + recommended to use ``apply_ufunc``. - If none of the variables in this DataArray is backed by dask, calling this - method is equivalent to calling ``func(self, *args, **kwargs)``. + If none of the variables in ``obj`` is backed by dask arrays, calling this function is + equivalent to calling ``func(obj, *args, **kwargs)``. See Also -------- - dask.array.map_blocks, xarray.apply_ufunc, xarray.map_blocks, - xarray.Dataset.map_blocks + dask.array.map_blocks, xarray.apply_ufunc, xarray.Dataset.map_blocks, + xarray.DataArray.map_blocks + + Examples + -------- + + Calculate an anomaly from climatology using ``.groupby()``. Using + ``xr.map_blocks()`` allows for parallel operations with knowledge of ``xarray``, + its indices, and its methods like ``.groupby()``. + + >>> def calculate_anomaly(da, groupby_type="time.month"): + ... gb = da.groupby(groupby_type) + ... clim = gb.mean(dim="time") + ... return gb - clim + >>> time = xr.cftime_range("1990-01", "1992-01", freq="M") + >>> np.random.seed(123) + >>> array = xr.DataArray( + ... np.random.rand(len(time)), dims="time", coords=[time] + ... ).chunk() + >>> array.map_blocks(calculate_anomaly, template=array).compute() + + array([ 0.12894847, 0.11323072, -0.0855964 , -0.09334032, 0.26848862, + 0.12382735, 0.22460641, 0.07650108, -0.07673453, -0.22865714, + -0.19063865, 0.0590131 , -0.12894847, -0.11323072, 0.0855964 , + 0.09334032, -0.26848862, -0.12382735, -0.22460641, -0.07650108, + 0.07673453, 0.22865714, 0.19063865, -0.0590131 ]) + Coordinates: + * time (time) object 1990-01-31 00:00:00 ... 1991-12-31 00:00:00 + + Note that one must explicitly use ``args=[]`` and ``kwargs={}`` to pass arguments + to the function being applied in ``xr.map_blocks()``: + + >>> array.map_blocks( + ... calculate_anomaly, kwargs={"groupby_type": "time.year"}, template=array, + ... ) + + array([ 0.15361741, -0.25671244, -0.31600032, 0.008463 , 0.1766172 , + -0.11974531, 0.43791243, 0.14197797, -0.06191987, -0.15073425, + -0.19967375, 0.18619794, -0.05100474, -0.42989909, -0.09153273, + 0.24841842, -0.30708526, -0.31412523, 0.04197439, 0.0422506 , + 0.14482397, 0.35985481, 0.23487834, 0.12144652]) + Coordinates: + * time (time) object 1990-01-31 00:00:00 ... 
1991-12-31 00:00:00 """ from .parallel import map_blocks - return map_blocks(func, self, args, kwargs) + return map_blocks(func, self, args, kwargs, template) def polyfit( self, @@ -3495,17 +3577,18 @@ def pad( Examples -------- - >>> arr = xr.DataArray([5, 6, 7], coords=[("x", [0,1,2])]) - >>> arr.pad(x=(1,2), constant_values=0) + >>> arr = xr.DataArray([5, 6, 7], coords=[("x", [0, 1, 2])]) + >>> arr.pad(x=(1, 2), constant_values=0) array([0, 5, 6, 7, 0, 0]) Coordinates: * x (x) float64 nan 0.0 1.0 2.0 nan nan - >>> da = xr.DataArray([[0,1,2,3], [10,11,12,13]], - dims=["x", "y"], - coords={"x": [0,1], "y": [10, 20 ,30, 40], "z": ("x", [100, 200])} - ) + >>> da = xr.DataArray( + ... [[0, 1, 2, 3], [10, 11, 12, 13]], + ... dims=["x", "y"], + ... coords={"x": [0, 1], "y": [10, 20, 30, 40], "z": ("x", [100, 200])}, + ... ) >>> da.pad(x=1) array([[nan, nan, nan, nan], @@ -3592,8 +3675,9 @@ def idxmin( Examples -------- - >>> array = xr.DataArray([0, 2, 1, 0, -2], dims="x", - ... coords={"x": ['a', 'b', 'c', 'd', 'e']}) + >>> array = xr.DataArray( + ... [0, 2, 1, 0, -2], dims="x", coords={"x": ["a", "b", "c", "d", "e"]} + ... ) >>> array.min() array(-2) @@ -3604,13 +3688,15 @@ def idxmin( array('e', dtype='>> array = xr.DataArray([[2.0, 1.0, 2.0, 0.0, -2.0], - ... [-4.0, np.NaN, 2.0, np.NaN, -2.0], - ... [np.NaN, np.NaN, 1., np.NaN, np.NaN]], - ... dims=["y", "x"], - ... coords={"y": [-1, 0, 1], - ... "x": np.arange(5.)**2} - ... ) + >>> array = xr.DataArray( + ... [ + ... [2.0, 1.0, 2.0, 0.0, -2.0], + ... [-4.0, np.NaN, 2.0, np.NaN, -2.0], + ... [np.NaN, np.NaN, 1.0, np.NaN, np.NaN], + ... ], + ... dims=["y", "x"], + ... coords={"y": [-1, 0, 1], "x": np.arange(5.0) ** 2}, + ... ) >>> array.min(dim="x") array([-2., -4., 1.]) @@ -3686,8 +3772,9 @@ def idxmax( Examples -------- - >>> array = xr.DataArray([0, 2, 1, 0, -2], dims="x", - ... coords={"x": ['a', 'b', 'c', 'd', 'e']}) + >>> array = xr.DataArray( + ... [0, 2, 1, 0, -2], dims="x", coords={"x": ["a", "b", "c", "d", "e"]} + ... ) >>> array.max() array(2) @@ -3698,13 +3785,15 @@ def idxmax( array('b', dtype='>> array = xr.DataArray([[2.0, 1.0, 2.0, 0.0, -2.0], - ... [-4.0, np.NaN, 2.0, np.NaN, -2.0], - ... [np.NaN, np.NaN, 1., np.NaN, np.NaN]], - ... dims=["y", "x"], - ... coords={"y": [-1, 0, 1], - ... "x": np.arange(5.)**2} - ... ) + >>> array = xr.DataArray( + ... [ + ... [2.0, 1.0, 2.0, 0.0, -2.0], + ... [-4.0, np.NaN, 2.0, np.NaN, -2.0], + ... [np.NaN, np.NaN, 1.0, np.NaN, np.NaN], + ... ], + ... dims=["y", "x"], + ... coords={"y": [-1, 0, 1], "x": np.arange(5.0) ** 2}, + ... ) >>> array.max(dim="x") array([2., 2., 1.]) @@ -3730,9 +3819,212 @@ def idxmax( keep_attrs=keep_attrs, ) + def argmin( + self, + dim: Union[Hashable, Sequence[Hashable]] = None, + axis: int = None, + keep_attrs: bool = None, + skipna: bool = None, + ) -> Union["DataArray", Dict[Hashable, "DataArray"]]: + """Index or indices of the minimum of the DataArray over one or more dimensions. + + If a sequence is passed to 'dim', then result returned as dict of DataArrays, + which can be passed directly to isel(). If a single str is passed to 'dim' then + returns a DataArray with dtype int. + + If there are multiple minima, the indices of the first one found will be + returned. + + Parameters + ---------- + dim : hashable, sequence of hashable or ..., optional + The dimensions over which to find the minimum. 
By default, finds minimum over + all dimensions - for now returning an int for backward compatibility, but + this is deprecated, in future will return a dict with indices for all + dimensions; to return a dict with all dimensions now, pass '...'. + axis : int, optional + Axis over which to apply `argmin`. Only one of the 'dim' and 'axis' arguments + can be supplied. + keep_attrs : bool, optional + If True, the attributes (`attrs`) will be copied from the original + object to the new one. If False (default), the new object will be + returned without attributes. + skipna : bool, optional + If True, skip missing values (as marked by NaN). By default, only + skips missing values for float dtypes; other dtypes either do not + have a sentinel missing value (int) or skipna=True has not been + implemented (object, datetime64 or timedelta64). + + Returns + ------- + result : DataArray or dict of DataArray + + See also + -------- + Variable.argmin, DataArray.idxmin + + Examples + -------- + >>> array = xr.DataArray([0, 2, -1, 3], dims="x") + >>> array.min() + + array(-1) + >>> array.argmin() + + array(2) + >>> array.argmin(...) + {'x': + array(2)} + >>> array.isel(array.argmin(...)) + array(-1) + + >>> array = xr.DataArray([[[3, 2, 1], [3, 1, 2], [2, 1, 3]], + ... [[1, 3, 2], [2, -5, 1], [2, 3, 1]]], + ... dims=("x", "y", "z")) + >>> array.min(dim="x") + + array([[ 1, 2, 1], + [ 2, -5, 1], + [ 2, 1, 1]]) + Dimensions without coordinates: y, z + >>> array.argmin(dim="x") + + array([[1, 0, 0], + [1, 1, 1], + [0, 0, 1]]) + Dimensions without coordinates: y, z + >>> array.argmin(dim=["x"]) + {'x': + array([[1, 0, 0], + [1, 1, 1], + [0, 0, 1]]) + Dimensions without coordinates: y, z} + >>> array.min(dim=("x", "z")) + + array([ 1, -5, 1]) + Dimensions without coordinates: y + >>> array.argmin(dim=["x", "z"]) + {'x': + array([0, 1, 0]) + Dimensions without coordinates: y, 'z': + array([2, 1, 1]) + Dimensions without coordinates: y} + >>> array.isel(array.argmin(dim=["x", "z"])) + + array([ 1, -5, 1]) + Dimensions without coordinates: y + """ + result = self.variable.argmin(dim, axis, keep_attrs, skipna) + if isinstance(result, dict): + return {k: self._replace_maybe_drop_dims(v) for k, v in result.items()} + else: + return self._replace_maybe_drop_dims(result) + + def argmax( + self, + dim: Union[Hashable, Sequence[Hashable]] = None, + axis: int = None, + keep_attrs: bool = None, + skipna: bool = None, + ) -> Union["DataArray", Dict[Hashable, "DataArray"]]: + """Index or indices of the maximum of the DataArray over one or more dimensions. + + If a sequence is passed to 'dim', then result returned as dict of DataArrays, + which can be passed directly to isel(). If a single str is passed to 'dim' then + returns a DataArray with dtype int. + + If there are multiple maxima, the indices of the first one found will be + returned. + + Parameters + ---------- + dim : hashable, sequence of hashable or ..., optional + The dimensions over which to find the maximum. By default, finds maximum over + all dimensions - for now returning an int for backward compatibility, but + this is deprecated, in future will return a dict with indices for all + dimensions; to return a dict with all dimensions now, pass '...'. + axis : int, optional + Axis over which to apply `argmin`. Only one of the 'dim' and 'axis' arguments + can be supplied. + keep_attrs : bool, optional + If True, the attributes (`attrs`) will be copied from the original + object to the new one. 
If False (default), the new object will be + returned without attributes. + skipna : bool, optional + If True, skip missing values (as marked by NaN). By default, only + skips missing values for float dtypes; other dtypes either do not + have a sentinel missing value (int) or skipna=True has not been + implemented (object, datetime64 or timedelta64). + + Returns + ------- + result : DataArray or dict of DataArray + + See also + -------- + Variable.argmax, DataArray.idxmax + + Examples + -------- + >>> array = xr.DataArray([0, 2, -1, 3], dims="x") + >>> array.max() + + array(3) + >>> array.argmax() + + array(3) + >>> array.argmax(...) + {'x': + array(3)} + >>> array.isel(array.argmax(...)) + + array(3) + + >>> array = xr.DataArray([[[3, 2, 1], [3, 1, 2], [2, 1, 3]], + ... [[1, 3, 2], [2, 5, 1], [2, 3, 1]]], + ... dims=("x", "y", "z")) + >>> array.max(dim="x") + + array([[3, 3, 2], + [3, 5, 2], + [2, 3, 3]]) + Dimensions without coordinates: y, z + >>> array.argmax(dim="x") + + array([[0, 1, 1], + [0, 1, 0], + [0, 1, 0]]) + Dimensions without coordinates: y, z + >>> array.argmax(dim=["x"]) + {'x': + array([[0, 1, 1], + [0, 1, 0], + [0, 1, 0]]) + Dimensions without coordinates: y, z} + >>> array.max(dim=("x", "z")) + + array([3, 5, 3]) + Dimensions without coordinates: y + >>> array.argmax(dim=["x", "z"]) + {'x': + array([0, 1, 0]) + Dimensions without coordinates: y, 'z': + array([0, 1, 2]) + Dimensions without coordinates: y} + >>> array.isel(array.argmax(dim=["x", "z"])) + + array([3, 5, 3]) + Dimensions without coordinates: y + """ + result = self.variable.argmax(dim, axis, keep_attrs, skipna) + if isinstance(result, dict): + return {k: self._replace_maybe_drop_dims(v) for k, v in result.items()} + else: + return self._replace_maybe_drop_dims(result) + # this needs to be at the end, or mypy will confuse with `str` # https://mypy.readthedocs.io/en/latest/common_issues.html#dealing-with-conflicting-names - str = property(StringAccessor) + str = utils.UncachedAccessor(StringAccessor) # priority most be higher than Variable to properly work with binary ufuncs diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index d811d54847f..b46b1d6dce0 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -27,6 +27,7 @@ TypeVar, Union, cast, + overload, ) import numpy as np @@ -329,7 +330,7 @@ def split_indexes( else: vars_to_remove.append(d) if not drop: - vars_to_create[str(d) + "_"] = Variable(d, index) + vars_to_create[str(d) + "_"] = Variable(d, index, variables[d].attrs) for d, levs in dim_levels.items(): index = variables[d].to_index() @@ -341,7 +342,7 @@ def split_indexes( if not drop: for lev in levs: idx = index.get_level_values(lev) - vars_to_create[idx.name] = Variable(d, idx) + vars_to_create[idx.name] = Variable(d, idx, variables[d].attrs) new_variables = dict(variables) for v in set(vars_to_remove): @@ -1055,9 +1056,7 @@ def copy(self, deep: bool = False, data: Mapping = None) -> "Dataset": structure of the original object, but with the new data. Original object is unaffected. - >>> ds.copy( - ... data={"foo": np.arange(6).reshape(2, 3), "bar": ["a", "b"]} - ... 
) + >>> ds.copy(data={"foo": np.arange(6).reshape(2, 3), "bar": ["a", "b"]}) Dimensions: (dim_0: 2, dim_1: 3, x: 2) Coordinates: @@ -1243,13 +1242,25 @@ def loc(self) -> _LocIndexer: """ return _LocIndexer(self) - def __getitem__(self, key: Any) -> "Union[DataArray, Dataset]": + # FIXME https://github.com/python/mypy/issues/7328 + @overload + def __getitem__(self, key: Mapping) -> "Dataset": # type: ignore + ... + + @overload + def __getitem__(self, key: Hashable) -> "DataArray": # type: ignore + ... + + @overload + def __getitem__(self, key: Any) -> "Dataset": + ... + + def __getitem__(self, key): """Access variables or coordinates this dataset as a :py:class:`~xarray.DataArray`. Indexing with a list of names will return a new ``Dataset`` object. """ - # TODO(shoyer): type this properly: https://github.com/python/mypy/issues/7328 if utils.is_dict_like(key): return self.isel(**cast(Mapping, key)) @@ -1537,7 +1548,7 @@ def to_netcdf( ``dask.delayed.Delayed`` object that can be computed later. invalid_netcdf: boolean Only valid along with engine='h5netcdf'. If True, allow writing - hdf5 files which are valid netcdf as described in + hdf5 files which are invalid netcdf as described in https://github.com/shoyer/h5netcdf. Default: False. """ if encoding is None: @@ -1581,7 +1592,7 @@ def to_zarr( mode : {'w', 'w-', 'a', None} Persistence mode: 'w' means create (overwrite if exists); 'w-' means create (fail if exists); - 'a' means append (create if does not exist). + 'a' means override existing variables (create if does not exist). If ``append_dim`` is set, ``mode`` can be omitted as it is internally set to ``'a'``. Otherwise, ``mode`` will default to `w-` if not set. @@ -1600,11 +1611,21 @@ def to_zarr( If True, apply zarr's `consolidate_metadata` function to the store after writing. append_dim: hashable, optional - If set, the dimension on which the data will be appended. + If set, the dimension along which the data will be appended. All + other dimensions on overriden variables must remain the same size. References ---------- https://zarr.readthedocs.io/ + + Notes + ----- + Zarr chunking behavior: + If chunks are found in the encoding argument or attribute + corresponding to any DataArray, those chunks are used. + If a DataArray is a dask array, it is written with those chunks. + If not other chunks are found, Zarr uses its own heuristics to + choose automatic chunk sizes. """ if encoding is None: encoding = {} @@ -1699,7 +1720,10 @@ def chunks(self) -> Mapping[Hashable, Tuple[int, ...]]: def chunk( self, chunks: Union[ - None, Number, Mapping[Hashable, Union[None, Number, Tuple[Number, ...]]] + None, + Number, + str, + Mapping[Hashable, Union[None, Number, str, Tuple[Number, ...]]], ] = None, name_prefix: str = "xarray-", token: str = None, @@ -1717,7 +1741,7 @@ def chunk( Parameters ---------- - chunks : int or mapping, optional + chunks : int, 'auto' or mapping, optional Chunk sizes along each dimension, e.g., ``5`` or ``{'x': 5, 'y': 5}``. 
name_prefix : str, optional @@ -1734,7 +1758,7 @@ def chunk( """ from dask.base import tokenize - if isinstance(chunks, Number): + if isinstance(chunks, (Number, str)): chunks = dict.fromkeys(self.dims, chunks) if chunks is not None: @@ -1768,7 +1792,7 @@ def maybe_chunk(name, var, chunks): return self._replace(variables) def _validate_indexers( - self, indexers: Mapping[Hashable, Any], missing_dims: str = "raise", + self, indexers: Mapping[Hashable, Any], missing_dims: str = "raise" ) -> Iterator[Tuple[Hashable, Union[int, slice, np.ndarray, Variable]]]: """ Here we make sure + indexer has a valid keys @@ -4526,7 +4550,7 @@ def _set_sparse_data_from_dataframe( idx = dataframe.index if isinstance(idx, pd.MultiIndex): coords = np.stack([np.asarray(code) for code in idx.codes], axis=0) - is_sorted = idx.is_lexsorted + is_sorted = idx.is_lexsorted() shape = tuple(lev.size for lev in idx.levels) else: coords = np.arange(idx.size).reshape(1, -1) @@ -4598,6 +4622,7 @@ def from_dataframe(cls, dataframe: pd.DataFrame, sparse: bool = False) -> "Datas See also -------- xarray.DataArray.from_series + pandas.DataFrame.to_xarray """ # TODO: Add an option to remove dimensions along which the variables # are constant, to enable consistent serialization to/from a dataframe, @@ -5551,16 +5576,7 @@ def real(self): def imag(self): return self._unary_op(lambda x: x.imag, keep_attrs=True)(self) - @property - def plot(self): - """ - Access plotting functions for Datasets. - Use it as a namespace to use xarray.plot functions as Dataset methods - - >>> ds.plot.scatter(...) # equivalent to xarray.plot.scatter(ds,...) - - """ - return _Dataset_PlotMethods(self) + plot = utils.UncachedAccessor(_Dataset_PlotMethods) def filter_by_attrs(self, **kwargs): """Returns a ``Dataset`` with variables that match specific conditions. @@ -5709,57 +5725,108 @@ def map_blocks( func: "Callable[..., T_DSorDA]", args: Sequence[Any] = (), kwargs: Mapping[str, Any] = None, + template: Union["DataArray", "Dataset"] = None, ) -> "T_DSorDA": """ - Apply a function to each chunk of this Dataset. This method is experimental and - its signature may change. + Apply a function to each block of this Dataset. + + .. warning:: + This method is experimental and its signature may change. Parameters ---------- func: callable - User-provided function that accepts a Dataset as its first parameter. The - function will receive a subset of this Dataset, corresponding to one chunk - along each chunked dimension. ``func`` will be executed as - ``func(obj_subset, *args, **kwargs)``. - - The function will be first run on mocked-up data, that looks like this - Dataset but has sizes 0, to determine properties of the returned object such - as dtype, variable names, new dimensions and new indexes (if any). + User-provided function that accepts a Dataset as its first + parameter. The function will receive a subset or 'block' of this Dataset (see below), + corresponding to one chunk along each chunked dimension. ``func`` will be + executed as ``func(subset_dataset, *subset_args, **kwargs)``. This function must return either a single DataArray or a single Dataset. - This function cannot change size of existing dimensions, or add new chunked - dimensions. + This function cannot add a new chunked dimension. + + obj: DataArray, Dataset + Passed to the function as its first argument, one block at a time. args: Sequence - Passed verbatim to func after unpacking, after the sliced DataArray. xarray - objects, if any, will not be split by chunks. 
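The `is_lexsorted()` fix above sits in the sparse `from_dataframe` path. For reference, a small round trip from a MultiIndexed frame (the `sparse=True` variant additionally needs the `sparse` package):

```python
import pandas as pd
import xarray as xr

df = pd.DataFrame(
    {"value": [1.0, 2.0, 3.0, 4.0]},
    index=pd.MultiIndex.from_product([["a", "b"], [0, 1]], names=["letter", "num"]),
)

ds = xr.Dataset.from_dataframe(df)            # dense by default
# ds = xr.Dataset.from_dataframe(df, sparse=True)  # sparse-backed variables

print(ds["value"].dims)   # ('letter', 'num')
print(ds["value"].shape)  # (2, 2)
```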
Passing dask collections is - not allowed. + Passed to func after unpacking and subsetting any xarray objects by blocks. + xarray objects in args must be aligned with obj, otherwise an error is raised. kwargs: Mapping Passed verbatim to func after unpacking. xarray objects, if any, will not be - split by chunks. Passing dask collections is not allowed. + subset to blocks. Passing dask collections in kwargs is not allowed. + template: (optional) DataArray, Dataset + xarray object representing the final result after compute is called. If not provided, + the function will be first run on mocked-up data, that looks like ``obj`` but + has sizes 0, to determine properties of the returned object such as dtype, + variable names, attributes, new dimensions and new indexes (if any). + ``template`` must be provided if the function changes the size of existing dimensions. + When provided, ``attrs`` on variables in `template` are copied over to the result. Any + ``attrs`` set by ``func`` will be ignored. + Returns ------- - A single DataArray or Dataset with dask backend, reassembled from the outputs of - the function. + A single DataArray or Dataset with dask backend, reassembled from the outputs of the + function. Notes ----- - This method is designed for when one needs to manipulate a whole xarray object - within each chunk. In the more common case where one can work on numpy arrays, - it is recommended to use apply_ufunc. + This function is designed for when ``func`` needs to manipulate a whole xarray object + subset to each block. In the more common case where ``func`` can work on numpy arrays, it is + recommended to use ``apply_ufunc``. - If none of the variables in this Dataset is backed by dask, calling this method - is equivalent to calling ``func(self, *args, **kwargs)``. + If none of the variables in ``obj`` is backed by dask arrays, calling this function is + equivalent to calling ``func(obj, *args, **kwargs)``. See Also -------- - dask.array.map_blocks, xarray.apply_ufunc, xarray.map_blocks, + dask.array.map_blocks, xarray.apply_ufunc, xarray.Dataset.map_blocks, xarray.DataArray.map_blocks + + Examples + -------- + + Calculate an anomaly from climatology using ``.groupby()``. Using + ``xr.map_blocks()`` allows for parallel operations with knowledge of ``xarray``, + its indices, and its methods like ``.groupby()``. + + >>> def calculate_anomaly(da, groupby_type="time.month"): + ... gb = da.groupby(groupby_type) + ... clim = gb.mean(dim="time") + ... return gb - clim + >>> time = xr.cftime_range("1990-01", "1992-01", freq="M") + >>> np.random.seed(123) + >>> array = xr.DataArray( + ... np.random.rand(len(time)), dims="time", coords=[time] + ... ).chunk() + >>> ds = xr.Dataset({"a": array}) + >>> ds.map_blocks(calculate_anomaly, template=ds).compute() + + array([ 0.12894847, 0.11323072, -0.0855964 , -0.09334032, 0.26848862, + 0.12382735, 0.22460641, 0.07650108, -0.07673453, -0.22865714, + -0.19063865, 0.0590131 , -0.12894847, -0.11323072, 0.0855964 , + 0.09334032, -0.26848862, -0.12382735, -0.22460641, -0.07650108, + 0.07673453, 0.22865714, 0.19063865, -0.0590131 ]) + Coordinates: + * time (time) object 1990-01-31 00:00:00 ... 1991-12-31 00:00:00 + + Note that one must explicitly use ``args=[]`` and ``kwargs={}`` to pass arguments + to the function being applied in ``xr.map_blocks()``: + + >>> ds.map_blocks( + ... calculate_anomaly, kwargs={"groupby_type": "time.year"}, template=ds, + ... 
) + + array([ 0.15361741, -0.25671244, -0.31600032, 0.008463 , 0.1766172 , + -0.11974531, 0.43791243, 0.14197797, -0.06191987, -0.15073425, + -0.19967375, 0.18619794, -0.05100474, -0.42989909, -0.09153273, + 0.24841842, -0.30708526, -0.31412523, 0.04197439, 0.0422506 , + 0.14482397, 0.35985481, 0.23487834, 0.12144652]) + Coordinates: + * time (time) object 1990-01-31 00:00:00 ... 1991-12-31 00:00:00 """ from .parallel import map_blocks - return map_blocks(func, self, args, kwargs) + return map_blocks(func, self, args, kwargs, template) def polyfit( self, @@ -5823,7 +5890,7 @@ def polyfit( variables = {} skipna_da = skipna - x = get_clean_interp_index(self, dim) + x = get_clean_interp_index(self, dim, strict=False) xname = "{}_".format(self[dim].name) order = int(deg) + 1 lhs = np.vander(x, order) @@ -5934,7 +6001,7 @@ def polyfit( "The number of data points must exceed order to scale the covariance matrix." ) fac = residuals / (x.shape[0] - order) - covariance = xr.DataArray(Vbase, dims=("cov_i", "cov_j"),) * fac + covariance = xr.DataArray(Vbase, dims=("cov_i", "cov_j")) * fac variables[name + "polyfit_covariance"] = covariance return Dataset(data_vars=variables, attrs=self.attrs.copy()) @@ -6060,8 +6127,8 @@ def pad( Examples -------- - >>> ds = xr.Dataset({'foo': ('x', range(5))}) - >>> ds.pad(x=(1,2)) + >>> ds = xr.Dataset({"foo": ("x", range(5))}) + >>> ds.pad(x=(1, 2)) Dimensions: (x: 8) Dimensions without coordinates: x @@ -6155,17 +6222,20 @@ def idxmin( Examples -------- - >>> array1 = xr.DataArray([0, 2, 1, 0, -2], dims="x", - ... coords={"x": ['a', 'b', 'c', 'd', 'e']}) - >>> array2 = xr.DataArray([[2.0, 1.0, 2.0, 0.0, -2.0], - ... [-4.0, np.NaN, 2.0, np.NaN, -2.0], - ... [np.NaN, np.NaN, 1., np.NaN, np.NaN]], - ... dims=["y", "x"], - ... coords={"y": [-1, 0, 1], - ... "x": ['a', 'b', 'c', 'd', 'e']} - ... ) - >>> ds = xr.Dataset({'int': array1, 'float': array2}) - >>> ds.min(dim='x') + >>> array1 = xr.DataArray( + ... [0, 2, 1, 0, -2], dims="x", coords={"x": ["a", "b", "c", "d", "e"]} + ... ) + >>> array2 = xr.DataArray( + ... [ + ... [2.0, 1.0, 2.0, 0.0, -2.0], + ... [-4.0, np.NaN, 2.0, np.NaN, -2.0], + ... [np.NaN, np.NaN, 1.0, np.NaN, np.NaN], + ... ], + ... dims=["y", "x"], + ... coords={"y": [-1, 0, 1], "x": ["a", "b", "c", "d", "e"]}, + ... ) + >>> ds = xr.Dataset({"int": array1, "float": array2}) + >>> ds.min(dim="x") Dimensions: (y: 3) Coordinates: @@ -6173,7 +6243,7 @@ def idxmin( Data variables: int int64 -2 float (y) float64 -2.0 -4.0 1.0 - >>> ds.argmin(dim='x') + >>> ds.argmin(dim="x") Dimensions: (y: 3) Coordinates: @@ -6181,7 +6251,7 @@ def idxmin( Data variables: int int64 4 float (y) int64 4 0 2 - >>> ds.idxmin(dim='x') + >>> ds.idxmin(dim="x") Dimensions: (y: 3) Coordinates: @@ -6197,7 +6267,7 @@ def idxmin( skipna=skipna, fill_value=fill_value, keep_attrs=keep_attrs, - ), + ) ) def idxmax( @@ -6250,17 +6320,20 @@ def idxmax( Examples -------- - >>> array1 = xr.DataArray([0, 2, 1, 0, -2], dims="x", - ... coords={"x": ['a', 'b', 'c', 'd', 'e']}) - >>> array2 = xr.DataArray([[2.0, 1.0, 2.0, 0.0, -2.0], - ... [-4.0, np.NaN, 2.0, np.NaN, -2.0], - ... [np.NaN, np.NaN, 1., np.NaN, np.NaN]], - ... dims=["y", "x"], - ... coords={"y": [-1, 0, 1], - ... "x": ['a', 'b', 'c', 'd', 'e']} - ... ) - >>> ds = xr.Dataset({'int': array1, 'float': array2}) - >>> ds.max(dim='x') + >>> array1 = xr.DataArray( + ... [0, 2, 1, 0, -2], dims="x", coords={"x": ["a", "b", "c", "d", "e"]} + ... ) + >>> array2 = xr.DataArray( + ... [ + ... [2.0, 1.0, 2.0, 0.0, -2.0], + ... 
[-4.0, np.NaN, 2.0, np.NaN, -2.0], + ... [np.NaN, np.NaN, 1.0, np.NaN, np.NaN], + ... ], + ... dims=["y", "x"], + ... coords={"y": [-1, 0, 1], "x": ["a", "b", "c", "d", "e"]}, + ... ) + >>> ds = xr.Dataset({"int": array1, "float": array2}) + >>> ds.max(dim="x") Dimensions: (y: 3) Coordinates: @@ -6268,7 +6341,7 @@ def idxmax( Data variables: int int64 2 float (y) float64 2.0 2.0 1.0 - >>> ds.argmax(dim='x') + >>> ds.argmax(dim="x") Dimensions: (y: 3) Coordinates: @@ -6276,7 +6349,7 @@ def idxmax( Data variables: int int64 1 float (y) int64 0 2 2 - >>> ds.idxmax(dim='x') + >>> ds.idxmax(dim="x") Dimensions: (y: 3) Coordinates: @@ -6292,8 +6365,134 @@ def idxmax( skipna=skipna, fill_value=fill_value, keep_attrs=keep_attrs, - ), + ) ) + def argmin(self, dim=None, axis=None, **kwargs): + """Indices of the minima of the member variables. + + If there are multiple minima, the indices of the first one found will be + returned. + + Parameters + ---------- + dim : str, optional + The dimension over which to find the minimum. By default, finds minimum over + all dimensions - for now returning an int for backward compatibility, but + this is deprecated, in future will be an error, since DataArray.argmin will + return a dict with indices for all dimensions, which does not make sense for + a Dataset. + axis : int, optional + Axis over which to apply `argmin`. Only one of the 'dim' and 'axis' arguments + can be supplied. + keep_attrs : bool, optional + If True, the attributes (`attrs`) will be copied from the original + object to the new one. If False (default), the new object will be + returned without attributes. + skipna : bool, optional + If True, skip missing values (as marked by NaN). By default, only + skips missing values for float dtypes; other dtypes either do not + have a sentinel missing value (int) or skipna=True has not been + implemented (object, datetime64 or timedelta64). + + Returns + ------- + result : Dataset + + See also + -------- + DataArray.argmin + + """ + if dim is None and axis is None: + warnings.warn( + "Once the behaviour of DataArray.argmin() and Variable.argmin() with " + "neither dim nor axis argument changes to return a dict of indices of " + "each dimension, for consistency it will be an error to call " + "Dataset.argmin() with no argument, since we don't return a dict of " + "Datasets.", + DeprecationWarning, + stacklevel=2, + ) + if ( + dim is None + or axis is not None + or (not isinstance(dim, Sequence) and dim is not ...) + or isinstance(dim, str) + ): + # Return int index if single dimension is passed, and is not part of a + # sequence + argmin_func = getattr(duck_array_ops, "argmin") + return self.reduce(argmin_func, dim=dim, axis=axis, **kwargs) + else: + raise ValueError( + "When dim is a sequence or ..., DataArray.argmin() returns a dict. " + "dicts cannot be contained in a Dataset, so cannot call " + "Dataset.argmin() with a sequence or ... for dim" + ) + + def argmax(self, dim=None, axis=None, **kwargs): + """Indices of the maxima of the member variables. + + If there are multiple maxima, the indices of the first one found will be + returned. + + Parameters + ---------- + dim : str, optional + The dimension over which to find the maximum. By default, finds maximum over + all dimensions - for now returning an int for backward compatibility, but + this is deprecated, in future will be an error, since DataArray.argmax will + return a dict with indices for all dimensions, which does not make sense for + a Dataset. 
+ axis : int, optional + Axis over which to apply `argmax`. Only one of the 'dim' and 'axis' arguments + can be supplied. + keep_attrs : bool, optional + If True, the attributes (`attrs`) will be copied from the original + object to the new one. If False (default), the new object will be + returned without attributes. + skipna : bool, optional + If True, skip missing values (as marked by NaN). By default, only + skips missing values for float dtypes; other dtypes either do not + have a sentinel missing value (int) or skipna=True has not been + implemented (object, datetime64 or timedelta64). + + Returns + ------- + result : Dataset + + See also + -------- + DataArray.argmax + + """ + if dim is None and axis is None: + warnings.warn( + "Once the behaviour of DataArray.argmax() and Variable.argmax() with " + "neither dim nor axis argument changes to return a dict of indices of " + "each dimension, for consistency it will be an error to call " + "Dataset.argmax() with no argument, since we don't return a dict of " + "Datasets.", + DeprecationWarning, + stacklevel=2, + ) + if ( + dim is None + or axis is not None + or (not isinstance(dim, Sequence) and dim is not ...) + or isinstance(dim, str) + ): + # Return int index if single dimension is passed, and is not part of a + # sequence + argmax_func = getattr(duck_array_ops, "argmax") + return self.reduce(argmax_func, dim=dim, axis=axis, **kwargs) + else: + raise ValueError( + "When dim is a sequence or ..., DataArray.argmax() returns a dict. " + "dicts cannot be contained in a Dataset, so cannot call " + "Dataset.argmax() with a sequence or ... for dim" + ) + ops.inject_all_ops_and_reduce_methods(Dataset, array_only=False) diff --git a/xarray/core/duck_array_ops.py b/xarray/core/duck_array_ops.py index 1340b456cf2..df579d23544 100644 --- a/xarray/core/duck_array_ops.py +++ b/xarray/core/duck_array_ops.py @@ -6,6 +6,7 @@ import contextlib import inspect import warnings +from distutils.version import LooseVersion from functools import partial import numpy as np @@ -20,6 +21,14 @@ except ImportError: dask_array = None # type: ignore +# TODO: remove after we stop supporting dask < 2.9.1 +try: + import dask + + dask_version = dask.__version__ +except ImportError: + dask_version = None + def _dask_or_eager_func( name, @@ -199,8 +208,19 @@ def allclose_or_equiv(arr1, arr2, rtol=1e-5, atol=1e-8): """ arr1 = asarray(arr1) arr2 = asarray(arr2) + lazy_equiv = lazy_array_equiv(arr1, arr2) if lazy_equiv is None: + # TODO: remove after we require dask >= 2.9.1 + sufficient_dask_version = ( + dask_version is not None and LooseVersion(dask_version) >= "2.9.1" + ) + if not sufficient_dask_version and any( + isinstance(arr, dask_array_type) for arr in [arr1, arr2] + ): + arr1 = np.array(arr1) + arr2 = np.array(arr2) + return bool(isclose(arr1, arr2, rtol=rtol, atol=atol, equal_nan=True).all()) else: return lazy_equiv @@ -339,6 +359,7 @@ def f(values, axis=None, skipna=None, **kwargs): cumprod_1d.numeric_only = True cumsum_1d = _create_nan_agg_method("cumsum") cumsum_1d.numeric_only = True +unravel_index = _dask_or_eager_func("unravel_index") _mean = _create_nan_agg_method("mean") diff --git a/xarray/core/formatting.py b/xarray/core/formatting.py index 534d253ecc8..28eaae5f05b 100644 --- a/xarray/core/formatting.py +++ b/xarray/core/formatting.py @@ -3,7 +3,7 @@ import contextlib import functools from datetime import datetime, timedelta -from itertools import zip_longest +from itertools import chain, zip_longest from typing import Hashable import numpy as np @@
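To make the new ``Dataset.argmin``/``Dataset.argmax`` behaviour concrete, a short sketch with made-up data, relying only on the methods defined above:

```python
import numpy as np
import xarray as xr

ds = xr.Dataset({"a": (("y", "x"), [[3.0, 1.0, 2.0], [0.0, 5.0, 4.0]])})

ds.argmin(dim="x")  # integer index of the minimum along "x", per variable
ds.argmax(dim="y")  # same idea for the maximum along "y"

# Passing a sequence (or ...) raises, because the per-dimension dict that
# DataArray.argmin would return cannot be stored in a Dataset:
# ds.argmin(dim=["x", "y"])  -> ValueError
```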
-140,7 +140,7 @@ def format_item(x, timedelta_format=None, quote_strings=True): return format_timedelta(x, timedelta_format=timedelta_format) elif isinstance(x, (str, bytes)): return repr(x) if quote_strings else x - elif isinstance(x, (float, np.float)): + elif isinstance(x, (float, np.float_)): return f"{x:.4}" else: return str(x) @@ -298,12 +298,10 @@ def _summarize_coord_multiindex(coord, col_width, marker): def _summarize_coord_levels(coord, col_width, marker="-"): return "\n".join( - [ - summarize_variable( - lname, coord.get_level_variable(lname), col_width, marker=marker - ) - for lname in coord.level_names - ] + summarize_variable( + lname, coord.get_level_variable(lname), col_width, marker=marker + ) + for lname in coord.level_names ) @@ -424,6 +422,17 @@ def set_numpy_options(*args, **kwargs): np.set_printoptions(**original) +def limit_lines(string: str, *, limit: int): + """ + If the string is more lines than the limit, + this returns the middle lines replaced by an ellipsis + """ + lines = string.splitlines() + if len(lines) > limit: + string = "\n".join(chain(lines[: limit // 2], ["..."], lines[-limit // 2 :])) + return string + + def short_numpy_repr(array): array = np.asarray(array) @@ -449,7 +458,7 @@ def short_data_repr(array): elif hasattr(internal_data, "__array_function__") or isinstance( internal_data, dask_array_type ): - return repr(array.data) + return limit_lines(repr(array.data), limit=40) elif array._in_memory or array.size < 1e5: return short_numpy_repr(array) else: @@ -541,7 +550,10 @@ def extra_items_repr(extra_keys, mapping, ab_side): for k in a_keys & b_keys: try: # compare xarray variable - compatible = getattr(a_mapping[k], compat)(b_mapping[k]) + if not callable(compat): + compatible = getattr(a_mapping[k], compat)(b_mapping[k]) + else: + compatible = compat(a_mapping[k], b_mapping[k]) is_variable = True except AttributeError: # compare attribute value @@ -562,7 +574,7 @@ def extra_items_repr(extra_keys, mapping, ab_side): for m in (a_mapping, b_mapping): attr_s = "\n".join( - [summarize_attr(ak, av) for ak, av in m[k].attrs.items()] + summarize_attr(ak, av) for ak, av in m[k].attrs.items() ) attrs_summary.append(attr_s) @@ -598,8 +610,13 @@ def extra_items_repr(extra_keys, mapping, ab_side): def _compat_to_str(compat): + if callable(compat): + compat = compat.__name__ + if compat == "equals": return "equal" + elif compat == "allclose": + return "close" else: return compat @@ -613,8 +630,12 @@ def diff_array_repr(a, b, compat): ] summary.append(diff_dim_summary(a, b)) + if callable(compat): + equiv = compat + else: + equiv = array_equiv - if not array_equiv(a.data, b.data): + if not equiv(a.data, b.data): temp = [wrap_indent(short_numpy_repr(obj), start=" ") for obj in (a, b)] diff_data_repr = [ ab_side + "\n" + ab_data_repr diff --git a/xarray/core/formatting_html.py b/xarray/core/formatting_html.py index 8678a58b381..400ef61502e 100644 --- a/xarray/core/formatting_html.py +++ b/xarray/core/formatting_html.py @@ -20,7 +20,9 @@ def short_data_repr_html(array): internal_data = getattr(array, "variable", array)._data if hasattr(internal_data, "_repr_html_"): return internal_data._repr_html_() - return escape(short_data_repr(array)) + else: + text = escape(short_data_repr(array)) + return f"
{text}
" def format_dims(dims, coord_names): @@ -123,7 +125,7 @@ def summarize_variable(name, var, is_index=False, dtype=None, preview=None): f"" f"
{attrs_ul}
" - f"
{data_repr}
" + f"
{data_repr}
" ) @@ -182,8 +184,9 @@ def dim_section(obj): def array_section(obj): # "unique" id to expand/collapse the section data_id = "section-" + str(uuid.uuid4()) - collapsed = "" - preview = escape(inline_variable_array_repr(obj.variable, max_width=70)) + collapsed = "checked" + variable = getattr(obj, "variable", obj) + preview = escape(inline_variable_array_repr(variable, max_width=70)) data_repr = short_data_repr_html(obj) data_icon = _icon("icon-database") @@ -192,7 +195,7 @@ def array_section(obj): f"" f"" f"
{preview}
" - f"
{data_repr}
" + f"
{data_repr}
" "" ) @@ -221,14 +224,20 @@ def array_section(obj): ) -def _obj_repr(header_components, sections): +def _obj_repr(obj, header_components, sections): + """Return HTML repr of an xarray object. + + If CSS is not injected (untrusted notebook), fallback to the plain text repr. + + """ header = f"
{''.join(h for h in header_components)}
" sections = "".join(f"
  • {s}
  • " for s in sections) return ( "
    " f"{ICONS_SVG}" - "
    " + f"
    {escape(repr(obj))}
    " + "" @@ -256,7 +265,7 @@ def array_repr(arr): sections.append(attr_section(arr.attrs)) - return _obj_repr(header_components, sections) + return _obj_repr(arr, header_components, sections) def dataset_repr(ds): @@ -271,4 +280,4 @@ def dataset_repr(ds): attr_section(ds.attrs), ] - return _obj_repr(header_components, sections) + return _obj_repr(ds, header_components, sections) diff --git a/xarray/core/groupby.py b/xarray/core/groupby.py index 5a5f4c0d296..04c0fabae6a 100644 --- a/xarray/core/groupby.py +++ b/xarray/core/groupby.py @@ -29,7 +29,7 @@ def check_reduce_dims(reduce_dims, dimensions): if reduce_dims is not ...: if is_scalar(reduce_dims): reduce_dims = [reduce_dims] - if any([dim not in dimensions for dim in reduce_dims]): + if any(dim not in dimensions for dim in reduce_dims): raise ValueError( "cannot reduce over dimensions %r. expected either '...' to reduce over all dimensions or one or more of %r." % (reduce_dims, dimensions) @@ -272,8 +272,8 @@ def __init__( squeeze=False, grouper=None, bins=None, - restore_coord_dims=None, - cut_kwargs={}, + restore_coord_dims=True, + cut_kwargs=None, ): """Create a GroupBy object @@ -292,13 +292,15 @@ def __init__( bins : array-like, optional If `bins` is specified, the groups will be discretized into the specified bins by `pandas.cut`. - restore_coord_dims : bool, optional + restore_coord_dims : bool, default True If True, also restore the dimension order of multi-dimensional coordinates. cut_kwargs : dict, optional Extra keyword arguments to pass to `pandas.cut` """ + if cut_kwargs is None: + cut_kwargs = {} from .dataarray import DataArray if grouper is not None and bins is not None: @@ -319,7 +321,7 @@ def __init__( group = _DummyGroup(obj, group.name, group.coords) if getattr(group, "name", None) is None: - raise ValueError("`group` must have a name") + group.name = "group" group, obj, stacked_dim, inserted_dims = _ensure_1d(group, obj) (group_dim,) = group.dims @@ -387,21 +389,6 @@ def __init__( "Failed to group data. Are you grouping by a variable that is all NaN?" ) - if ( - isinstance(obj, DataArray) - and restore_coord_dims is None - and any(obj[c].ndim > 1 for c in obj.coords) - ): - warnings.warn( - "This DataArray contains multi-dimensional " - "coordinates. In the future, the dimension order " - "of these coordinates will be restored as well " - "unless you specify restore_coord_dims=False.", - FutureWarning, - stacklevel=2, - ) - restore_coord_dims = False - # specification for the groupby operation self._obj = obj self._group = group diff --git a/xarray/core/merge.py b/xarray/core/merge.py index fea94246471..35b77d700a0 100644 --- a/xarray/core/merge.py +++ b/xarray/core/merge.py @@ -841,7 +841,7 @@ def merge( from .dataarray import DataArray from .dataset import Dataset - dict_like_objects = list() + dict_like_objects = [] for obj in objects: if not isinstance(obj, (DataArray, Dataset, dict)): raise TypeError( diff --git a/xarray/core/missing.py b/xarray/core/missing.py index f973b4a5468..59d4f777c73 100644 --- a/xarray/core/missing.py +++ b/xarray/core/missing.py @@ -208,7 +208,9 @@ def _apply_over_vars_with_dim(func, self, dim=None, **kwargs): return ds -def get_clean_interp_index(arr, dim: Hashable, use_coordinate: Union[str, bool] = True): +def get_clean_interp_index( + arr, dim: Hashable, use_coordinate: Union[str, bool] = True, strict: bool = True +): """Return index to use for x values in interpolation or curve fitting. 
Parameters @@ -221,6 +223,8 @@ def get_clean_interp_index(arr, dim: Hashable, use_coordinate: Union[str, bool] If use_coordinate is True, the coordinate that shares the name of the dimension along which interpolation is being performed will be used as the x values. If False, the x values are set as an equally spaced sequence. + strict : bool + Whether to raise errors if the index is either non-unique or non-monotonic (default). Returns ------- @@ -257,11 +261,12 @@ def get_clean_interp_index(arr, dim: Hashable, use_coordinate: Union[str, bool] if isinstance(index, pd.MultiIndex): index.name = dim - if not index.is_monotonic: - raise ValueError(f"Index {index.name!r} must be monotonically increasing") + if strict: + if not index.is_monotonic: + raise ValueError(f"Index {index.name!r} must be monotonically increasing") - if not index.is_unique: - raise ValueError(f"Index {index.name!r} has duplicate values") + if not index.is_unique: + raise ValueError(f"Index {index.name!r} has duplicate values") # Special case for non-standard calendar indexes # Numerical datetime values are defined with respect to 1970-01-01T00:00:00 in units of nanoseconds @@ -282,7 +287,7 @@ def get_clean_interp_index(arr, dim: Hashable, use_coordinate: Union[str, bool] # xarray/numpy raise a ValueError raise TypeError( f"Index {index.name!r} must be castable to float64 to support " - f"interpolation, got {type(index).__name__}." + f"interpolation or curve fitting, got {type(index).__name__}." ) return index @@ -619,6 +624,19 @@ def interp(var, indexes_coords, method, **kwargs): # default behavior kwargs["bounds_error"] = kwargs.get("bounds_error", False) + # check if the interpolation can be done in orthogonal manner + if ( + len(indexes_coords) > 1 + and method in ["linear", "nearest"] + and all(dest[1].ndim == 1 for dest in indexes_coords.values()) + and len(set([d[1].dims[0] for d in indexes_coords.values()])) + == len(indexes_coords) + ): + # interpolate sequentially + for dim, dest in indexes_coords.items(): + var = interp(var, {dim: dest}, method, **kwargs) + return var + # target dimensions dims = list(indexes_coords) x, new_x = zip(*[indexes_coords[d] for d in dims]) @@ -659,7 +677,7 @@ def interp_func(var, x, new_x, method, kwargs): New coordinates. Should not contain NaN. method: string {'linear', 'nearest', 'zero', 'slinear', 'quadratic', 'cubic'} for - 1-dimensional itnterpolation. + 1-dimensional interpolation. 
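The fast path added to ``interp`` above decomposes a multi-dimensional interpolation with independent 1-D destinations into a sequence of 1-D interpolations. A hedged usage sketch (assumes scipy is installed; the data and coordinates are illustrative):

```python
import numpy as np
import xarray as xr

da = xr.DataArray(
    np.arange(12.0).reshape(3, 4),
    dims=("x", "y"),
    coords={"x": [0, 1, 2], "y": [0, 1, 2, 3]},
)

# With 1-D destinations along distinct dimensions and method="linear" or
# "nearest", the interpolation is now performed one dimension at a time.
da.interp(x=[0.5, 1.5], y=[0.5, 2.5], method="linear")
```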
{'linear', 'nearest'} for multidimensional interpolation **kwargs: Optional keyword arguments to be passed to scipy.interpolator diff --git a/xarray/core/ops.py b/xarray/core/ops.py index b789f93b4f1..d4aeea37aad 100644 --- a/xarray/core/ops.py +++ b/xarray/core/ops.py @@ -47,8 +47,6 @@ # methods which remove an axis REDUCE_METHODS = ["all", "any"] NAN_REDUCE_METHODS = [ - "argmax", - "argmin", "max", "min", "mean", diff --git a/xarray/core/parallel.py b/xarray/core/parallel.py index 6f1668f698f..86044e72dd2 100644 --- a/xarray/core/parallel.py +++ b/xarray/core/parallel.py @@ -16,6 +16,8 @@ DefaultDict, Dict, Hashable, + Iterable, + List, Mapping, Sequence, Tuple, @@ -25,12 +27,50 @@ import numpy as np +from .alignment import align from .dataarray import DataArray from .dataset import Dataset T_DSorDA = TypeVar("T_DSorDA", DataArray, Dataset) +def unzip(iterable): + return zip(*iterable) + + +def assert_chunks_compatible(a: Dataset, b: Dataset): + a = a.unify_chunks() + b = b.unify_chunks() + + for dim in set(a.chunks).intersection(set(b.chunks)): + if a.chunks[dim] != b.chunks[dim]: + raise ValueError(f"Chunk sizes along dimension {dim!r} are not equal.") + + +def check_result_variables( + result: Union[DataArray, Dataset], expected: Mapping[str, Any], kind: str +): + + if kind == "coords": + nice_str = "coordinate" + elif kind == "data_vars": + nice_str = "data" + + # check that coords and data variables are as expected + missing = expected[kind] - set(getattr(result, kind)) + if missing: + raise ValueError( + "Result from applying user function does not contain " + f"{nice_str} variables {missing}." + ) + extra = set(getattr(result, kind)) - expected[kind] + if extra: + raise ValueError( + "Result from applying user function has unexpected " + f"{nice_str} variables {extra}." + ) + + def dataset_to_dataarray(obj: Dataset) -> DataArray: if not isinstance(obj, Dataset): raise TypeError("Expected Dataset, got %s" % type(obj)) @@ -43,6 +83,17 @@ def dataset_to_dataarray(obj: Dataset) -> DataArray: return next(iter(obj.data_vars.values())) +def dataarray_to_dataset(obj: DataArray) -> Dataset: + # only using _to_temp_dataset would break + # func = lambda x: x.to_dataset() + # since that relies on preserving name. + if obj.name is None: + dataset = obj._to_temp_dataset() + else: + dataset = obj.to_dataset() + return dataset + + def make_meta(obj): """If obj is a DataArray or Dataset, return a new object of the same type and with the same variables and dtypes, but where all variables have size 0 and numpy @@ -80,7 +131,8 @@ def infer_template( template = func(*meta_args, **kwargs) except Exception as e: raise Exception( - "Cannot infer object returned from running user provided function." + "Cannot infer object returned from running user provided function. " + "Please supply the 'template' kwarg to map_blocks." 
) from e if not isinstance(template, (Dataset, DataArray)): @@ -102,39 +154,54 @@ def make_dict(x: Union[DataArray, Dataset]) -> Dict[Hashable, Any]: return {k: v.data for k, v in x.variables.items()} +def _get_chunk_slicer(dim: Hashable, chunk_index: Mapping, chunk_bounds: Mapping): + if dim in chunk_index: + which_chunk = chunk_index[dim] + return slice(chunk_bounds[dim][which_chunk], chunk_bounds[dim][which_chunk + 1]) + return slice(None) + + def map_blocks( func: Callable[..., T_DSorDA], obj: Union[DataArray, Dataset], args: Sequence[Any] = (), kwargs: Mapping[str, Any] = None, + template: Union[DataArray, Dataset] = None, ) -> T_DSorDA: - """Apply a function to each chunk of a DataArray or Dataset. This function is - experimental and its signature may change. + """Apply a function to each block of a DataArray or Dataset. + + .. warning:: + This function is experimental and its signature may change. Parameters ---------- func: callable User-provided function that accepts a DataArray or Dataset as its first - parameter. The function will receive a subset of 'obj' (see below), + parameter ``obj``. The function will receive a subset or 'block' of ``obj`` (see below), corresponding to one chunk along each chunked dimension. ``func`` will be - executed as ``func(obj_subset, *args, **kwargs)``. - - The function will be first run on mocked-up data, that looks like 'obj' but - has sizes 0, to determine properties of the returned object such as dtype, - variable names, new dimensions and new indexes (if any). + executed as ``func(subset_obj, *subset_args, **kwargs)``. This function must return either a single DataArray or a single Dataset. - This function cannot change size of existing dimensions, or add new chunked - dimensions. + This function cannot add a new chunked dimension. + obj: DataArray, Dataset - Passed to the function as its first argument, one dask chunk at a time. + Passed to the function as its first argument, one block at a time. args: Sequence - Passed verbatim to func after unpacking, after the sliced obj. xarray objects, - if any, will not be split by chunks. Passing dask collections is not allowed. + Passed to func after unpacking and subsetting any xarray objects by blocks. + xarray objects in args must be aligned with obj, otherwise an error is raised. kwargs: Mapping Passed verbatim to func after unpacking. xarray objects, if any, will not be - split by chunks. Passing dask collections is not allowed. + subset to blocks. Passing dask collections in kwargs is not allowed. + template: (optional) DataArray, Dataset + xarray object representing the final result after compute is called. If not provided, + the function will be first run on mocked-up data, that looks like ``obj`` but + has sizes 0, to determine properties of the returned object such as dtype, + variable names, attributes, new dimensions and new indexes (if any). + ``template`` must be provided if the function changes the size of existing dimensions. + When provided, ``attrs`` on variables in `template` are copied over to the result. Any + ``attrs`` set by ``func`` will be ignored. + Returns ------- @@ -143,11 +210,11 @@ def map_blocks( Notes ----- - This function is designed for when one needs to manipulate a whole xarray object - within each chunk. In the more common case where one can work on numpy arrays, it is - recommended to use apply_ufunc. + This function is designed for when ``func`` needs to manipulate a whole xarray object + subset to each block. 
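A minimal sketch of the two behaviours described in this docstring, block-wise subsetting of xarray objects passed via ``args`` and a user-supplied ``template``; it assumes dask is installed and all names are illustrative:

```python
import numpy as np
import xarray as xr

da = xr.DataArray(np.arange(6.0), dims="x").chunk({"x": 3})
other = xr.DataArray(np.ones(6), dims="x").chunk({"x": 3})

def add(block, other_block):
    # `other` is an xarray object in ``args``, so it is split into blocks too
    return block + other_block

xr.map_blocks(add, da, args=[other]).compute()

def double(block):
    # changes the size of "x", so the result cannot be inferred: pass a template
    return xr.concat([block, block], dim="x")

template = xr.concat([da, da], dim="x").chunk({"x": 6})
xr.map_blocks(double, da, template=template).compute()
```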
In the more common case where ``func`` can work on numpy arrays, it is + recommended to use ``apply_ufunc``. - If none of the variables in obj is backed by dask, calling this function is + If none of the variables in ``obj`` is backed by dask arrays, calling this function is equivalent to calling ``func(obj, *args, **kwargs)``. See Also @@ -163,10 +230,6 @@ def map_blocks( its indices, and its methods like ``.groupby()``. >>> def calculate_anomaly(da, groupby_type="time.month"): - ... # Necessary workaround to xarray's check with zero dimensions - ... # https://github.com/pydata/xarray/issues/3575 - ... if sum(da.shape) == 0: - ... return da ... gb = da.groupby(groupby_type) ... clim = gb.mean(dim="time") ... return gb - clim @@ -175,7 +238,7 @@ def map_blocks( >>> array = xr.DataArray( ... np.random.rand(len(time)), dims="time", coords=[time] ... ).chunk() - >>> xr.map_blocks(calculate_anomaly, array).compute() + >>> xr.map_blocks(calculate_anomaly, array, template=array).compute() array([ 0.12894847, 0.11323072, -0.0855964 , -0.09334032, 0.26848862, 0.12382735, 0.22460641, 0.07650108, -0.07673453, -0.22865714, @@ -189,7 +252,10 @@ def map_blocks( to the function being applied in ``xr.map_blocks()``: >>> xr.map_blocks( - ... calculate_anomaly, array, kwargs={"groupby_type": "time.year"}, + ... calculate_anomaly, + ... array, + ... kwargs={"groupby_type": "time.year"}, + ... template=array, ... ) array([ 0.15361741, -0.25671244, -0.31600032, 0.008463 , 0.1766172 , @@ -201,22 +267,57 @@ def map_blocks( * time (time) object 1990-01-31 00:00:00 ... 1991-12-31 00:00:00 """ - def _wrapper(func, obj, to_array, args, kwargs): - if to_array: - obj = dataset_to_dataarray(obj) - - result = func(obj, *args, **kwargs) + def _wrapper( + func: Callable, + args: List, + kwargs: dict, + arg_is_array: Iterable[bool], + expected: dict, + ): + """ + Wrapper function that receives datasets in args; converts to dataarrays when necessary; + passes these to the user function `func` and checks returned objects for expected shapes/sizes/etc. + """ + + converted_args = [ + dataset_to_dataarray(arg) if is_array else arg + for is_array, arg in zip(arg_is_array, args) + ] + + result = func(*converted_args, **kwargs) + + # check all dims are present + missing_dimensions = set(expected["shapes"]) - set(result.sizes) + if missing_dimensions: + raise ValueError( + f"Dimensions {missing_dimensions} missing on returned object." + ) + # check that index lengths and values are as expected for name, index in result.indexes.items(): - if name in obj.indexes: - if len(index) != len(obj.indexes[name]): + if name in expected["shapes"]: + if len(index) != expected["shapes"][name]: + raise ValueError( + f"Received dimension {name!r} of length {len(index)}. Expected length {expected['shapes'][name]}." + ) + if name in expected["indexes"]: + expected_index = expected["indexes"][name] + if not index.equals(expected_index): raise ValueError( - "Length of the %r dimension has changed. This is not allowed." - % name + f"Expected index {name!r} to be {expected_index!r}. Received {index!r} instead." ) + # check that all expected variables were returned + check_result_variables(result, expected, "coords") + if isinstance(result, Dataset): + check_result_variables(result, expected, "data_vars") + return make_dict(result) + if template is not None and not isinstance(template, (DataArray, Dataset)): + raise TypeError( + f"template must be a DataArray or Dataset. Received {type(template).__name__} instead." 
+ ) if not isinstance(args, Sequence): raise TypeError("args must be a sequence (for example, a list or tuple).") if kwargs is None: @@ -224,32 +325,76 @@ def _wrapper(func, obj, to_array, args, kwargs): elif not isinstance(kwargs, Mapping): raise TypeError("kwargs must be a mapping (for example, a dict)") - for value in list(args) + list(kwargs.values()): + for value in kwargs.values(): if dask.is_dask_collection(value): raise TypeError( - "Cannot pass dask collections in args or kwargs yet. Please compute or " + "Cannot pass dask collections in kwargs yet. Please compute or " "load values before passing to map_blocks." ) if not dask.is_dask_collection(obj): return func(obj, *args, **kwargs) - if isinstance(obj, DataArray): - # only using _to_temp_dataset would break - # func = lambda x: x.to_dataset() - # since that relies on preserving name. - if obj.name is None: - dataset = obj._to_temp_dataset() - else: - dataset = obj.to_dataset() - input_is_array = True - else: - dataset = obj - input_is_array = False + all_args = [obj] + list(args) + is_xarray = [isinstance(arg, (Dataset, DataArray)) for arg in all_args] + is_array = [isinstance(arg, DataArray) for arg in all_args] - input_chunks = dataset.chunks + # there should be a better way to group this. partition? + xarray_indices, xarray_objs = unzip( + (index, arg) for index, arg in enumerate(all_args) if is_xarray[index] + ) + others = [ + (index, arg) for index, arg in enumerate(all_args) if not is_xarray[index] + ] + + # all xarray objects must be aligned. This is consistent with apply_ufunc. + aligned = align(*xarray_objs, join="exact") + xarray_objs = tuple( + dataarray_to_dataset(arg) if is_da else arg + for is_da, arg in zip(is_array, aligned) + ) + + _, npargs = unzip( + sorted(list(zip(xarray_indices, xarray_objs)) + others, key=lambda x: x[0]) + ) + + # check that chunk sizes are compatible + input_chunks = dict(npargs[0].chunks) + input_indexes = dict(npargs[0].indexes) + for arg in xarray_objs[1:]: + assert_chunks_compatible(npargs[0], arg) + input_chunks.update(arg.chunks) + input_indexes.update(arg.indexes) + + if template is None: + # infer template by providing zero-shaped arrays + template = infer_template(func, aligned[0], *args, **kwargs) + template_indexes = set(template.indexes) + preserved_indexes = template_indexes & set(input_indexes) + new_indexes = template_indexes - set(input_indexes) + indexes = {dim: input_indexes[dim] for dim in preserved_indexes} + indexes.update({k: template.indexes[k] for k in new_indexes}) + output_chunks = { + dim: input_chunks[dim] for dim in template.dims if dim in input_chunks + } + + else: + # template xarray object has been provided with proper sizes and chunk shapes + indexes = dict(template.indexes) + if isinstance(template, DataArray): + output_chunks = dict(zip(template.dims, template.chunks)) # type: ignore + else: + output_chunks = dict(template.chunks) + + for dim in output_chunks: + if dim in input_chunks and len(input_chunks[dim]) != len(output_chunks[dim]): + raise ValueError( + "map_blocks requires that one block of the input maps to one block of output. " + f"Expected number of output chunks along dimension {dim!r} to be {len(input_chunks[dim])}. " + f"Received {len(output_chunks[dim])} instead. Please provide template if not provided, or " + "fix the provided template." 
+ ) - template: Union[DataArray, Dataset] = infer_template(func, obj, *args, **kwargs) if isinstance(template, DataArray): result_is_array = True template_name = template.name @@ -261,13 +406,6 @@ def _wrapper(func, obj, to_array, args, kwargs): f"func output must be DataArray or Dataset; got {type(template)}" ) - template_indexes = set(template.indexes) - dataset_indexes = set(dataset.indexes) - preserved_indexes = template_indexes & dataset_indexes - new_indexes = template_indexes - dataset_indexes - indexes = {dim: dataset.indexes[dim] for dim in preserved_indexes} - indexes.update({k: template.indexes[k] for k in new_indexes}) - # We're building a new HighLevelGraph hlg. We'll have one new layer # for each variable in the dataset, which is the result of the # func applied to the values. @@ -275,19 +413,27 @@ def _wrapper(func, obj, to_array, args, kwargs): graph: Dict[Any, Any] = {} new_layers: DefaultDict[str, Dict[Any, Any]] = collections.defaultdict(dict) gname = "{}-{}".format( - dask.utils.funcname(func), dask.base.tokenize(dataset, args, kwargs) + dask.utils.funcname(func), dask.base.tokenize(npargs[0], args, kwargs) ) # map dims to list of chunk indexes ichunk = {dim: range(len(chunks_v)) for dim, chunks_v in input_chunks.items()} # mapping from chunk index to slice bounds - chunk_index_bounds = { + input_chunk_bounds = { dim: np.cumsum((0,) + chunks_v) for dim, chunks_v in input_chunks.items() } + output_chunk_bounds = { + dim: np.cumsum((0,) + chunks_v) for dim, chunks_v in output_chunks.items() + } - # iterate over all possible chunk combinations - for v in itertools.product(*ichunk.values()): - chunk_index_dict = dict(zip(dataset.dims, v)) + def subset_dataset_to_block( + graph: dict, gname: str, dataset: Dataset, input_chunk_bounds, chunk_index + ): + """ + Creates a task that subsets an xarray dataset to a block determined by chunk_index. + Block extents are determined by input_chunk_bounds. + Also subtasks that subset the constituent variables of a dataset. 
+ """ # this will become [[name1, variable1], # [name2, variable2], @@ -296,35 +442,31 @@ def _wrapper(func, obj, to_array, args, kwargs): data_vars = [] coords = [] + chunk_tuple = tuple(chunk_index.values()) for name, variable in dataset.variables.items(): # make a task that creates tuple of (dims, chunk) if dask.is_dask_collection(variable.data): # recursively index into dask_keys nested list to get chunk chunk = variable.__dask_keys__() for dim in variable.dims: - chunk = chunk[chunk_index_dict[dim]] + chunk = chunk[chunk_index[dim]] - chunk_variable_task = (f"{gname}-{chunk[0]}",) + v + chunk_variable_task = (f"{gname}-{name}-{chunk[0]}",) + chunk_tuple graph[chunk_variable_task] = ( tuple, [variable.dims, chunk, variable.attrs], ) else: - # non-dask array with possibly chunked dimensions + # non-dask array possibly with dimensions chunked on other variables # index into variable appropriately - subsetter = {} - for dim in variable.dims: - if dim in chunk_index_dict: - which_chunk = chunk_index_dict[dim] - subsetter[dim] = slice( - chunk_index_bounds[dim][which_chunk], - chunk_index_bounds[dim][which_chunk + 1], - ) - + subsetter = { + dim: _get_chunk_slicer(dim, chunk_index, input_chunk_bounds) + for dim in variable.dims + } subset = variable.isel(subsetter) chunk_variable_task = ( "{}-{}".format(gname, dask.base.tokenize(subset)), - ) + v + ) + chunk_tuple graph[chunk_variable_task] = ( tuple, [subset.dims, subset, subset.attrs], @@ -336,15 +478,37 @@ def _wrapper(func, obj, to_array, args, kwargs): else: data_vars.append([name, chunk_variable_task]) - from_wrapper = (gname,) + v - graph[from_wrapper] = ( - _wrapper, - func, - (Dataset, (dict, data_vars), (dict, coords), dataset.attrs), - input_is_array, - args, - kwargs, - ) + return (Dataset, (dict, data_vars), (dict, coords), dataset.attrs) + + # iterate over all possible chunk combinations + for chunk_tuple in itertools.product(*ichunk.values()): + # mapping from dimension name to chunk index + chunk_index = dict(zip(ichunk.keys(), chunk_tuple)) + + blocked_args = [ + subset_dataset_to_block(graph, gname, arg, input_chunk_bounds, chunk_index) + if isxr + else arg + for isxr, arg in zip(is_xarray, npargs) + ] + + # expected["shapes", "coords", "data_vars", "indexes"] are used to + # raise nice error messages in _wrapper + expected = {} + # input chunk 0 along a dimension maps to output chunk 0 along the same dimension + # even if length of dimension is changed by the applied function + expected["shapes"] = { + k: output_chunks[k][v] for k, v in chunk_index.items() if k in output_chunks + } + expected["data_vars"] = set(template.data_vars.keys()) # type: ignore + expected["coords"] = set(template.coords.keys()) # type: ignore + expected["indexes"] = { + dim: indexes[dim][_get_chunk_slicer(dim, chunk_index, output_chunk_bounds)] + for dim in indexes + } + + from_wrapper = (gname,) + chunk_tuple + graph[from_wrapper] = (_wrapper, func, blocked_args, kwargs, is_array, expected) # mapping from variable name to dask graph key var_key_map: Dict[Hashable, str] = {} @@ -356,10 +520,11 @@ def _wrapper(func, obj, to_array, args, kwargs): key: Tuple[Any, ...] 
= (gname_l,) for dim in variable.dims: - if dim in chunk_index_dict: - key += (chunk_index_dict[dim],) + if dim in chunk_index: + key += (chunk_index[dim],) else: # unchunked dimensions in the input have one chunk in the result + # output can have new dimensions with exactly one chunk key += (0,) # We're adding multiple new layers to the graph: @@ -370,7 +535,11 @@ def _wrapper(func, obj, to_array, args, kwargs): # layer. new_layers[gname_l][key] = (operator.getitem, from_wrapper, name) - hlg = HighLevelGraph.from_collections(gname, graph, dependencies=[dataset]) + hlg = HighLevelGraph.from_collections( + gname, + graph, + dependencies=[arg for arg in npargs if dask.is_dask_collection(arg)], + ) for gname_l, layer in new_layers.items(): # This adds in the getitems for each variable in the dataset. @@ -378,12 +547,16 @@ def _wrapper(func, obj, to_array, args, kwargs): hlg.layers[gname_l] = layer result = Dataset(coords=indexes, attrs=template.attrs) + for index in result.indexes: + result[index].attrs = template[index].attrs + result[index].encoding = template[index].encoding + for name, gname_l in var_key_map.items(): dims = template[name].dims var_chunks = [] for dim in dims: - if dim in input_chunks: - var_chunks.append(input_chunks[dim]) + if dim in output_chunks: + var_chunks.append(output_chunks[dim]) elif dim in indexes: var_chunks.append((len(indexes[dim]),)) elif dim in template.dims: @@ -394,6 +567,7 @@ def _wrapper(func, obj, to_array, args, kwargs): hlg, name=gname_l, chunks=var_chunks, dtype=template[name].dtype ) result[name] = (dims, data, template[name].attrs) + result[name].encoding = template[name].encoding result = result.set_coords(template._coord_names) diff --git a/xarray/core/pdcompat.py b/xarray/core/pdcompat.py index f2e4518e0dc..f2e22329fc8 100644 --- a/xarray/core/pdcompat.py +++ b/xarray/core/pdcompat.py @@ -55,4 +55,4 @@ def count_not_none(*args) -> int: Copied from pandas.core.common.count_not_none (not part of the public API) """ - return sum([arg is not None for arg in args]) + return sum(arg is not None for arg in args) diff --git a/xarray/core/utils.py b/xarray/core/utils.py index 1126cf3037f..0542f850b02 100644 --- a/xarray/core/utils.py +++ b/xarray/core/utils.py @@ -787,6 +787,24 @@ def drop_dims_from_indexers( ) +class UncachedAccessor: + """ Acts like a property, but on both classes and class instances + + This class is necessary because some tools (e.g. pydoc and sphinx) + inspect classes for which property returns itself and not the + accessor. 
+ """ + + def __init__(self, accessor): + self._accessor = accessor + + def __get__(self, obj, cls): + if obj is None: + return self._accessor + + return self._accessor(obj) + + # Singleton type, as per https://github.com/python/typing/pull/240 class Default(Enum): token = 0 diff --git a/xarray/core/variable.py b/xarray/core/variable.py index 68e823ca426..c505c749557 100644 --- a/xarray/core/variable.py +++ b/xarray/core/variable.py @@ -6,7 +6,17 @@ from collections import defaultdict from datetime import timedelta from distutils.version import LooseVersion -from typing import Any, Dict, Hashable, Mapping, Tuple, TypeVar, Union +from typing import ( + Any, + Dict, + Hashable, + Mapping, + Optional, + Sequence, + Tuple, + TypeVar, + Union, +) import numpy as np import pandas as pd @@ -2069,6 +2079,166 @@ def _to_numeric(self, offset=None, datetime_unit=None, dtype=float): ) return type(self)(self.dims, numeric_array, self._attrs) + def _unravel_argminmax( + self, + argminmax: str, + dim: Union[Hashable, Sequence[Hashable], None], + axis: Union[int, None], + keep_attrs: Optional[bool], + skipna: Optional[bool], + ) -> Union["Variable", Dict[Hashable, "Variable"]]: + """Apply argmin or argmax over one or more dimensions, returning the result as a + dict of DataArray that can be passed directly to isel. + """ + if dim is None and axis is None: + warnings.warn( + "Behaviour of argmin/argmax with neither dim nor axis argument will " + "change to return a dict of indices of each dimension. To get a " + "single, flat index, please use np.argmin(da.data) or " + "np.argmax(da.data) instead of da.argmin() or da.argmax().", + DeprecationWarning, + stacklevel=3, + ) + + argminmax_func = getattr(duck_array_ops, argminmax) + + if dim is ...: + # In future, should do this also when (dim is None and axis is None) + dim = self.dims + if ( + dim is None + or axis is not None + or not isinstance(dim, Sequence) + or isinstance(dim, str) + ): + # Return int index if single dimension is passed, and is not part of a + # sequence + return self.reduce( + argminmax_func, dim=dim, axis=axis, keep_attrs=keep_attrs, skipna=skipna + ) + + # Get a name for the new dimension that does not conflict with any existing + # dimension + newdimname = "_unravel_argminmax_dim_0" + count = 1 + while newdimname in self.dims: + newdimname = "_unravel_argminmax_dim_{}".format(count) + count += 1 + + stacked = self.stack({newdimname: dim}) + + result_dims = stacked.dims[:-1] + reduce_shape = tuple(self.sizes[d] for d in dim) + + result_flat_indices = stacked.reduce(argminmax_func, axis=-1, skipna=skipna) + + result_unravelled_indices = duck_array_ops.unravel_index( + result_flat_indices.data, reduce_shape + ) + + result = { + d: Variable(dims=result_dims, data=i) + for d, i in zip(dim, result_unravelled_indices) + } + + if keep_attrs is None: + keep_attrs = _get_keep_attrs(default=False) + if keep_attrs: + for v in result.values(): + v.attrs = self.attrs + + return result + + def argmin( + self, + dim: Union[Hashable, Sequence[Hashable]] = None, + axis: int = None, + keep_attrs: bool = None, + skipna: bool = None, + ) -> Union["Variable", Dict[Hashable, "Variable"]]: + """Index or indices of the minimum of the Variable over one or more dimensions. + If a sequence is passed to 'dim', then result returned as dict of Variables, + which can be passed directly to isel(). If a single str is passed to 'dim' then + returns a Variable with dtype int. + + If there are multiple minima, the indices of the first one found will be + returned. 
+ + Parameters + ---------- + dim : hashable, sequence of hashable or ..., optional + The dimensions over which to find the minimum. By default, finds minimum over + all dimensions - for now returning an int for backward compatibility, but + this is deprecated, in future will return a dict with indices for all + dimensions; to return a dict with all dimensions now, pass '...'. + axis : int, optional + Axis over which to apply `argmin`. Only one of the 'dim' and 'axis' arguments + can be supplied. + keep_attrs : bool, optional + If True, the attributes (`attrs`) will be copied from the original + object to the new one. If False (default), the new object will be + returned without attributes. + skipna : bool, optional + If True, skip missing values (as marked by NaN). By default, only + skips missing values for float dtypes; other dtypes either do not + have a sentinel missing value (int) or skipna=True has not been + implemented (object, datetime64 or timedelta64). + + Returns + ------- + result : Variable or dict of Variable + + See also + -------- + DataArray.argmin, DataArray.idxmin + """ + return self._unravel_argminmax("argmin", dim, axis, keep_attrs, skipna) + + def argmax( + self, + dim: Union[Hashable, Sequence[Hashable]] = None, + axis: int = None, + keep_attrs: bool = None, + skipna: bool = None, + ) -> Union["Variable", Dict[Hashable, "Variable"]]: + """Index or indices of the maximum of the Variable over one or more dimensions. + If a sequence is passed to 'dim', then result returned as dict of Variables, + which can be passed directly to isel(). If a single str is passed to 'dim' then + returns a Variable with dtype int. + + If there are multiple maxima, the indices of the first one found will be + returned. + + Parameters + ---------- + dim : hashable, sequence of hashable or ..., optional + The dimensions over which to find the maximum. By default, finds maximum over + all dimensions - for now returning an int for backward compatibility, but + this is deprecated, in future will return a dict with indices for all + dimensions; to return a dict with all dimensions now, pass '...'. + axis : int, optional + Axis over which to apply `argmax`. Only one of the 'dim' and 'axis' arguments + can be supplied. + keep_attrs : bool, optional + If True, the attributes (`attrs`) will be copied from the original + object to the new one. If False (default), the new object will be + returned without attributes. + skipna : bool, optional + If True, skip missing values (as marked by NaN). By default, only + skips missing values for float dtypes; other dtypes either do not + have a sentinel missing value (int) or skipna=True has not been + implemented (object, datetime64 or timedelta64).
+ + Returns + ------- + result : Variable or dict of Variable + + See also + -------- + DataArray.argmax, DataArray.idxmax + """ + return self._unravel_argminmax("argmax", dim, axis, keep_attrs, skipna) + ops.inject_all_ops_and_reduce_methods(Variable) @@ -2412,7 +2582,7 @@ def assert_unique_multiindex_level_names(variables): duplicate_names = [v for v in level_names.values() if len(v) > 1] if duplicate_names: - conflict_str = "\n".join([", ".join(v) for v in duplicate_names]) + conflict_str = "\n".join(", ".join(v) for v in duplicate_names) raise ValueError("conflicting MultiIndex level name(s):\n%s" % conflict_str) # Check confliction between level names and dimensions GH:2299 for k, v in variables.items(): diff --git a/xarray/core/weighted.py b/xarray/core/weighted.py index 996d2e4c43e..fa143342c06 100644 --- a/xarray/core/weighted.py +++ b/xarray/core/weighted.py @@ -72,11 +72,11 @@ class Weighted: def __init__(self, obj: "DataArray", weights: "DataArray") -> None: ... - @overload # noqa: F811 - def __init__(self, obj: "Dataset", weights: "DataArray") -> None: # noqa: F811 + @overload + def __init__(self, obj: "Dataset", weights: "DataArray") -> None: ... - def __init__(self, obj, weights): # noqa: F811 + def __init__(self, obj, weights): """ Create a Weighted object @@ -142,7 +142,14 @@ def _sum_of_weights( # we need to mask data values that are nan; else the weights are wrong mask = da.notnull() - sum_of_weights = self._reduce(mask, self.weights, dim=dim, skipna=False) + # bool -> int, because ``xr.dot([True, True], [True, True])`` -> True + # (and not 2); GH4074 + if self.weights.dtype == bool: + sum_of_weights = self._reduce( + mask, self.weights.astype(int), dim=dim, skipna=False + ) + else: + sum_of_weights = self._reduce(mask, self.weights, dim=dim, skipna=False) # 0-weights are not valid valid_weights = sum_of_weights != 0.0 diff --git a/xarray/plot/plot.py b/xarray/plot/plot.py index 4657bee9415..e4a981daf8c 100644 --- a/xarray/plot/plot.py +++ b/xarray/plot/plot.py @@ -14,6 +14,7 @@ from .facetgrid import _easy_facetgrid from .utils import ( _add_colorbar, + _assert_valid_xy, _ensure_plottable, _infer_interval_breaks, _infer_xy_labels, @@ -29,19 +30,17 @@ def _infer_line_data(darray, x, y, hue): - error_msg = "must be either None or one of ({:s})".format( - ", ".join([repr(dd) for dd in darray.dims]) - ) + ndims = len(darray.dims) - if x is not None and x not in darray.dims and x not in darray.coords: - raise ValueError("x " + error_msg) + if x is not None and y is not None: + raise ValueError("Cannot specify both x and y kwargs for line plots.") - if y is not None and y not in darray.dims and y not in darray.coords: - raise ValueError("y " + error_msg) + if x is not None: + _assert_valid_xy(darray, x, "x") - if x is not None and y is not None: - raise ValueError("You cannot specify both x and y kwargs" "for line plots.") + if y is not None: + _assert_valid_xy(darray, y, "y") if ndims == 1: huename = None @@ -252,7 +251,7 @@ def line( Dimension or coordinate for which you want multiple lines plotted. If plotting against a 2D coordinate, ``hue`` must be a dimension. x, y : string, optional - Dimensions or coordinates for x, y axis. + Dimension, coordinate or MultiIndex level for x, y axis. Only one of these may be specified. The other coordinate plots values from the DataArray on which this plot method is called. 
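To illustrate the boolean-weights fix in ``xarray/core/weighted.py`` above (GH4074), a small sketch with made-up values:

```python
import xarray as xr

data = xr.DataArray([1.0, 2.0])
weights = xr.DataArray([True, True])

# Previously the boolean dot product collapsed the sum of weights to True
# (i.e. 1), giving a mean of 3.0; with the cast to int it is the expected 1.5.
data.weighted(weights).mean()
```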
@@ -446,6 +445,11 @@ def __init__(self, darray):
     def __call__(self, **kwargs):
         return plot(self._da, **kwargs)
 
+    # we can't use functools.wraps here since that also modifies the name / qualname
+    __doc__ = __call__.__doc__ = plot.__doc__
+    __call__.__wrapped__ = plot  # type: ignore
+    __call__.__annotations__ = plot.__annotations__
+
     @functools.wraps(hist)
     def hist(self, ax=None, **kwargs):
         return hist(self._da, ax=ax, **kwargs)
diff --git a/xarray/plot/utils.py b/xarray/plot/utils.py
index c3512828888..e5c1fa89333 100644
--- a/xarray/plot/utils.py
+++ b/xarray/plot/utils.py
@@ -268,7 +268,7 @@ def _determine_cmap_params(
             cmap = OPTIONS["cmap_sequential"]
 
     # Handle discrete levels
-    if levels is not None and norm is None:
+    if levels is not None:
         if is_scalar(levels):
             if user_minmax:
                 levels = np.linspace(vmin, vmax, levels)
@@ -360,7 +360,9 @@ def _infer_xy_labels(darray, x, y, imshow=False, rgb=None):
 
     darray must be a 2 dimensional data array, or 3d for imshow only.
     """
-    assert x is None or x != y
+    if (x is not None) and (x == y):
+        raise ValueError("x and y cannot be equal.")
+
     if imshow and darray.ndim == 3:
         return _infer_xy_labels_3d(darray, x, y, rgb)
 
@@ -369,18 +371,41 @@ def _infer_xy_labels(darray, x, y, imshow=False, rgb=None):
             raise ValueError("DataArray must be 2d")
         y, x = darray.dims
     elif x is None:
-        if y not in darray.dims and y not in darray.coords:
-            raise ValueError("y must be a dimension name if x is not supplied")
+        _assert_valid_xy(darray, y, "y")
         x = darray.dims[0] if y == darray.dims[1] else darray.dims[1]
     elif y is None:
-        if x not in darray.dims and x not in darray.coords:
-            raise ValueError("x must be a dimension name if y is not supplied")
+        _assert_valid_xy(darray, x, "x")
         y = darray.dims[0] if x == darray.dims[1] else darray.dims[1]
-    elif any(k not in darray.coords and k not in darray.dims for k in (x, y)):
-        raise ValueError("x and y must be coordinate variables")
+    else:
+        _assert_valid_xy(darray, x, "x")
+        _assert_valid_xy(darray, y, "y")
+
+        if (
+            all(k in darray._level_coords for k in (x, y))
+            and darray._level_coords[x] == darray._level_coords[y]
+        ):
+            raise ValueError("x and y cannot be levels of the same MultiIndex")
+
     return x, y
 
 
+def _assert_valid_xy(darray, xy, name):
+    """
+    make sure x and y passed to plotting functions are valid
+    """
+
+    # MultiIndex cannot be plotted; no point in allowing them here
+    multiindex = set([darray._level_coords[lc] for lc in darray._level_coords])
+
+    valid_xy = (
+        set(darray.dims) | set(darray.coords) | set(darray._level_coords)
+    ) - multiindex
+
+    if xy not in valid_xy:
+        valid_xy_str = "', '".join(sorted(valid_xy))
+        raise ValueError(f"{name} must be one of None, '{valid_xy_str}'")
+
+
 def get_axis(figsize, size, aspect, ax):
     import matplotlib as mpl
     import matplotlib.pyplot as plt
diff --git a/xarray/static/css/style.css b/xarray/static/css/style.css
index 7e382de3b5b..39cd6d6755f 100644
--- a/xarray/static/css/style.css
+++ b/xarray/static/css/style.css
@@ -13,11 +13,29 @@
   --xr-background-color-row-odd: var(--jp-layout-color2, #eeeeee);
 }
 
+html[theme=dark],
+body.vscode-dark {
+  --xr-font-color0: rgba(255, 255, 255, 1);
+  --xr-font-color2: rgba(255, 255, 255, 0.54);
+  --xr-font-color3: rgba(255, 255, 255, 0.38);
+  --xr-border-color: #1F1F1F;
+  --xr-disabled-color: #515151;
+  --xr-background-color: #111111;
+  --xr-background-color-row-even: #111111;
+  --xr-background-color-row-odd: #313131;
+}
+
 .xr-wrap {
+  display: block;
   min-width: 300px;
   max-width: 700px;
 }
 
+.xr-text-repr-fallback {
+  /* fallback to plain text repr when CSS is not injected (untrusted notebook) */
+  display: none;
+}
+
 .xr-header {
   padding-top: 6px;
   padding-bottom: 6px;
diff --git a/xarray/static/html/icons-svg-inline.html b/xarray/static/html/icons-svg-inline.html
index c44f89c4304..b0e837a26cd 100644
--- a/xarray/static/html/icons-svg-inline.html
+++ b/xarray/static/html/icons-svg-inline.html
@@ -1,13 +1,11 @@
-Show/Hide data repr
-Show/Hide attributes
diff --git a/xarray/testing.py b/xarray/testing.py
index ac189f7e023..9681503414e 100644
--- a/xarray/testing.py
+++ b/xarray/testing.py
@@ -1,21 +1,17 @@
 """Testing functions exposed to the user API"""
+import functools
 from typing import Hashable, Set, Union
 
 import numpy as np
 import pandas as pd
 
-from xarray.core import duck_array_ops, formatting
+from xarray.core import duck_array_ops, formatting, utils
 from xarray.core.dataarray import DataArray
 from xarray.core.dataset import Dataset
 from xarray.core.indexes import default_indexes
 from xarray.core.variable import IndexVariable, Variable
 
-__all__ = (
-    "assert_allclose",
-    "assert_chunks_equal",
-    "assert_equal",
-    "assert_identical",
-)
+__all__ = ("assert_allclose", "assert_chunks_equal", "assert_equal", "assert_identical")
 
 
 def _decode_string_data(data):
@@ -123,27 +119,31 @@ def assert_allclose(a, b, rtol=1e-05, atol=1e-08, decode_bytes=True):
     """
     __tracebackhide__ = True
     assert type(a) == type(b)
-    kwargs = dict(rtol=rtol, atol=atol, decode_bytes=decode_bytes)
+
+    equiv = functools.partial(
+        _data_allclose_or_equiv, rtol=rtol, atol=atol, decode_bytes=decode_bytes
+    )
+    equiv.__name__ = "allclose"
+
+    def compat_variable(a, b):
+        a = getattr(a, "variable", a)
+        b = getattr(b, "variable", b)
+
+        return a.dims == b.dims and (a._data is b._data or equiv(a.data, b.data))
+
     if isinstance(a, Variable):
-        assert a.dims == b.dims
-        allclose = _data_allclose_or_equiv(a.values, b.values, **kwargs)
-        assert allclose, f"{a.values}\n{b.values}"
+        allclose = compat_variable(a, b)
+        assert allclose, formatting.diff_array_repr(a, b, compat=equiv)
     elif isinstance(a, DataArray):
-        assert_allclose(a.variable, b.variable, **kwargs)
-        assert set(a.coords) == set(b.coords)
-        for v in a.coords.variables:
-            # can't recurse with this function as coord is sometimes a
-            # DataArray, so call into _data_allclose_or_equiv directly
-            allclose = _data_allclose_or_equiv(
-                a.coords[v].values, b.coords[v].values, **kwargs
-            )
-            assert allclose, "{}\n{}".format(a.coords[v].values, b.coords[v].values)
+        allclose = utils.dict_equiv(
+            a.coords, b.coords, compat=compat_variable
+        ) and compat_variable(a.variable, b.variable)
+        assert allclose, formatting.diff_array_repr(a, b, compat=equiv)
     elif isinstance(a, Dataset):
-        assert set(a.data_vars) == set(b.data_vars)
-        assert set(a.coords) == set(b.coords)
-        for k in list(a.variables) + list(a.coords):
-            assert_allclose(a[k], b[k], **kwargs)
-
+        allclose = a._coord_names == b._coord_names and utils.dict_equiv(
+            a.variables, b.variables, compat=compat_variable
+        )
+        assert allclose, formatting.diff_dataset_repr(a, b, compat=equiv)
     else:
         raise TypeError("{} not supported by assertion comparison".format(type(a)))
diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py
index 0af34612b08..b1fe5375723 100644
--- a/xarray/tests/test_backends.py
+++ b/xarray/tests/test_backends.py
@@ -30,6 +30,7 @@
     save_mfdataset,
 )
 from xarray.backends.common import robust_getitem
+from xarray.backends.netcdf3 import _nc3_dtype_coercions
 from xarray.backends.netCDF4_ import _extract_nc4_variable_encoding
 from xarray.backends.pydap_ import PydapDataStore
 from xarray.coding.variables import SerializationWarning
@@ -86,6 +87,7 @@
     dask_version = "10.0"
 
 ON_WINDOWS = sys.platform == "win32"
+default_value = object()
 
 
 def open_example_dataset(name, *args, **kwargs):
@@ -227,7 +229,27 @@ def __getitem__(self, key):
 
 
 class NetCDF3Only:
-    pass
+    netcdf3_formats = ("NETCDF3_CLASSIC", "NETCDF3_64BIT")
+
+    @requires_scipy
+    def test_dtype_coercion_error(self):
+        """Failing dtype coercion should lead to an error"""
+        for dtype, format in itertools.product(
+            _nc3_dtype_coercions, self.netcdf3_formats
+        ):
+            if dtype == "bool":
+                # coerced upcast (bool to int8) ==> can never fail
+                continue
+
+            # Using the largest representable value, create some data that will
+            # no longer compare equal after the coerced downcast
+            maxval = np.iinfo(dtype).max
+            x = np.array([0, 1, 2, maxval], dtype=dtype)
+            ds = Dataset({"x": ("t", x, {})})
+
+            with create_tmp_file(allow_cleanup_failure=False) as path:
+                with pytest.raises(ValueError, match="could not safely cast"):
+                    ds.to_netcdf(path, format=format)
 
 
 class DatasetIOBase:
@@ -296,9 +318,14 @@ def test_write_store(self):
     def check_dtypes_roundtripped(self, expected, actual):
         for k in expected.variables:
             expected_dtype = expected.variables[k].dtype
-            if isinstance(self, NetCDF3Only) and expected_dtype == "int64":
-                # downcast
-                expected_dtype = np.dtype("int32")
+
+            # For NetCDF3, the backend should perform dtype coercion
+            if (
+                isinstance(self, NetCDF3Only)
+                and str(expected_dtype) in _nc3_dtype_coercions
+            ):
+                expected_dtype = np.dtype(_nc3_dtype_coercions[str(expected_dtype)])
+
             actual_dtype = actual.variables[k].dtype
             # TODO: check expected behavior for string dtypes more carefully
             string_kinds = {"O", "S", "U"}
@@ -858,7 +885,7 @@ def test_roundtrip_endian(self):
                 "x": np.arange(3, 10, dtype=">i2"),
                 "y": np.arange(3, 20, dtype="
diff --git a/xarray/tests/test_formatting_html.py b/xarray/tests/test_formatting_html.py
array")
 
 
 def test_short_data_repr_html_non_str_keys(dataset):
@@ -108,8 +108,8 @@ def test_summarize_attrs_with_unsafe_attr_name_and_value():
 def test_repr_of_dataarray(dataarray):
     formatted = fh.array_repr(dataarray)
     assert "dim_0" in formatted
-    # has an expandable data section
-    assert formatted.count("class='xr-array-in' type='checkbox' >") == 1
+    # has an expanded data section
+    assert formatted.count("class='xr-array-in' type='checkbox' checked>") == 1
     # coords and attrs don't have an items so they'll be be disabled and collapsed
     assert (
         formatted.count("class='xr-section-summary-in' type='checkbox' disabled >") == 2
@@ -137,3 +137,22 @@ def test_repr_of_dataset(dataset):
     )
     assert "<U4" in formatted or ">U4" in formatted
     assert "<IA>" in formatted
+
+
+def test_repr_text_fallback(dataset):
+    formatted = fh.dataset_repr(dataset)
+
+    # Just test that the "pre" block used for fallback to plain text is present.
+    assert "<pre class='xr-text-repr-fallback'>" in formatted
    +
    +
    +def test_variable_repr_html():
    +    v = xr.Variable(["time", "x"], [[1, 2, 3], [4, 5, 6]], {"foo": "bar"})
    +    assert hasattr(v, "_repr_html_")
    +    with xr.set_options(display_style="html"):
    +        html = v._repr_html_().strip()
    +    # We don't do a complete string identity since
    +    # html output is probably subject to change, is long and... reasons.
    +    # Just test that something reasonable was produced.
+    assert html.startswith("<div")
    +    assert "xarray.Variable" in html
    diff --git a/xarray/tests/test_groupby.py b/xarray/tests/test_groupby.py
    index 866d5fb0899..aa54c8f36f1 100644
    --- a/xarray/tests/test_groupby.py
    +++ b/xarray/tests/test_groupby.py
    @@ -538,4 +538,16 @@ def test_groupby_bins_timeseries():
         assert_identical(actual, expected)
     
     
    +def test_groupby_none_group_name():
    +    # GH158
    +    # xarray should not fail if a DataArray's name attribute is None
    +
    +    data = np.arange(10) + 10
    +    da = xr.DataArray(data)  # da.name = None
    +    key = xr.DataArray(np.floor_divide(data, 2))
    +
    +    mean = da.groupby(key).mean()
    +    assert "group" in mean.dims
    +
    +
     # TODO: move other groupby tests from test_dataset and test_dataarray over here
    diff --git a/xarray/tests/test_interp.py b/xarray/tests/test_interp.py
    index 0502348160e..7a0dda216e2 100644
    --- a/xarray/tests/test_interp.py
    +++ b/xarray/tests/test_interp.py
    @@ -699,3 +699,21 @@ def test_3641():
         times = xr.cftime_range("0001", periods=3, freq="500Y")
         da = xr.DataArray(range(3), dims=["time"], coords=[times])
         da.interp(time=["0002-05-01"])
    +
    +
    +@requires_scipy
    +@pytest.mark.parametrize("method", ["nearest", "linear"])
    +def test_decompose(method):
    +    da = xr.DataArray(
    +        np.arange(6).reshape(3, 2),
    +        dims=["x", "y"],
    +        coords={"x": [0, 1, 2], "y": [-0.1, -0.3]},
    +    )
    +    x_new = xr.DataArray([0.5, 1.5, 2.5], dims=["x1"])
    +    y_new = xr.DataArray([-0.15, -0.25], dims=["y1"])
    +    x_broadcast, y_broadcast = xr.broadcast(x_new, y_new)
    +    assert x_broadcast.ndim == 2
    +
    +    actual = da.interp(x=x_new, y=y_new, method=method).drop(("x", "y"))
    +    expected = da.interp(x=x_broadcast, y=y_broadcast, method=method).drop(("x", "y"))
    +    assert_allclose(actual, expected)
    diff --git a/xarray/tests/test_missing.py b/xarray/tests/test_missing.py
    index 731cd165244..bc186c8bd15 100644
    --- a/xarray/tests/test_missing.py
    +++ b/xarray/tests/test_missing.py
    @@ -534,6 +534,18 @@ def test_get_clean_interp_index_potential_overflow():
         get_clean_interp_index(da, "time")
     
     
    +@pytest.mark.parametrize("index", ([0, 2, 1], [0, 1, 1]))
    +def test_get_clean_interp_index_strict(index):
    +    da = xr.DataArray([0, 1, 2], dims=("x",), coords={"x": index})
    +
    +    with pytest.raises(ValueError):
    +        get_clean_interp_index(da, "x")
    +
    +    clean = get_clean_interp_index(da, "x", strict=False)
    +    np.testing.assert_array_equal(index, clean)
    +    assert clean.dtype == np.float64
    +
    +
     @pytest.fixture
     def da_time():
         return xr.DataArray(
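
A quick sketch of the `strict` escape hatch tested above (not part of the patch; `get_clean_interp_index` lives in `xarray.core.missing`): by default the index must be unique and monotonic, while `strict=False` skips those checks and simply returns the numeric index.

import numpy as np
import xarray as xr
from xarray.core.missing import get_clean_interp_index

da = xr.DataArray([0, 1, 2], dims=("x",), coords={"x": [0, 2, 1]})

# The default (strict=True) raises ValueError for this non-monotonic coordinate;
# strict=False returns the coordinate values as a float index instead.
index = get_clean_interp_index(da, "x", strict=False)
np.testing.assert_array_equal(index, [0, 2, 1])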
    diff --git a/xarray/tests/test_plot.py b/xarray/tests/test_plot.py
    index bf1f9ed60bb..938f403e01b 100644
    --- a/xarray/tests/test_plot.py
    +++ b/xarray/tests/test_plot.py
    @@ -111,6 +111,12 @@ class TestPlot(PlotTestCase):
         def setup_array(self):
             self.darray = DataArray(easy_array((2, 3, 4)))
     
    +    def test_accessor(self):
    +        from ..plot.plot import _PlotMethods
    +
    +        assert DataArray.plot is _PlotMethods
    +        assert isinstance(self.darray.plot, _PlotMethods)
    +
         def test_label_from_attrs(self):
             da = self.darray.copy()
             assert "" == label_from_attrs(da)
    @@ -136,14 +142,14 @@ def test_label_from_attrs(self):
         def test1d(self):
             self.darray[:, 0, 0].plot()
     
    -        with raises_regex(ValueError, "None"):
    +        with raises_regex(ValueError, "x must be one of None, 'dim_0'"):
                 self.darray[:, 0, 0].plot(x="dim_1")
     
             with raises_regex(TypeError, "complex128"):
                 (self.darray[:, 0, 0] + 1j).plot()
     
         def test_1d_bool(self):
    -        xr.ones_like(self.darray[:, 0, 0], dtype=np.bool).plot()
    +        xr.ones_like(self.darray[:, 0, 0], dtype=bool).plot()
     
         def test_1d_x_y_kw(self):
             z = np.arange(10)
    @@ -155,14 +161,31 @@ def test_1d_x_y_kw(self):
             for aa, (x, y) in enumerate(xy):
                 da.plot(x=x, y=y, ax=ax.flat[aa])
     
    -        with raises_regex(ValueError, "cannot"):
    +        with raises_regex(ValueError, "Cannot specify both"):
                 da.plot(x="z", y="z")
     
    -        with raises_regex(ValueError, "None"):
    -            da.plot(x="f", y="z")
    +        error_msg = "must be one of None, 'z'"
    +        with raises_regex(ValueError, f"x {error_msg}"):
    +            da.plot(x="f")
    +
    +        with raises_regex(ValueError, f"y {error_msg}"):
    +            da.plot(y="f")
    +
    +    def test_multiindex_level_as_coord(self):
    +        da = xr.DataArray(
    +            np.arange(5),
    +            dims="x",
    +            coords=dict(a=("x", np.arange(5)), b=("x", np.arange(5, 10))),
    +        )
    +        da = da.set_index(x=["a", "b"])
    +
    +        for x in ["a", "b"]:
    +            h = da.plot(x=x)[0]
    +            assert_array_equal(h.get_xdata(), da[x].values)
     
    -        with raises_regex(ValueError, "None"):
    -            da.plot(x="z", y="f")
    +        for y in ["a", "b"]:
    +            h = da.plot(y=y)[0]
    +            assert_array_equal(h.get_ydata(), da[y].values)
     
         # Test for bug in GH issue #2725
         def test_infer_line_data(self):
    @@ -211,7 +234,7 @@ def test_2d_line(self):
             self.darray[:, :, 0].plot.line(x="dim_0", hue="dim_1")
             self.darray[:, :, 0].plot.line(y="dim_0", hue="dim_1")
     
    -        with raises_regex(ValueError, "cannot"):
    +        with raises_regex(ValueError, "Cannot"):
                 self.darray[:, :, 0].plot.line(x="dim_1", y="dim_0", hue="dim_1")
     
         def test_2d_line_accepts_legend_kw(self):
    @@ -854,21 +877,22 @@ def test_norm_sets_vmin_vmax(self):
             vmin = self.data.min()
             vmax = self.data.max()
     
    -        for norm, extend in zip(
    +        for norm, extend, levels in zip(
                 [
    +                mpl.colors.Normalize(),
                     mpl.colors.Normalize(),
                     mpl.colors.Normalize(vmin + 0.1, vmax - 0.1),
                     mpl.colors.Normalize(None, vmax - 0.1),
                     mpl.colors.Normalize(vmin + 0.1, None),
                 ],
    -            ["neither", "both", "max", "min"],
    +            ["neither", "neither", "both", "max", "min"],
    +            [7, None, None, None, None],
             ):
     
                 test_min = vmin if norm.vmin is None else norm.vmin
                 test_max = vmax if norm.vmax is None else norm.vmax
     
    -            cmap_params = _determine_cmap_params(self.data, norm=norm)
    -
    +            cmap_params = _determine_cmap_params(self.data, norm=norm, levels=levels)
                 assert cmap_params["vmin"] == test_min
                 assert cmap_params["vmax"] == test_max
                 assert cmap_params["extend"] == extend
    @@ -1013,7 +1037,7 @@ def test_1d_raises_valueerror(self):
                 self.plotfunc(self.darray[0, :])
     
         def test_bool(self):
    -        xr.ones_like(self.darray, dtype=np.bool).plot()
    +        xr.ones_like(self.darray, dtype=bool).plot()
     
         def test_complex_raises_typeerror(self):
             with raises_regex(TypeError, "complex128"):
    @@ -1031,6 +1055,16 @@ def test_nonnumeric_index_raises_typeerror(self):
             with raises_regex(TypeError, r"[Pp]lot"):
                 self.plotfunc(a)
     
    +    def test_multiindex_raises_typeerror(self):
    +        a = DataArray(
    +            easy_array((3, 2)),
    +            dims=("x", "y"),
    +            coords=dict(x=("x", [0, 1, 2]), a=("y", [0, 1]), b=("y", [2, 3])),
    +        )
    +        a = a.set_index(y=("a", "b"))
    +        with raises_regex(TypeError, r"[Pp]lot"):
    +            self.plotfunc(a)
    +
         def test_can_pass_in_axis(self):
             self.pass_in_axis(self.plotmethod)
     
    @@ -1139,15 +1173,16 @@ def test_positional_coord_string(self):
             assert "y_long_name [y_units]" == ax.get_ylabel()
     
         def test_bad_x_string_exception(self):
    -        with raises_regex(ValueError, "x and y must be coordinate variables"):
    +
    +        with raises_regex(ValueError, "x and y cannot be equal."):
    +            self.plotmethod(x="y", y="y")
    +
    +        error_msg = "must be one of None, 'x', 'x2d', 'y', 'y2d'"
    +        with raises_regex(ValueError, f"x {error_msg}"):
                 self.plotmethod("not_a_real_dim", "y")
    -        with raises_regex(
    -            ValueError, "x must be a dimension name if y is not supplied"
    -        ):
    +        with raises_regex(ValueError, f"x {error_msg}"):
                 self.plotmethod(x="not_a_real_dim")
    -        with raises_regex(
    -            ValueError, "y must be a dimension name if x is not supplied"
    -        ):
    +        with raises_regex(ValueError, f"y {error_msg}"):
                 self.plotmethod(y="not_a_real_dim")
             self.darray.coords["z"] = 100
     
    @@ -1182,6 +1217,27 @@ def test_non_linked_coords_transpose(self):
             # simply ensure that these high coords were passed over
             assert np.min(ax.get_xlim()) > 100.0
     
    +    def test_multiindex_level_as_coord(self):
    +        da = DataArray(
    +            easy_array((3, 2)),
    +            dims=("x", "y"),
    +            coords=dict(x=("x", [0, 1, 2]), a=("y", [0, 1]), b=("y", [2, 3])),
    +        )
    +        da = da.set_index(y=["a", "b"])
    +
    +        for x, y in (("a", "x"), ("b", "x"), ("x", "a"), ("x", "b")):
    +            self.plotfunc(da, x=x, y=y)
    +
    +            ax = plt.gca()
    +            assert x == ax.get_xlabel()
    +            assert y == ax.get_ylabel()
    +
    +        with raises_regex(ValueError, "levels of the same MultiIndex"):
    +            self.plotfunc(da, x="a", y="b")
    +
    +        with raises_regex(ValueError, "y must be one of None, 'a', 'b', 'x'"):
    +            self.plotfunc(da, x="a", y="y")
    +
         def test_default_title(self):
             a = DataArray(easy_array((4, 3, 2)), dims=["a", "b", "c"])
             a.coords["c"] = [0, 1]
    @@ -2048,6 +2104,12 @@ def setUp(self):
             ds.B.attrs["units"] = "Bunits"
             self.ds = ds
     
    +    def test_accessor(self):
    +        from ..plot.dataset_plot import _Dataset_PlotMethods
    +
    +        assert Dataset.plot is _Dataset_PlotMethods
    +        assert isinstance(self.ds.plot, _Dataset_PlotMethods)
    +
         @pytest.mark.parametrize(
             "add_guide, hue_style, legend, colorbar",
             [
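
A usage sketch of the stricter x/y validation and the MultiIndex-level support exercised by the plot tests above (not part of the patch; assumes matplotlib is installed):

import numpy as np
import xarray as xr

da = xr.DataArray(
    np.arange(5),
    dims="x",
    coords=dict(a=("x", np.arange(5)), b=("x", np.arange(5, 10))),
).set_index(x=["a", "b"])

# A MultiIndex level can now be used directly as the plot coordinate.
da.plot(x="a")

# An unknown name is rejected up front by _assert_valid_xy with a ValueError
# that lists the valid choices (see the raises_regex assertions above):
# da.plot(x="not_a_level")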
    diff --git a/xarray/tests/test_testing.py b/xarray/tests/test_testing.py
    index 041b7341ade..f4961af58e9 100644
    --- a/xarray/tests/test_testing.py
    +++ b/xarray/tests/test_testing.py
    @@ -1,3 +1,5 @@
    +import pytest
    +
     import xarray as xr
     
     
    @@ -5,3 +7,26 @@ def test_allclose_regression():
         x = xr.DataArray(1.01)
         y = xr.DataArray(1.02)
         xr.testing.assert_allclose(x, y, atol=0.01)
    +
    +
    +@pytest.mark.parametrize(
    +    "obj1,obj2",
    +    (
    +        pytest.param(
    +            xr.Variable("x", [1e-17, 2]), xr.Variable("x", [0, 3]), id="Variable",
    +        ),
    +        pytest.param(
    +            xr.DataArray([1e-17, 2], dims="x"),
    +            xr.DataArray([0, 3], dims="x"),
    +            id="DataArray",
    +        ),
    +        pytest.param(
    +            xr.Dataset({"a": ("x", [1e-17, 2]), "b": ("y", [-2e-18, 2])}),
    +            xr.Dataset({"a": ("x", [0, 2]), "b": ("y", [0, 1])}),
    +            id="Dataset",
    +        ),
    +    ),
    +)
    +def test_assert_allclose(obj1, obj2):
    +    with pytest.raises(AssertionError):
    +        xr.testing.assert_allclose(obj1, obj2)
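
A small sketch of the behaviour the parametrized cases above pin down (not part of the patch): `assert_allclose` passes when values agree within `rtol`/`atol` and raises `AssertionError` otherwise; with the `testing.py` changes earlier in this diff the failure message is the formatted object diff rather than the raw arrays.

import pytest
import xarray as xr

a = xr.DataArray([1e-17, 2.0], dims="x")
b = xr.DataArray([0.0, 2.0], dims="x")

# Within the default tolerances (rtol=1e-05, atol=1e-08) this passes...
xr.testing.assert_allclose(a, b)

# ...but values outside the tolerances raise AssertionError.
with pytest.raises(AssertionError):
    xr.testing.assert_allclose(a, xr.DataArray([0.0, 3.0], dims="x"))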
    diff --git a/xarray/tests/test_units.py b/xarray/tests/test_units.py
    index 2826dc2479c..20a5f0e8613 100644
    --- a/xarray/tests/test_units.py
    +++ b/xarray/tests/test_units.py
    @@ -7,9 +7,8 @@
     import pytest
     
     import xarray as xr
    -from xarray.core import formatting
     from xarray.core.npcompat import IS_NEP18_ACTIVE
    -from xarray.testing import assert_allclose, assert_identical
    +from xarray.testing import assert_allclose, assert_equal, assert_identical
     
     from .test_variable import _PAD_XR_NP_ARGS, VariableSubclassobjects
     
    @@ -27,11 +26,6 @@
         pytest.mark.skipif(
             not IS_NEP18_ACTIVE, reason="NUMPY_EXPERIMENTAL_ARRAY_FUNCTION is not enabled"
         ),
    -    # TODO: remove this once pint has a released version with __array_function__
    -    pytest.mark.skipif(
    -        not hasattr(unit_registry.Quantity, "__array_function__"),
    -        reason="pint does not implement __array_function__ yet",
    -    ),
         # pytest.mark.filterwarnings("ignore:::pint[.*]"),
     ]
     
    @@ -51,10 +45,23 @@ def dimensionality(obj):
     def compatible_mappings(first, second):
         return {
             key: is_compatible(unit1, unit2)
    -        for key, (unit1, unit2) in merge_mappings(first, second)
    +        for key, (unit1, unit2) in zip_mappings(first, second)
         }
     
     
    +def merge_mappings(base, *mappings):
    +    result = base.copy()
    +    for m in mappings:
    +        result.update(m)
    +
    +    return result
    +
    +
    +def zip_mappings(*mappings):
    +    for key in set(mappings[0]).intersection(*mappings[1:]):
    +        yield key, tuple(m[key] for m in mappings)
    +
    +
     def array_extract_units(obj):
         if isinstance(obj, (xr.Variable, xr.DataArray, xr.Dataset)):
             obj = obj.data
    @@ -257,50 +264,11 @@ def assert_units_equal(a, b):
         assert extract_units(a) == extract_units(b)
     
     
    -def assert_equal_with_units(a, b):
    -    # works like xr.testing.assert_equal, but also explicitly checks units
    -    # so, it is more like assert_identical
    -    __tracebackhide__ = True
    -
    -    if isinstance(a, xr.Dataset) or isinstance(b, xr.Dataset):
    -        a_units = extract_units(a)
    -        b_units = extract_units(b)
    -
    -        a_without_units = strip_units(a)
    -        b_without_units = strip_units(b)
    -
    -        assert a_without_units.equals(b_without_units), formatting.diff_dataset_repr(
    -            a, b, "equals"
    -        )
    -        assert a_units == b_units
    -    else:
    -        a = a if not isinstance(a, (xr.DataArray, xr.Variable)) else a.data
    -        b = b if not isinstance(b, (xr.DataArray, xr.Variable)) else b.data
    -
    -        assert type(a) == type(b) or (
    -            isinstance(a, Quantity) and isinstance(b, Quantity)
    -        )
    -
    -        # workaround until pint implements allclose in __array_function__
    -        if isinstance(a, Quantity) or isinstance(b, Quantity):
    -            assert (
    -                hasattr(a, "magnitude") and hasattr(b, "magnitude")
    -            ) and np.allclose(a.magnitude, b.magnitude, equal_nan=True)
    -            assert (hasattr(a, "units") and hasattr(b, "units")) and a.units == b.units
    -        else:
    -            assert np.allclose(a, b, equal_nan=True)
    -
    -
     @pytest.fixture(params=[float, int])
     def dtype(request):
         return request.param
     
     
    -def merge_mappings(*mappings):
    -    for key in set(mappings[0]).intersection(*mappings[1:]):
    -        yield key, tuple(m[key] for m in mappings)
    -
    -
     def merge_args(default_args, new_args):
         from itertools import zip_longest
     
    @@ -329,19 +297,29 @@ def __call__(self, obj, *args, **kwargs):
             all_args = merge_args(self.args, args)
             all_kwargs = {**self.kwargs, **kwargs}
     
    +        xarray_classes = (
    +            xr.Variable,
    +            xr.DataArray,
    +            xr.Dataset,
    +            xr.core.groupby.GroupBy,
    +        )
    +
    +        if not isinstance(obj, xarray_classes):
    +            # remove typical xarray args like "dim"
    +            exclude_kwargs = ("dim", "dims")
    +            all_kwargs = {
    +                key: value
    +                for key, value in all_kwargs.items()
    +                if key not in exclude_kwargs
    +            }
    +
             func = getattr(obj, self.name, None)
    +
             if func is None or not isinstance(func, Callable):
                 # fall back to module level numpy functions if not a xarray object
                 if not isinstance(obj, (xr.Variable, xr.DataArray, xr.Dataset)):
                     numpy_func = getattr(np, self.name)
                     func = partial(numpy_func, obj)
    -                # remove typical xarray args like "dim"
    -                exclude_kwargs = ("dim", "dims")
    -                all_kwargs = {
    -                    key: value
    -                    for key, value in all_kwargs.items()
    -                    if key not in exclude_kwargs
    -                }
                 else:
                     raise AttributeError(f"{obj} has no method named '{self.name}'")
     
    @@ -425,6 +403,10 @@ def test_apply_ufunc_dataset(dtype):
         assert_identical(expected, actual)
     
     
    +# TODO: remove once pint==0.12 has been released
    +@pytest.mark.xfail(
    +    LooseVersion(pint.__version__) <= "0.12", reason="pint bug in isclose"
    +)
     @pytest.mark.parametrize(
         "unit,error",
         (
    @@ -512,6 +494,10 @@ def test_align_dataarray(fill_value, variant, unit, error, dtype):
         assert_allclose(expected_b, actual_b)
     
     
    +# TODO: remove once pint==0.12 has been released
    +@pytest.mark.xfail(
    +    LooseVersion(pint.__version__) <= "0.12", reason="pint bug in isclose"
    +)
     @pytest.mark.parametrize(
         "unit,error",
         (
    @@ -929,6 +915,10 @@ def test_concat_dataset(variant, unit, error, dtype):
         assert_identical(expected, actual)
     
     
    +# TODO: remove once pint==0.12 has been released
    +@pytest.mark.xfail(
    +    LooseVersion(pint.__version__) <= "0.12", reason="pint bug in isclose"
    +)
     @pytest.mark.parametrize(
         "unit,error",
         (
    @@ -1036,6 +1026,10 @@ def test_merge_dataarray(variant, unit, error, dtype):
         assert_allclose(expected, actual)
     
     
    +# TODO: remove once pint==0.12 has been released
    +@pytest.mark.xfail(
    +    LooseVersion(pint.__version__) <= "0.12", reason="pint bug in isclose"
    +)
     @pytest.mark.parametrize(
         "unit,error",
         (
    @@ -1385,7 +1379,6 @@ def wrapper(cls):
         "test_datetime64_conversion",
         "test_timedelta64_conversion",
         "test_pandas_period_index",
    -    "test_1d_math",
         "test_1d_reduce",
         "test_array_interface",
         "test___array__",
    @@ -1413,13 +1406,20 @@ def example_1d_objects(self):
             ]:
                 yield (self.cls("x", data), data)
     
    +    # TODO: remove once pint==0.12 has been released
    +    @pytest.mark.xfail(
    +        LooseVersion(pint.__version__) <= "0.12", reason="pint bug in isclose"
    +    )
    +    def test_real_and_imag(self):
    +        super().test_real_and_imag()
    +
         @pytest.mark.parametrize(
             "func",
             (
                 method("all"),
                 method("any"),
    -            method("argmax"),
    -            method("argmin"),
    +            method("argmax", dim="x"),
    +            method("argmin", dim="x"),
                 method("argsort"),
                 method("cumprod"),
                 method("cumsum"),
    @@ -1443,13 +1443,33 @@ def test_aggregation(self, func, dtype):
             )
             variable = xr.Variable("x", array)
     
    -        units = extract_units(func(array))
    +        numpy_kwargs = func.kwargs.copy()
    +        if "dim" in func.kwargs:
    +            numpy_kwargs["axis"] = variable.get_axis_num(numpy_kwargs.pop("dim"))
    +
    +        units = extract_units(func(array, **numpy_kwargs))
             expected = attach_units(func(strip_units(variable)), units)
             actual = func(variable)
     
             assert_units_equal(expected, actual)
    -        xr.testing.assert_identical(expected, actual)
    +        assert_allclose(expected, actual)
    +
    +    # TODO: remove once pint==0.12 has been released
    +    @pytest.mark.xfail(
    +        LooseVersion(pint.__version__) <= "0.12", reason="pint bug in isclose"
    +    )
    +    def test_aggregate_complex(self):
    +        variable = xr.Variable("x", [1, 2j, np.nan] * unit_registry.m)
    +        expected = xr.Variable((), (0.5 + 1j) * unit_registry.m)
    +        actual = variable.mean()
    +
    +        assert_units_equal(expected, actual)
    +        xr.testing.assert_allclose(expected, actual)
     
    +    # TODO: remove once pint==0.12 has been released
    +    @pytest.mark.xfail(
    +        LooseVersion(pint.__version__) <= "0.12", reason="pint bug in isclose"
    +    )
         @pytest.mark.parametrize(
             "func",
             (
    @@ -1660,7 +1680,7 @@ def test_missing_value_fillna(self, unit, error):
                 method("equals"),
                 pytest.param(
                     method("identical"),
    -                marks=pytest.mark.skip(reason="behaviour of identical is unclear"),
    +                marks=pytest.mark.skip(reason="behavior of identical is undecided"),
                 ),
             ),
             ids=repr,
    @@ -1748,6 +1768,10 @@ def test_isel(self, indices, dtype):
             assert_units_equal(expected, actual)
             xr.testing.assert_identical(expected, actual)
     
    +    # TODO: remove once pint==0.12 has been released
    +    @pytest.mark.xfail(
    +        LooseVersion(pint.__version__) <= "0.12", reason="pint bug in isclose"
    +    )
         @pytest.mark.parametrize(
             "unit,error",
             (
    @@ -1885,7 +1909,10 @@ def test_squeeze(self, dtype):
                 method("coarsen", windows={"y": 2}, func=np.mean),
                 pytest.param(
                     method("quantile", q=[0.25, 0.75]),
    -                marks=pytest.mark.xfail(reason="nanquantile not implemented"),
    +                marks=pytest.mark.xfail(
    +                    LooseVersion(pint.__version__) <= "0.12",
    +                    reason="quantile / nanquantile not implemented yet",
    +                ),
                 ),
                 pytest.param(
                     method("rank", dim="x"),
    @@ -2161,8 +2188,8 @@ class TestDataArray:
                     "with_dims",
                     marks=pytest.mark.xfail(reason="units in indexes are not supported"),
                 ),
    -            pytest.param("with_coords"),
    -            pytest.param("without_coords"),
    +            "with_coords",
    +            "without_coords",
             ),
         )
         def test_init(self, variant, dtype):
    @@ -2221,24 +2248,36 @@ def test_repr(self, func, variant, dtype):
             # warnings or errors, but does not check the result
             func(data_array)
     
    +    # TODO: remove once pint==0.12 has been released
    +    @pytest.mark.xfail(
    +        LooseVersion(pint.__version__) <= "0.12", reason="pint bug in isclose",
    +    )
         @pytest.mark.parametrize(
             "func",
             (
    +            function("all"),
    +            function("any"),
                 pytest.param(
    -                function("all"),
    -                marks=pytest.mark.xfail(reason="not implemented by pint yet"),
    +                function("argmax"),
    +                marks=pytest.mark.skip(
    +                    reason="calling np.argmax as a function on xarray objects is not "
    +                    "supported"
    +                ),
                 ),
                 pytest.param(
    -                function("any"),
    -                marks=pytest.mark.xfail(reason="not implemented by pint yet"),
    +                function("argmin"),
    +                marks=pytest.mark.skip(
    +                    reason="calling np.argmin as a function on xarray objects is not "
    +                    "supported"
    +                ),
                 ),
    -            function("argmax"),
    -            function("argmin"),
                 function("max"),
                 function("mean"),
                 pytest.param(
                     function("median"),
    -                marks=pytest.mark.xfail(reason="not implemented by xarray"),
    +                marks=pytest.mark.skip(
    +                    reason="median does not work with dataarrays yet"
    +                ),
                 ),
                 function("min"),
                 pytest.param(
    @@ -2249,38 +2288,24 @@ def test_repr(self, func, variant, dtype):
                 function("std"),
                 function("var"),
                 function("cumsum"),
    -            pytest.param(
    -                function("cumprod"),
    -                marks=pytest.mark.xfail(reason="not implemented by pint yet"),
    -            ),
    -            pytest.param(
    -                method("all"),
    -                marks=pytest.mark.xfail(reason="not implemented by pint yet"),
    -            ),
    -            pytest.param(
    -                method("any"),
    -                marks=pytest.mark.xfail(reason="not implemented by pint yet"),
    -            ),
    -            method("argmax"),
    -            method("argmin"),
    +            function("cumprod"),
    +            method("all"),
    +            method("any"),
    +            method("argmax", dim="x"),
    +            method("argmin", dim="x"),
                 method("max"),
                 method("mean"),
                 method("median"),
                 method("min"),
                 pytest.param(
                     method("prod"),
    -                marks=pytest.mark.xfail(
    -                    reason="comparison of quantity with ndarrays in nanops not implemented"
    -                ),
    +                marks=pytest.mark.xfail(reason="not implemented by pint yet"),
                 ),
                 method("sum"),
                 method("std"),
                 method("var"),
                 method("cumsum"),
    -            pytest.param(
    -                method("cumprod"),
    -                marks=pytest.mark.xfail(reason="pint does not implement cumprod yet"),
    -            ),
    +            method("cumprod"),
             ),
             ids=repr,
         )
    @@ -2290,13 +2315,18 @@ def test_aggregation(self, func, dtype):
             )
             data_array = xr.DataArray(data=array, dims="x")
     
    +        numpy_kwargs = func.kwargs.copy()
    +        if "dim" in numpy_kwargs:
    +            numpy_kwargs["axis"] = data_array.get_axis_num(numpy_kwargs.pop("dim"))
    +
             # units differ based on the applied function, so we need to
             # first compute the units
             units = extract_units(func(array))
             expected = attach_units(func(strip_units(data_array)), units)
             actual = func(data_array)
     
    -        assert_equal_with_units(expected, actual)
    +        assert_units_equal(expected, actual)
    +        assert_allclose(expected, actual)
     
         @pytest.mark.parametrize(
             "func",
    @@ -2314,7 +2344,8 @@ def test_unary_operations(self, func, dtype):
             expected = attach_units(func(strip_units(data_array)), units)
             actual = func(data_array)
     
    -        assert_equal_with_units(expected, actual)
    +        assert_units_equal(expected, actual)
    +        xr.testing.assert_identical(expected, actual)
     
         @pytest.mark.parametrize(
             "func",
    @@ -2333,7 +2364,8 @@ def test_binary_operations(self, func, dtype):
             expected = attach_units(func(strip_units(data_array)), units)
             actual = func(data_array)
     
    -        assert_equal_with_units(expected, actual)
    +        assert_units_equal(expected, actual)
    +        xr.testing.assert_identical(expected, actual)
     
         @pytest.mark.parametrize(
             "comparison",
    @@ -2383,7 +2415,8 @@ def test_comparison_operations(self, comparison, unit, error, dtype):
                 strip_units(convert_units(to_compare_with, expected_units)),
             )
     
    -        assert_equal_with_units(expected, actual)
    +        assert_units_equal(expected, actual)
    +        xr.testing.assert_identical(expected, actual)
     
         @pytest.mark.parametrize(
             "units,error",
    @@ -2411,9 +2444,10 @@ def test_univariate_ufunc(self, units, error, dtype):
             )
             actual = func(data_array)
     
    -        assert_equal_with_units(expected, actual)
    +        assert_units_equal(expected, actual)
    +        xr.testing.assert_identical(expected, actual)
     
    -    @pytest.mark.xfail(reason="xarray's `np.maximum` strips units")
    +    @pytest.mark.xfail(reason="needs the type register system for __array_ufunc__")
         @pytest.mark.parametrize(
             "unit,error",
             (
    @@ -2422,7 +2456,12 @@ def test_univariate_ufunc(self, units, error, dtype):
                     unit_registry.dimensionless, DimensionalityError, id="dimensionless"
                 ),
                 pytest.param(unit_registry.s, DimensionalityError, id="incompatible_unit"),
    -            pytest.param(unit_registry.mm, None, id="compatible_unit"),
    +            pytest.param(
    +                unit_registry.mm,
    +                None,
    +                id="compatible_unit",
    +                marks=pytest.mark.xfail(reason="pint converts to the wrong units"),
    +            ),
                 pytest.param(unit_registry.m, None, id="identical_unit"),
             ),
         )
    @@ -2433,7 +2472,7 @@ def test_bivariate_ufunc(self, unit, error, dtype):
     
             if error is not None:
                 with pytest.raises(error):
    -                np.maximum(data_array, 0 * unit)
    +                np.maximum(data_array, 1 * unit)
     
                 return
     
    @@ -2441,16 +2480,18 @@ def test_bivariate_ufunc(self, unit, error, dtype):
             expected = attach_units(
                 np.maximum(
                     strip_units(data_array),
    -                strip_units(convert_units(0 * unit, expected_units)),
    +                strip_units(convert_units(1 * unit, expected_units)),
                 ),
                 expected_units,
             )
     
    -        actual = np.maximum(data_array, 0 * unit)
    -        assert_equal_with_units(expected, actual)
    +        actual = np.maximum(data_array, 1 * unit)
    +        assert_units_equal(expected, actual)
    +        xr.testing.assert_identical(expected, actual)
     
    -        actual = np.maximum(0 * unit, data_array)
    -        assert_equal_with_units(expected, actual)
    +        actual = np.maximum(1 * unit, data_array)
    +        assert_units_equal(expected, actual)
    +        xr.testing.assert_identical(expected, actual)
     
         @pytest.mark.parametrize("property", ("T", "imag", "real"))
         def test_numpy_properties(self, property, dtype):
    @@ -2466,7 +2507,8 @@ def test_numpy_properties(self, property, dtype):
             )
             actual = getattr(data_array, property)
     
    -        assert_equal_with_units(expected, actual)
    +        assert_units_equal(expected, actual)
    +        xr.testing.assert_identical(expected, actual)
     
         @pytest.mark.parametrize(
             "func",
    @@ -2481,16 +2523,86 @@ def test_numpy_methods(self, func, dtype):
             expected = attach_units(strip_units(data_array), units)
             actual = func(data_array)
     
    -        assert_equal_with_units(expected, actual)
    +        assert_units_equal(expected, actual)
    +        xr.testing.assert_identical(expected, actual)
    +
    +    def test_item(self, dtype):
    +        array = np.arange(10).astype(dtype) * unit_registry.m
    +        data_array = xr.DataArray(data=array)
    +
    +        func = method("item", 2)
    +
    +        expected = func(strip_units(data_array)) * unit_registry.m
    +        actual = func(data_array)
    +
    +        np.testing.assert_allclose(expected, actual)
    +
    +    @pytest.mark.parametrize(
    +        "unit,error",
    +        (
    +            pytest.param(1, DimensionalityError, id="no_unit"),
    +            pytest.param(
    +                unit_registry.dimensionless, DimensionalityError, id="dimensionless"
    +            ),
    +            pytest.param(unit_registry.s, DimensionalityError, id="incompatible_unit"),
    +            pytest.param(unit_registry.cm, None, id="compatible_unit"),
    +            pytest.param(unit_registry.m, None, id="identical_unit"),
    +        ),
    +    )
    +    @pytest.mark.parametrize(
    +        "func",
    +        (
    +            method("searchsorted", 5),
    +            pytest.param(
    +                function("searchsorted", 5),
    +                marks=pytest.mark.xfail(
    +                    reason="xarray does not implement __array_function__"
    +                ),
    +            ),
    +        ),
    +        ids=repr,
    +    )
    +    def test_searchsorted(self, func, unit, error, dtype):
    +        array = np.arange(10).astype(dtype) * unit_registry.m
    +        data_array = xr.DataArray(data=array)
    +
    +        scalar_types = (int, float)
    +        args = list(value * unit for value in func.args)
    +        kwargs = {
    +            key: (value * unit if isinstance(value, scalar_types) else value)
    +            for key, value in func.kwargs.items()
    +        }
    +
    +        if error is not None:
    +            with pytest.raises(error):
    +                func(data_array, *args, **kwargs)
    +
    +            return
    +
    +        units = extract_units(data_array)
    +        expected_units = extract_units(func(array, *args, **kwargs))
    +        stripped_args = [strip_units(convert_units(value, units)) for value in args]
    +        stripped_kwargs = {
    +            key: strip_units(convert_units(value, units))
    +            for key, value in kwargs.items()
    +        }
    +        expected = attach_units(
    +            func(strip_units(data_array), *stripped_args, **stripped_kwargs),
    +            expected_units,
    +        )
    +        actual = func(data_array, *args, **kwargs)
    +
    +        assert_units_equal(expected, actual)
    +        np.testing.assert_allclose(expected, actual)
     
         @pytest.mark.parametrize(
             "func",
             (
                 method("clip", min=3, max=8),
                 pytest.param(
    -                method("searchsorted", v=5),
    +                function("clip", a_min=3, a_max=8),
                     marks=pytest.mark.xfail(
    -                    reason="searchsorted somehow requires a undocumented `keys` argument"
    +                    reason="xarray does not implement __array_function__"
                     ),
                 ),
             ),
    @@ -2513,28 +2625,32 @@ def test_numpy_methods_with_args(self, func, unit, error, dtype):
             data_array = xr.DataArray(data=array)
     
             scalar_types = (int, float)
    +        args = list(value * unit for value in func.args)
             kwargs = {
                 key: (value * unit if isinstance(value, scalar_types) else value)
                 for key, value in func.kwargs.items()
             }
             if error is not None:
                 with pytest.raises(error):
    -                func(data_array, **kwargs)
    +                func(data_array, *args, **kwargs)
     
                 return
     
             units = extract_units(data_array)
    -        expected_units = extract_units(func(array, **kwargs))
    +        expected_units = extract_units(func(array, *args, **kwargs))
    +        stripped_args = [strip_units(convert_units(value, units)) for value in args]
             stripped_kwargs = {
                 key: strip_units(convert_units(value, units))
                 for key, value in kwargs.items()
             }
             expected = attach_units(
    -            func(strip_units(data_array), **stripped_kwargs), expected_units
    +            func(strip_units(data_array), *stripped_args, **stripped_kwargs),
    +            expected_units,
             )
    -        actual = func(data_array, **kwargs)
    +        actual = func(data_array, *args, **kwargs)
     
    -        assert_equal_with_units(expected, actual)
    +        assert_units_equal(expected, actual)
    +        xr.testing.assert_identical(expected, actual)
     
         @pytest.mark.parametrize(
             "func", (method("isnull"), method("notnull"), method("count")), ids=repr
    @@ -2551,15 +2667,13 @@ def test_missing_value_detection(self, func, dtype):
                 )
                 * unit_registry.degK
             )
    -        x = np.arange(array.shape[0]) * unit_registry.m
    -        y = np.arange(array.shape[1]) * unit_registry.m
    -
    -        data_array = xr.DataArray(data=array, coords={"x": x, "y": y}, dims=("x", "y"))
    +        data_array = xr.DataArray(data=array)
     
             expected = func(strip_units(data_array))
             actual = func(data_array)
     
    -        assert_equal_with_units(expected, actual)
    +        assert_units_equal(expected, actual)
    +        xr.testing.assert_identical(expected, actual)
     
         @pytest.mark.xfail(reason="ffill and bfill lose units in data")
         @pytest.mark.parametrize("func", (method("ffill"), method("bfill")), ids=repr)
    @@ -2576,7 +2690,8 @@ def test_missing_value_filling(self, func, dtype):
             )
             actual = func(data_array, dim="x")
     
    -        assert_equal_with_units(expected, actual)
    +        assert_units_equal(expected, actual)
    +        xr.testing.assert_identical(expected, actual)
     
         @pytest.mark.parametrize(
             "unit,error",
    @@ -2586,12 +2701,7 @@ def test_missing_value_filling(self, func, dtype):
                     unit_registry.dimensionless, DimensionalityError, id="dimensionless"
                 ),
                 pytest.param(unit_registry.s, DimensionalityError, id="incompatible_unit"),
    -            pytest.param(
    -                unit_registry.cm,
    -                None,
    -                id="compatible_unit",
    -                marks=pytest.mark.xfail(reason="fillna converts to value's unit"),
    -            ),
    +            pytest.param(unit_registry.cm, None, id="compatible_unit"),
                 pytest.param(unit_registry.m, None, id="identical_unit"),
             ),
         )
    @@ -2629,7 +2739,8 @@ def test_fillna(self, fill_value, unit, error, dtype):
             )
             actual = func(data_array, value=value)
     
    -        assert_equal_with_units(expected, actual)
    +        assert_units_equal(expected, actual)
    +        xr.testing.assert_identical(expected, actual)
     
         def test_dropna(self, dtype):
             array = (
    @@ -2643,18 +2754,13 @@ def test_dropna(self, dtype):
             expected = attach_units(strip_units(data_array).dropna(dim="x"), units)
             actual = data_array.dropna(dim="x")
     
    -        assert_equal_with_units(expected, actual)
    +        assert_units_equal(expected, actual)
    +        xr.testing.assert_identical(expected, actual)
     
         @pytest.mark.parametrize(
             "unit",
             (
    -            pytest.param(
    -                1,
    -                id="no_unit",
    -                marks=pytest.mark.xfail(
    -                    reason="pint's isin implementation does not work well with mixed args"
    -                ),
    -            ),
    +            pytest.param(1, id="no_unit"),
                 pytest.param(unit_registry.dimensionless, id="dimensionless"),
                 pytest.param(unit_registry.s, id="incompatible_unit"),
                 pytest.param(unit_registry.cm, id="compatible_unit"),
    @@ -2677,22 +2783,11 @@ def test_isin(self, unit, dtype):
             ) & array.check(unit)
             actual = data_array.isin(values)
     
    -        assert_equal_with_units(expected, actual)
    +        assert_units_equal(expected, actual)
    +        xr.testing.assert_identical(expected, actual)
     
         @pytest.mark.parametrize(
    -        "variant",
    -        (
    -            pytest.param(
    -                "masking",
    -                marks=pytest.mark.xfail(reason="array(nan) is not a quantity"),
    -            ),
    -            "replacing_scalar",
    -            "replacing_array",
    -            pytest.param(
    -                "dropping",
    -                marks=pytest.mark.xfail(reason="array(nan) is not a quantity"),
    -            ),
    -        ),
    +        "variant", ("masking", "replacing_scalar", "replacing_array", "dropping")
         )
         @pytest.mark.parametrize(
             "unit,error",
    @@ -2742,22 +2837,24 @@ def test_where(self, variant, unit, error, dtype):
             )
             actual = data_array.where(**kwargs)
     
    -        assert_equal_with_units(expected, actual)
    +        assert_units_equal(expected, actual)
    +        xr.testing.assert_identical(expected, actual)
     
    -    @pytest.mark.xfail(reason="interpolate strips units")
    -    def test_interpolate_na(self, dtype):
    +    @pytest.mark.xfail(reason="uses numpy.vectorize")
    +    def test_interpolate_na(self):
             array = (
                 np.array([-1.03, 0.1, 1.4, np.nan, 2.3, np.nan, np.nan, 9.1])
                 * unit_registry.m
             )
             x = np.arange(len(array))
    -        data_array = xr.DataArray(data=array, coords={"x": x}, dims="x").astype(dtype)
    +        data_array = xr.DataArray(data=array, coords={"x": x}, dims="x")
     
             units = extract_units(data_array)
             expected = attach_units(strip_units(data_array).interpolate_na(dim="x"), units)
             actual = data_array.interpolate_na(dim="x")
     
    -        assert_equal_with_units(expected, actual)
    +        assert_units_equal(expected, actual)
    +        xr.testing.assert_identical(expected, actual)
     
         @pytest.mark.parametrize(
             "unit,error",
    @@ -2767,18 +2864,8 @@ def test_interpolate_na(self, dtype):
                     unit_registry.dimensionless, DimensionalityError, id="dimensionless"
                 ),
                 pytest.param(unit_registry.s, DimensionalityError, id="incompatible_unit"),
    -            pytest.param(
    -                unit_registry.cm,
    -                None,
    -                id="compatible_unit",
    -                marks=pytest.mark.xfail(reason="depends on reindex"),
    -            ),
    -            pytest.param(
    -                unit_registry.m,
    -                None,
    -                id="identical_unit",
    -                marks=pytest.mark.xfail(reason="depends on reindex"),
    -            ),
    +            pytest.param(unit_registry.cm, None, id="compatible_unit",),
    +            pytest.param(unit_registry.m, None, id="identical_unit",),
             ),
         )
         def test_combine_first(self, unit, error, dtype):
    @@ -2807,7 +2894,8 @@ def test_combine_first(self, unit, error, dtype):
             )
             actual = data_array.combine_first(other)
     
    -        assert_equal_with_units(expected, actual)
    +        assert_units_equal(expected, actual)
    +        xr.testing.assert_identical(expected, actual)
     
         @pytest.mark.parametrize(
             "unit",
    @@ -2829,7 +2917,17 @@ def test_combine_first(self, unit, error, dtype):
                 "coords",
             ),
         )
    -    @pytest.mark.parametrize("func", (method("equals"), method("identical")), ids=repr)
    +    @pytest.mark.parametrize(
    +        "func",
    +        (
    +            method("equals"),
    +            pytest.param(
    +                method("identical"),
    +                marks=pytest.mark.skip(reason="the behavior of identical is undecided"),
    +            ),
    +        ),
    +        ids=repr,
    +    )
         def test_comparisons(self, func, variation, unit, dtype):
             def is_compatible(a, b):
                 a = a if a is not None else 1
    @@ -2903,7 +3001,8 @@ def test_broadcast_like(self, unit, dtype):
             )
             actual = arr1.broadcast_like(arr2)
     
    -        assert_equal_with_units(expected, actual)
    +        assert_units_equal(expected, actual)
    +        xr.testing.assert_identical(expected, actual)
     
         @pytest.mark.parametrize(
             "unit",
    @@ -2950,7 +3049,6 @@ def test_broadcast_equals(self, unit, dtype):
                 method("reset_coords", names="x2"),
                 method("copy"),
                 method("astype", np.float32),
    -            method("item", 1),
             ),
             ids=repr,
         )
    @@ -2978,7 +3076,8 @@ def test_content_manipulation(self, func, dtype):
             expected = attach_units(func(strip_units(data_array), **stripped_kwargs), units)
             actual = func(data_array)
     
    -        assert_equal_with_units(expected, actual)
    +        assert_units_equal(expected, actual)
    +        xr.testing.assert_identical(expected, actual)
     
         @pytest.mark.parametrize(
             "func", (pytest.param(method("copy", data=np.arange(20))),), ids=repr
    @@ -3004,7 +3103,9 @@ def test_content_manipulation_with_units(self, func, unit, dtype):
             )
     
             actual = func(data_array, **kwargs)
    -        assert_equal_with_units(expected, actual)
    +
    +        assert_units_equal(expected, actual)
    +        xr.testing.assert_identical(expected, actual)
     
         @pytest.mark.parametrize(
             "indices",
    @@ -3024,7 +3125,8 @@ def test_isel(self, indices, dtype):
             )
             actual = data_array.isel(x=indices)
     
    -        assert_equal_with_units(expected, actual)
    +        assert_units_equal(expected, actual)
    +        xr.testing.assert_identical(expected, actual)
     
         @pytest.mark.xfail(reason="indexes don't support units")
         @pytest.mark.parametrize(
    @@ -3067,7 +3169,9 @@ def test_sel(self, raw_values, unit, error, dtype):
                 extract_units(data_array),
             )
             actual = data_array.sel(x=values)
    -        assert_equal_with_units(expected, actual)
    +
    +        assert_units_equal(expected, actual)
    +        xr.testing.assert_identical(expected, actual)
     
         @pytest.mark.xfail(reason="indexes don't support units")
         @pytest.mark.parametrize(
    @@ -3110,7 +3214,9 @@ def test_loc(self, raw_values, unit, error, dtype):
                 extract_units(data_array),
             )
             actual = data_array.loc[{"x": values}]
    -        assert_equal_with_units(expected, actual)
    +
    +        assert_units_equal(expected, actual)
    +        xr.testing.assert_identical(expected, actual)
     
         @pytest.mark.xfail(reason="indexes don't support units")
         @pytest.mark.parametrize(
    @@ -3153,7 +3259,9 @@ def test_drop_sel(self, raw_values, unit, error, dtype):
                 extract_units(data_array),
             )
             actual = data_array.drop_sel(x=values)
    -        assert_equal_with_units(expected, actual)
    +
    +        assert_units_equal(expected, actual)
    +        xr.testing.assert_identical(expected, actual)
     
         @pytest.mark.parametrize(
             "shape",
    @@ -3181,7 +3289,9 @@ def test_squeeze(self, shape, dtype):
                 strip_units(data_array).squeeze(), extract_units(data_array)
             )
             actual = data_array.squeeze()
    -        assert_equal_with_units(expected, actual)
    +
    +        assert_units_equal(expected, actual)
    +        xr.testing.assert_identical(expected, actual)
     
             # try squeezing the dimensions separately
             names = tuple(dim for dim, coord in coords.items() if len(coord) == 1)
    @@ -3190,7 +3300,9 @@ def test_squeeze(self, shape, dtype):
                     strip_units(data_array).squeeze(dim=name), extract_units(data_array)
                 )
                 actual = data_array.squeeze(dim=name)
    -            assert_equal_with_units(expected, actual)
    +
    +            assert_units_equal(expected, actual)
    +            xr.testing.assert_identical(expected, actual)
     
         @pytest.mark.parametrize(
             "func",
    @@ -3212,49 +3324,46 @@ def test_head_tail_thin(self, func, dtype):
             )
             actual = func(data_array)
     
    -        assert_equal_with_units(expected, actual)
    +        assert_units_equal(expected, actual)
    +        xr.testing.assert_identical(expected, actual)
     
    -    @pytest.mark.xfail(reason="indexes don't support units")
    +    # TODO: remove once pint==0.12 has been released
    +    @pytest.mark.xfail(
    +        LooseVersion(pint.__version__) <= "0.12", reason="pint bug in isclose"
    +    )
    +    @pytest.mark.parametrize("variant", ("data", "coords"))
         @pytest.mark.parametrize(
    -        "unit,error",
    +        "func",
             (
    -            pytest.param(1, DimensionalityError, id="no_unit"),
                 pytest.param(
    -                unit_registry.dimensionless, DimensionalityError, id="dimensionless"
    +                method("interp"), marks=pytest.mark.xfail(reason="uses scipy")
                 ),
    -            pytest.param(unit_registry.s, DimensionalityError, id="incompatible_unit"),
    -            pytest.param(unit_registry.cm, None, id="compatible_unit"),
    -            pytest.param(unit_registry.m, None, id="identical_unit"),
    +            method("reindex"),
             ),
    +        ids=repr,
         )
    -    def test_interp(self, unit, error):
    -        array = np.linspace(1, 2, 10 * 5).reshape(10, 5) * unit_registry.degK
    -        new_coords = (np.arange(10) + 0.5) * unit
    -        coords = {
    -            "x": np.arange(10) * unit_registry.m,
    -            "y": np.arange(5) * unit_registry.m,
    +    def test_interp_reindex(self, variant, func, dtype):
    +        variants = {
    +            "data": (unit_registry.m, 1),
    +            "coords": (1, unit_registry.m),
             }
    +        data_unit, coord_unit = variants.get(variant)
     
    -        data_array = xr.DataArray(array, coords=coords, dims=("x", "y"))
    +        array = np.linspace(1, 2, 10).astype(dtype) * data_unit
    +        y = np.arange(10) * coord_unit
     
    -        if error is not None:
    -            with pytest.raises(error):
    -                data_array.interp(x=new_coords)
    -
    -            return
    +        x = np.arange(10)
    +        new_x = np.arange(10) + 0.5
    +        data_array = xr.DataArray(array, coords={"x": x, "y": ("x", y)}, dims="x")
     
             units = extract_units(data_array)
    -        expected = attach_units(
    -            strip_units(data_array).interp(
    -                x=strip_units(convert_units(new_coords, {None: unit_registry.m}))
    -            ),
    -            units,
    -        )
    -        actual = data_array.interp(x=new_coords)
    +        expected = attach_units(func(strip_units(data_array), x=new_x), units)
    +        actual = func(data_array, x=new_x)
     
    -        assert_equal_with_units(expected, actual)
    +        assert_units_equal(expected, actual)
    +        xr.testing.assert_allclose(expected, actual)
     
    -    @pytest.mark.xfail(reason="indexes strip units")
    +    @pytest.mark.xfail(reason="indexes don't support units")
         @pytest.mark.parametrize(
             "unit,error",
             (
    @@ -3267,79 +3376,70 @@ def test_interp(self, unit, error):
                 pytest.param(unit_registry.m, None, id="identical_unit"),
             ),
         )
    -    def test_interp_like(self, unit, error):
    -        array = np.linspace(1, 2, 10 * 5).reshape(10, 5) * unit_registry.degK
    -        coords = {
    -            "x": (np.arange(10) + 0.3) * unit_registry.m,
    -            "y": (np.arange(5) + 0.3) * unit_registry.m,
    -        }
    -
    -        data_array = xr.DataArray(array, coords=coords, dims=("x", "y"))
    -        other = xr.DataArray(
    -            data=np.empty((20, 10)) * unit_registry.degK,
    -            coords={"x": np.arange(20) * unit, "y": np.arange(10) * unit},
    -            dims=("x", "y"),
    -        )
    +    @pytest.mark.parametrize(
    +        "func", (method("interp"), method("reindex")), ids=repr,
    +    )
    +    def test_interp_reindex_indexing(self, func, unit, error, dtype):
    +        array = np.linspace(1, 2, 10).astype(dtype)
    +        x = np.arange(10) * unit_registry.m
    +        new_x = (np.arange(10) + 0.5) * unit
    +        data_array = xr.DataArray(array, coords={"x": x}, dims="x")
     
             if error is not None:
                 with pytest.raises(error):
    -                data_array.interp_like(other)
    +                func(data_array, x=new_x)
     
                 return
     
             units = extract_units(data_array)
             expected = attach_units(
    -            strip_units(data_array).interp_like(
    -                strip_units(convert_units(other, units))
    +            func(
    +                strip_units(data_array),
    +                x=strip_units(convert_units(new_x, {None: unit_registry.m})),
                 ),
                 units,
             )
    -        actual = data_array.interp_like(other)
    +        actual = func(data_array, x=new_x)
     
    -        assert_equal_with_units(expected, actual)
    +        assert_units_equal(expected, actual)
    +        xr.testing.assert_identical(expected, actual)
     
    -    @pytest.mark.xfail(reason="indexes don't support units")
    +    # TODO: remove once pint==0.12 has been released
    +    @pytest.mark.xfail(
    +        LooseVersion(pint.__version__) <= "0.12", reason="pint bug in isclose"
    +    )
    +    @pytest.mark.parametrize("variant", ("data", "coords"))
         @pytest.mark.parametrize(
    -        "unit,error",
    +        "func",
             (
    -            pytest.param(1, DimensionalityError, id="no_unit"),
                 pytest.param(
    -                unit_registry.dimensionless, DimensionalityError, id="dimensionless"
    +                method("interp_like"), marks=pytest.mark.xfail(reason="uses scipy")
                 ),
    -            pytest.param(unit_registry.s, DimensionalityError, id="incompatible_unit"),
    -            pytest.param(unit_registry.cm, None, id="compatible_unit"),
    -            pytest.param(unit_registry.m, None, id="identical_unit"),
    +            method("reindex_like"),
             ),
    +        ids=repr,
         )
    -    def test_reindex(self, unit, error, dtype):
    -        array = (
    -            np.linspace(1, 2, 10 * 5).reshape(10, 5).astype(dtype) * unit_registry.degK
    -        )
    -        new_coords = (np.arange(10) + 0.5) * unit
    -        coords = {
    -            "x": np.arange(10) * unit_registry.m,
    -            "y": np.arange(5) * unit_registry.m,
    +    def test_interp_reindex_like(self, variant, func, dtype):
    +        variants = {
    +            "data": (unit_registry.m, 1),
    +            "coords": (1, unit_registry.m),
             }
    +        data_unit, coord_unit = variants.get(variant)
     
    -        data_array = xr.DataArray(array, coords=coords, dims=("x", "y"))
    -        func = method("reindex")
    -
    -        if error is not None:
    -            with pytest.raises(error):
    -                func(data_array, x=new_coords)
    +        array = np.linspace(1, 2, 10).astype(dtype) * data_unit
    +        coord = np.arange(10) * coord_unit
     
    -            return
    +        x = np.arange(10)
    +        new_x = np.arange(-2, 2) + 0.5
    +        data_array = xr.DataArray(array, coords={"x": x, "y": ("x", coord)}, dims="x")
    +        other = xr.DataArray(np.empty_like(new_x), coords={"x": new_x}, dims="x")
     
    -        expected = attach_units(
    -            func(
    -                strip_units(data_array),
    -                x=strip_units(convert_units(new_coords, {None: unit_registry.m})),
    -            ),
    -            {None: unit_registry.degK},
    -        )
    -        actual = func(data_array, x=new_coords)
    +        units = extract_units(data_array)
    +        expected = attach_units(func(strip_units(data_array), other), units)
    +        actual = func(data_array, other)
     
    -        assert_equal_with_units(expected, actual)
    +        assert_units_equal(expected, actual)
    +        xr.testing.assert_allclose(expected, actual)
     
         @pytest.mark.xfail(reason="indexes don't support units")
         @pytest.mark.parametrize(
    @@ -3354,38 +3454,35 @@ def test_reindex(self, unit, error, dtype):
                 pytest.param(unit_registry.m, None, id="identical_unit"),
             ),
         )
    -    def test_reindex_like(self, unit, error, dtype):
    -        array = (
    -            np.linspace(1, 2, 10 * 5).reshape(10, 5).astype(dtype) * unit_registry.degK
    -        )
    -        coords = {
    -            "x": (np.arange(10) + 0.3) * unit_registry.m,
    -            "y": (np.arange(5) + 0.3) * unit_registry.m,
    -        }
    +    @pytest.mark.parametrize(
    +        "func", (method("interp_like"), method("reindex_like")), ids=repr,
    +    )
    +    def test_interp_reindex_like_indexing(self, func, unit, error, dtype):
    +        array = np.linspace(1, 2, 10).astype(dtype)
    +        x = np.arange(10) * unit_registry.m
    +        new_x = (np.arange(-2, 2) + 0.5) * unit
     
    -        data_array = xr.DataArray(array, coords=coords, dims=("x", "y"))
    -        other = xr.DataArray(
    -            data=np.empty((20, 10)) * unit_registry.degK,
    -            coords={"x": np.arange(20) * unit, "y": np.arange(10) * unit},
    -            dims=("x", "y"),
    -        )
    +        data_array = xr.DataArray(array, coords={"x": x}, dims="x")
    +        other = xr.DataArray(np.empty_like(new_x), {"x": new_x}, dims="x")
     
             if error is not None:
                 with pytest.raises(error):
    -                data_array.reindex_like(other)
    +                func(data_array, other)
     
                 return
     
             units = extract_units(data_array)
             expected = attach_units(
    -            strip_units(data_array).reindex_like(
    -                strip_units(convert_units(other, units))
    +            func(
    +                strip_units(data_array),
    +                strip_units(convert_units(other, {None: unit_registry.m})),
                 ),
                 units,
             )
    -        actual = data_array.reindex_like(other)
    +        actual = func(data_array, other)
     
    -        assert_equal_with_units(expected, actual)
    +        assert_units_equal(expected, actual)
    +        xr.testing.assert_identical(expected, actual)
     
         @pytest.mark.parametrize(
             "func",
    @@ -3407,7 +3504,8 @@ def test_stacking_stacked(self, func, dtype):
             expected = attach_units(func(strip_units(stacked)), {"data": unit_registry.m})
             actual = func(stacked)
     
    -        assert_equal_with_units(expected, actual)
    +        assert_units_equal(expected, actual)
    +        xr.testing.assert_identical(expected, actual)
     
         @pytest.mark.xfail(reason="indexes don't support units")
         def test_to_unstacked_dataset(self, dtype):
    @@ -3430,7 +3528,8 @@ def test_to_unstacked_dataset(self, dtype):
             ).rename({elem.magnitude: elem for elem in x})
             actual = func(data_array)
     
    -        assert_equal_with_units(expected, actual)
    +        assert_units_equal(expected, actual)
    +        xr.testing.assert_identical(expected, actual)
     
         @pytest.mark.parametrize(
             "func",
    @@ -3438,9 +3537,7 @@ def test_to_unstacked_dataset(self, dtype):
                 method("transpose", "y", "x", "z"),
                 method("stack", a=("x", "y")),
                 method("set_index", x="x2"),
    -            pytest.param(
    -                method("shift", x=2), marks=pytest.mark.xfail(reason="strips units")
    -            ),
    +            method("shift", x=2),
                 method("roll", x=2, roll_coords=False),
                 method("sortby", "x2"),
             ),
    @@ -3466,7 +3563,8 @@ def test_stacking_reordering(self, func, dtype):
             expected = attach_units(func(strip_units(data_array)), {None: unit_registry.m})
             actual = func(data_array)
     
    -        assert_equal_with_units(expected, actual)
    +        assert_units_equal(expected, actual)
    +        xr.testing.assert_identical(expected, actual)
     
         @pytest.mark.parametrize(
             "func",
    @@ -3476,16 +3574,13 @@ def test_stacking_reordering(self, func, dtype):
                 method("integrate", dim="x"),
                 pytest.param(
                     method("quantile", q=[0.25, 0.75]),
    -                marks=pytest.mark.xfail(reason="nanquantile not implemented"),
    -            ),
    -            method("reduce", func=np.sum, dim="x"),
    -            pytest.param(
    -                lambda x: x.dot(x),
    -                id="method_dot",
                     marks=pytest.mark.xfail(
    -                    reason="pint does not implement the dot method"
    +                    LooseVersion(pint.__version__) <= "0.12",
    +                    reason="quantile / nanquantile not implemented yet",
                     ),
                 ),
    +            method("reduce", func=np.sum, dim="x"),
    +            pytest.param(lambda x: x.dot(x), id="method_dot"),
             ),
             ids=repr,
         )
    @@ -3512,8 +3607,13 @@ def test_computation(self, func, dtype):
             expected = attach_units(func(strip_units(data_array)), units)
             actual = func(data_array)
     
    -        assert_equal_with_units(expected, actual)
    +        assert_units_equal(expected, actual)
    +        xr.testing.assert_identical(expected, actual)
     
    +    # TODO: remove once pint==0.12 has been released
    +    @pytest.mark.xfail(
    +        LooseVersion(pint.__version__) <= "0.12", reason="pint bug in isclose"
    +    )
         @pytest.mark.parametrize(
             "func",
             (
    @@ -3522,11 +3622,15 @@ def test_computation(self, func, dtype):
                 method("coarsen", y=2),
                 pytest.param(
                     method("rolling", y=3),
    -                marks=pytest.mark.xfail(reason="rolling strips units"),
    +                marks=pytest.mark.xfail(
    +                    reason="numpy.lib.stride_tricks.as_strided converts to ndarray"
    +                ),
                 ),
                 pytest.param(
                     method("rolling_exp", y=3),
    -                marks=pytest.mark.xfail(reason="units not supported by numbagg"),
    +                marks=pytest.mark.xfail(
    +                    reason="numbagg functions are not supported by pint"
    +                ),
                 ),
             ),
             ids=repr,
    @@ -3545,7 +3649,8 @@ def test_computation_objects(self, func, dtype):
             expected = attach_units(func(strip_units(data_array)).mean(), units)
             actual = func(data_array).mean()
     
    -        assert_equal_with_units(expected, actual)
    +        assert_units_equal(expected, actual)
    +        xr.testing.assert_allclose(expected, actual)
     
         def test_resample(self, dtype):
             array = np.linspace(0, 5, 10).astype(dtype) * unit_registry.m
    @@ -3559,7 +3664,8 @@ def test_resample(self, dtype):
             expected = attach_units(func(strip_units(data_array)).mean(), units)
             actual = func(data_array).mean()
     
    -        assert_equal_with_units(expected, actual)
    +        assert_units_equal(expected, actual)
    +        xr.testing.assert_identical(expected, actual)
     
         @pytest.mark.parametrize(
             "func",
    @@ -3569,7 +3675,10 @@ def test_resample(self, dtype):
                 method("last"),
                 pytest.param(
                     method("quantile", q=[0.25, 0.5, 0.75], dim="x"),
    -                marks=pytest.mark.xfail(reason="nanquantile not implemented"),
    +                marks=pytest.mark.xfail(
    +                    LooseVersion(pint.__version__) <= "0.12",
    +                    reason="quantile / nanquantile not implemented yet",
    +                ),
                 ),
             ),
             ids=repr,
    @@ -3598,18 +3707,20 @@ def test_grouped_operations(self, func, dtype):
             )
             actual = func(data_array.groupby("y"))
     
    -        assert_equal_with_units(expected, actual)
    +        assert_units_equal(expected, actual)
    +        xr.testing.assert_identical(expected, actual)
     
     
    +@pytest.mark.filterwarnings("error::pint.UnitStrippedWarning")
     class TestDataset:
         @pytest.mark.parametrize(
             "unit,error",
             (
    -            pytest.param(1, DimensionalityError, id="no_unit"),
    +            pytest.param(1, xr.MergeError, id="no_unit"),
                 pytest.param(
    -                unit_registry.dimensionless, DimensionalityError, id="dimensionless"
    +                unit_registry.dimensionless, xr.MergeError, id="dimensionless"
                 ),
    -            pytest.param(unit_registry.s, DimensionalityError, id="incompatible_unit"),
    +            pytest.param(unit_registry.s, xr.MergeError, id="incompatible_unit"),
                 pytest.param(unit_registry.mm, None, id="compatible_unit"),
                 pytest.param(unit_registry.m, None, id="same_unit"),
             ),
    @@ -3618,11 +3729,10 @@ class TestDataset:
             "shared",
             (
                 "nothing",
    -            pytest.param("dims", marks=pytest.mark.xfail(reason="indexes strip units")),
                 pytest.param(
    -                "coords",
    -                marks=pytest.mark.xfail(reason="reindex does not work with pint yet"),
    +                "dims", marks=pytest.mark.xfail(reason="indexes don't support units")
                 ),
    +            "coords",
             ),
         )
         def test_init(self, shared, unit, error, dtype):
    @@ -3630,60 +3740,53 @@ def test_init(self, shared, unit, error, dtype):
             scaled_unit = unit_registry.mm
     
             a = np.linspace(0, 1, 10).astype(dtype) * unit_registry.Pa
    -        b = np.linspace(-1, 0, 12).astype(dtype) * unit_registry.Pa
    -
    -        raw_x = np.arange(a.shape[0])
    -        x = raw_x * original_unit
    -        x2 = x.to(scaled_unit)
    -
    -        raw_y = np.arange(b.shape[0])
    -        y = raw_y * unit
    -        y_units = unit if isinstance(y, unit_registry.Quantity) else None
    -        if isinstance(y, unit_registry.Quantity):
    -            if y.check(scaled_unit):
    -                y2 = y.to(scaled_unit)
    -            else:
    -                y2 = y * 1000
    -            y2_units = y2.units
    -        else:
    -            y2 = y * 1000
    -            y2_units = None
    +        b = np.linspace(-1, 0, 10).astype(dtype) * unit_registry.degK
    +
    +        values_a = np.arange(a.shape[0])
    +        dim_a = values_a * original_unit
    +        coord_a = dim_a.to(scaled_unit)
    +
    +        values_b = np.arange(b.shape[0])
    +        dim_b = values_b * unit
    +        coord_b = (
    +            dim_b.to(scaled_unit)
    +            if unit_registry.is_compatible_with(dim_b, scaled_unit)
    +            and unit != scaled_unit
    +            else dim_b * 1000
    +        )
     
             variants = {
    -            "nothing": ({"x": x, "x2": ("x", x2)}, {"y": y, "y2": ("y", y2)}),
    -            "dims": (
    -                {"x": x, "x2": ("x", strip_units(x2))},
    -                {"x": y, "y2": ("x", strip_units(y2))},
    +            "nothing": ({}, {}),
    +            "dims": ({"x": dim_a}, {"x": dim_b}),
    +            "coords": (
    +                {"x": values_a, "y": ("x", coord_a)},
    +                {"x": values_b, "y": ("x", coord_b)},
                 ),
    -            "coords": ({"x": raw_x, "y": ("x", x2)}, {"x": raw_y, "y": ("x", y2)}),
             }
             coords_a, coords_b = variants.get(shared)
     
             dims_a, dims_b = ("x", "y") if shared == "nothing" else ("x", "x")
     
    -        arr1 = xr.DataArray(data=a, coords=coords_a, dims=dims_a)
    -        arr2 = xr.DataArray(data=b, coords=coords_b, dims=dims_b)
    +        a = xr.DataArray(data=a, coords=coords_a, dims=dims_a)
    +        b = xr.DataArray(data=b, coords=coords_b, dims=dims_b)
    +
             if error is not None and shared != "nothing":
                 with pytest.raises(error):
    -                xr.Dataset(data_vars={"a": arr1, "b": arr2})
    +                xr.Dataset(data_vars={"a": a, "b": b})
     
                 return
     
    -        actual = xr.Dataset(data_vars={"a": arr1, "b": arr2})
    +        actual = xr.Dataset(data_vars={"a": a, "b": b})
     
    -        expected_units = {
    -            "a": a.units,
    -            "b": b.units,
    -            "x": x.units,
    -            "x2": x2.units,
    -            "y": y_units,
    -            "y2": y2_units,
    -        }
    +        units = merge_mappings(
    +            extract_units(a.rename("a")), extract_units(b.rename("b"))
    +        )
             expected = attach_units(
    -            xr.Dataset(data_vars={"a": strip_units(arr1), "b": strip_units(arr2)}),
    -            expected_units,
    +            xr.Dataset(data_vars={"a": strip_units(a), "b": strip_units(b)}), units
             )
    -        assert_equal_with_units(actual, expected)
    +
    +        assert_units_equal(expected, actual)
    +        assert_equal(expected, actual)
     
         @pytest.mark.parametrize(
             "func", (pytest.param(str, id="str"), pytest.param(repr, id="repr"))
    @@ -3691,79 +3794,79 @@ def test_init(self, shared, unit, error, dtype):
         @pytest.mark.parametrize(
             "variant",
             (
    +            "data",
                 pytest.param(
    -                "with_dims",
    +                "dims",
                     marks=pytest.mark.xfail(reason="units in indexes are not supported"),
                 ),
    -            pytest.param("with_coords"),
    -            pytest.param("without_coords"),
    +            "coords",
             ),
         )
    -    @pytest.mark.filterwarnings("error:::pint[.*]")
         def test_repr(self, func, variant, dtype):
    -        array1 = np.linspace(1, 2, 10, dtype=dtype) * unit_registry.Pa
    -        array2 = np.linspace(0, 1, 10, dtype=dtype) * unit_registry.degK
    +        unit1, unit2 = (
    +            (unit_registry.Pa, unit_registry.degK) if variant == "data" else (1, 1)
    +        )
    +
    +        array1 = np.linspace(1, 2, 10, dtype=dtype) * unit1
    +        array2 = np.linspace(0, 1, 10, dtype=dtype) * unit2
     
             x = np.arange(len(array1)) * unit_registry.s
             y = x.to(unit_registry.ms)
     
             variants = {
    -            "with_dims": {"x": x},
    -            "with_coords": {"y": ("x", y)},
    -            "without_coords": {},
    +            "dims": {"x": x},
    +            "coords": {"y": ("x", y)},
    +            "data": {},
             }
     
    -        data_array = xr.Dataset(
    +        ds = xr.Dataset(
                 data_vars={"a": ("x", array1), "b": ("x", array2)},
                 coords=variants.get(variant),
             )
     
             # FIXME: this just checks that the repr does not raise
             # warnings or errors, but does not check the result
    -        func(data_array)
    +        func(ds)
     
         @pytest.mark.parametrize(
             "func",
             (
    +            function("all"),
    +            function("any"),
                 pytest.param(
    -                function("all"),
    -                marks=pytest.mark.xfail(reason="not implemented by pint"),
    +                function("argmax"),
    +                marks=pytest.mark.skip(
    +                    reason="calling np.argmax as a function on xarray objects is not "
    +                    "supported"
    +                ),
                 ),
                 pytest.param(
    -                function("any"),
    -                marks=pytest.mark.xfail(reason="not implemented by pint"),
    +                function("argmin"),
    +                marks=pytest.mark.skip(
    +                    reason="calling np.argmin as a function on xarray objects is not "
    +                    "supported"
    +                ),
                 ),
    -            function("argmax"),
    -            function("argmin"),
                 function("max"),
                 function("min"),
                 function("mean"),
                 pytest.param(
                     function("median"),
    -                marks=pytest.mark.xfail(
    -                    reason="np.median does not work with dataset yet"
    -                ),
    +                marks=pytest.mark.xfail(reason="median does not work with dataset yet"),
                 ),
                 function("sum"),
                 pytest.param(
                     function("prod"),
    -                marks=pytest.mark.xfail(reason="not implemented by pint"),
    +                marks=pytest.mark.xfail(reason="prod does not work with dataset yet"),
                 ),
                 function("std"),
                 function("var"),
                 function("cumsum"),
    -            pytest.param(
    -                function("cumprod"),
    -                marks=pytest.mark.xfail(reason="fails within xarray"),
    -            ),
    -            pytest.param(
    -                method("all"), marks=pytest.mark.xfail(reason="not implemented by pint")
    -            ),
    -            pytest.param(
    -                method("any"), marks=pytest.mark.xfail(reason="not implemented by pint")
    -            ),
    -            method("argmax"),
    -            method("argmin"),
    +            function("cumprod"),
    +            method("all"),
    +            method("any"),
    +            method("argmax", dim="x"),
    +            method("argmin", dim="x"),
                 method("max"),
                 method("min"),
                 method("mean"),
    @@ -3771,68 +3874,64 @@ def test_repr(self, func, variant, dtype):
                 method("sum"),
                 pytest.param(
                     method("prod"),
    -                marks=pytest.mark.xfail(reason="not implemented by pint"),
    +                marks=pytest.mark.xfail(reason="prod does not work with dataset yet"),
                 ),
                 method("std"),
                 method("var"),
                 method("cumsum"),
    -            pytest.param(
    -                method("cumprod"), marks=pytest.mark.xfail(reason="fails within xarray")
    -            ),
    +            method("cumprod"),
             ),
             ids=repr,
         )
         def test_aggregation(self, func, dtype):
    -        unit_a = (
    -            unit_registry.Pa if func.name != "cumprod" else unit_registry.dimensionless
    -        )
    -        unit_b = (
    -            unit_registry.kg / unit_registry.m ** 3
    +        unit_a, unit_b = (
    +            (unit_registry.Pa, unit_registry.degK)
                 if func.name != "cumprod"
    -            else unit_registry.dimensionless
    -        )
    -        a = xr.DataArray(data=np.linspace(0, 1, 10).astype(dtype) * unit_a, dims="x")
    -        b = xr.DataArray(data=np.linspace(-1, 0, 10).astype(dtype) * unit_b, dims="x")
    -        x = xr.DataArray(data=np.arange(10).astype(dtype) * unit_registry.m, dims="x")
    -        y = xr.DataArray(
    -            data=np.arange(10, 20).astype(dtype) * unit_registry.s, dims="x"
    +            else (unit_registry.dimensionless, unit_registry.dimensionless)
             )
     
    -        ds = xr.Dataset(data_vars={"a": a, "b": b}, coords={"x": x, "y": y})
    +        a = np.linspace(0, 1, 10).astype(dtype) * unit_a
    +        b = np.linspace(-1, 0, 10).astype(dtype) * unit_b
    +
    +        ds = xr.Dataset({"a": ("x", a), "b": ("x", b)})
    +
    +        if "dim" in func.kwargs:
    +            numpy_kwargs = func.kwargs.copy()
    +            dim = numpy_kwargs.pop("dim")
    +
    +            axis_a = ds.a.get_axis_num(dim)
    +            axis_b = ds.b.get_axis_num(dim)
    +
    +            numpy_kwargs_a = numpy_kwargs.copy()
    +            numpy_kwargs_a["axis"] = axis_a
    +            numpy_kwargs_b = numpy_kwargs.copy()
    +            numpy_kwargs_b["axis"] = axis_b
    +        else:
    +            numpy_kwargs_a = {}
    +            numpy_kwargs_b = {}
    +
    +        units_a = array_extract_units(func(a, **numpy_kwargs_a))
    +        units_b = array_extract_units(func(b, **numpy_kwargs_b))
    +        units = {"a": units_a, "b": units_b}
     
             actual = func(ds)
    -        expected = attach_units(
    -            func(strip_units(ds)),
    -            {
    -                "a": extract_units(func(a)).get(None),
    -                "b": extract_units(func(b)).get(None),
    -            },
    -        )
    +        expected = attach_units(func(strip_units(ds)), units)
     
    -        assert_equal_with_units(actual, expected)
    +        assert_units_equal(expected, actual)
    +        assert_allclose(expected, actual)
     
         @pytest.mark.parametrize("property", ("imag", "real"))
         def test_numpy_properties(self, property, dtype):
    -        ds = xr.Dataset(
    -            data_vars={
    -                "a": xr.DataArray(
    -                    data=np.linspace(0, 1, 10) * unit_registry.Pa, dims="x"
    -                ),
    -                "b": xr.DataArray(
    -                    data=np.linspace(-1, 0, 15) * unit_registry.Pa, dims="y"
    -                ),
    -            },
    -            coords={
    -                "x": np.arange(10) * unit_registry.m,
    -                "y": np.arange(15) * unit_registry.s,
    -            },
    -        )
    +        a = np.linspace(0, 1, 10) * unit_registry.Pa
    +        b = np.linspace(-1, 0, 15) * unit_registry.degK
    +        ds = xr.Dataset({"a": ("x", a), "b": ("y", b)})
             units = extract_units(ds)
     
             actual = getattr(ds, property)
             expected = attach_units(getattr(strip_units(ds), property), units)
     
    -        assert_equal_with_units(actual, expected)
    +        assert_units_equal(expected, actual)
    +        assert_equal(expected, actual)
     
         @pytest.mark.parametrize(
             "func",
    @@ -3846,31 +3945,19 @@ def test_numpy_properties(self, property, dtype):
             ids=repr,
         )
         def test_numpy_methods(self, func, dtype):
    -        ds = xr.Dataset(
    -            data_vars={
    -                "a": xr.DataArray(
    -                    data=np.linspace(1, -1, 10) * unit_registry.Pa, dims="x"
    -                ),
    -                "b": xr.DataArray(
    -                    data=np.linspace(-1, 1, 15) * unit_registry.Pa, dims="y"
    -                ),
    -            },
    -            coords={
    -                "x": np.arange(10) * unit_registry.m,
    -                "y": np.arange(15) * unit_registry.s,
    -            },
    -        )
    -        units = {
    -            "a": array_extract_units(func(ds.a)),
    -            "b": array_extract_units(func(ds.b)),
    -            "x": unit_registry.m,
    -            "y": unit_registry.s,
    -        }
    +        a = np.linspace(1, -1, 10) * unit_registry.Pa
    +        b = np.linspace(-1, 1, 15) * unit_registry.degK
    +        ds = xr.Dataset({"a": ("x", a), "b": ("y", b)})
    +
    +        units_a = array_extract_units(func(a))
    +        units_b = array_extract_units(func(b))
    +        units = {"a": units_a, "b": units_b}
     
             actual = func(ds)
             expected = attach_units(func(strip_units(ds)), units)
     
    -        assert_equal_with_units(actual, expected)
    +        assert_units_equal(expected, actual)
    +        assert_equal(expected, actual)
     
         @pytest.mark.parametrize("func", (method("clip", min=3, max=8),), ids=repr)
         @pytest.mark.parametrize(
    @@ -3887,21 +3974,13 @@ def test_numpy_methods(self, func, dtype):
         )
         def test_numpy_methods_with_args(self, func, unit, error, dtype):
             data_unit = unit_registry.m
    -        ds = xr.Dataset(
    -            data_vars={
    -                "a": xr.DataArray(data=np.arange(10) * data_unit, dims="x"),
    -                "b": xr.DataArray(data=np.arange(15) * data_unit, dims="y"),
    -            },
    -            coords={
    -                "x": np.arange(10) * unit_registry.m,
    -                "y": np.arange(15) * unit_registry.s,
    -            },
    -        )
    +        a = np.linspace(0, 10, 15) * unit_registry.m
    +        b = np.linspace(-2, 12, 20) * unit_registry.m
    +        ds = xr.Dataset({"a": ("x", a), "b": ("y", b)})
             units = extract_units(ds)
     
             kwargs = {
    -            key: (value * unit if isinstance(value, (int, float)) else value)
    -            for key, value in func.kwargs.items()
    +            key: array_attach_units(value, unit) for key, value in func.kwargs.items()
             }
     
             if error is not None:
    @@ -3918,7 +3997,8 @@ def test_numpy_methods_with_args(self, func, unit, error, dtype):
             actual = func(ds, **kwargs)
             expected = attach_units(func(strip_units(ds), **stripped_kwargs), units)
     
    -        assert_equal_with_units(actual, expected)
    +        assert_units_equal(expected, actual)
    +        assert_equal(expected, actual)
     
         @pytest.mark.parametrize(
             "func", (method("isnull"), method("notnull"), method("count")), ids=repr
    @@ -3948,22 +4028,13 @@ def test_missing_value_detection(self, func, dtype):
                 * unit_registry.Pa
             )
     
    -        x = np.arange(array1.shape[0]) * unit_registry.m
    -        y = np.arange(array1.shape[1]) * unit_registry.m
    -        z = np.arange(array2.shape[0]) * unit_registry.m
    -
    -        ds = xr.Dataset(
    -            data_vars={
    -                "a": xr.DataArray(data=array1, dims=("x", "y")),
    -                "b": xr.DataArray(data=array2, dims=("z", "x")),
    -            },
    -            coords={"x": x, "y": y, "z": z},
    -        )
    +        ds = xr.Dataset({"a": (("x", "y"), array1), "b": (("z", "x"), array2)})
     
             expected = func(strip_units(ds))
             actual = func(ds)
     
    -        assert_equal_with_units(expected, actual)
    +        assert_units_equal(expected, actual)
    +        assert_equal(expected, actual)
     
         @pytest.mark.xfail(reason="ffill and bfill lose the unit")
         @pytest.mark.parametrize("func", (method("ffill"), method("bfill")), ids=repr)
    @@ -3977,23 +4048,14 @@ def test_missing_value_filling(self, func, dtype):
                 * unit_registry.Pa
             )
     
    -        x = np.arange(len(array1))
    -
    -        ds = xr.Dataset(
    -            data_vars={
    -                "a": xr.DataArray(data=array1, dims="x"),
    -                "b": xr.DataArray(data=array2, dims="x"),
    -            },
    -            coords={"x": x},
    -        )
    +        ds = xr.Dataset({"a": ("x", array1), "b": ("y", array2)})
    +        units = extract_units(ds)
     
    -        expected = attach_units(
    -            func(strip_units(ds), dim="x"),
    -            {"a": unit_registry.degK, "b": unit_registry.Pa},
    -        )
    +        expected = attach_units(func(strip_units(ds), dim="x"), units)
             actual = func(ds, dim="x")
     
    -        assert_equal_with_units(expected, actual)
    +        assert_units_equal(expected, actual)
    +        assert_equal(expected, actual)
     
         @pytest.mark.parametrize(
             "unit,error",
    @@ -4003,14 +4065,7 @@ def test_missing_value_filling(self, func, dtype):
                     unit_registry.dimensionless, DimensionalityError, id="dimensionless"
                 ),
                 pytest.param(unit_registry.s, DimensionalityError, id="incompatible_unit"),
    -            pytest.param(
    -                unit_registry.cm,
    -                None,
    -                id="compatible_unit",
    -                marks=pytest.mark.xfail(
    -                    reason="where converts the array, not the fill value"
    -                ),
    -            ),
    +            pytest.param(unit_registry.cm, None, id="compatible_unit"),
                 pytest.param(unit_registry.m, None, id="identical_unit"),
             ),
         )
    @@ -4031,30 +4086,26 @@ def test_fillna(self, fill_value, unit, error, dtype):
                 np.array([4.3, 9.8, 7.5, np.nan, 8.2, np.nan]).astype(dtype)
                 * unit_registry.m
             )
    -        ds = xr.Dataset(
    -            data_vars={
    -                "a": xr.DataArray(data=array1, dims="x"),
    -                "b": xr.DataArray(data=array2, dims="x"),
    -            }
    -        )
    +        ds = xr.Dataset({"a": ("x", array1), "b": ("x", array2)})
    +        value = fill_value * unit
    +        units = extract_units(ds)
     
             if error is not None:
                 with pytest.raises(error):
    -                ds.fillna(value=fill_value * unit)
    +                ds.fillna(value=value)
     
                 return
     
    -        actual = ds.fillna(value=fill_value * unit)
    +        actual = ds.fillna(value=value)
             expected = attach_units(
                 strip_units(ds).fillna(
    -                value=strip_units(
    -                    convert_units(fill_value * unit, {None: unit_registry.m})
    -                )
    +                value=strip_units(convert_units(value, {None: unit_registry.m}))
                 ),
    -            {"a": unit_registry.m, "b": unit_registry.m},
    +            units,
             )
     
    -        assert_equal_with_units(expected, actual)
    +        assert_units_equal(expected, actual)
    +        assert_equal(expected, actual)
     
         def test_dropna(self, dtype):
             array1 = (
    @@ -4065,22 +4116,14 @@ def test_dropna(self, dtype):
                 np.array([4.3, 9.8, 7.5, np.nan, 8.2, np.nan]).astype(dtype)
                 * unit_registry.Pa
             )
    -        x = np.arange(len(array1))
    -        ds = xr.Dataset(
    -            data_vars={
    -                "a": xr.DataArray(data=array1, dims="x"),
    -                "b": xr.DataArray(data=array2, dims="x"),
    -            },
    -            coords={"x": x},
    -        )
    +        ds = xr.Dataset({"a": ("x", array1), "b": ("x", array2)})
    +        units = extract_units(ds)
     
    -        expected = attach_units(
    -            strip_units(ds).dropna(dim="x"),
    -            {"a": unit_registry.degK, "b": unit_registry.Pa},
    -        )
    +        expected = attach_units(strip_units(ds).dropna(dim="x"), units)
             actual = ds.dropna(dim="x")
     
    -        assert_equal_with_units(expected, actual)
    +        assert_units_equal(expected, actual)
    +        assert_equal(expected, actual)
     
         @pytest.mark.parametrize(
             "unit",
    @@ -4101,34 +4144,28 @@ def test_isin(self, unit, dtype):
                 np.array([4.3, 9.8, 7.5, np.nan, 8.2, np.nan]).astype(dtype)
                 * unit_registry.m
             )
    -        x = np.arange(len(array1))
    -        ds = xr.Dataset(
    -            data_vars={
    -                "a": xr.DataArray(data=array1, dims="x"),
    -                "b": xr.DataArray(data=array2, dims="x"),
    -            },
    -            coords={"x": x},
    -        )
    +        ds = xr.Dataset({"a": ("x", array1), "b": ("x", array2)})
     
             raw_values = np.array([1.4, np.nan, 2.3]).astype(dtype)
             values = raw_values * unit
     
    -        if (
    -            isinstance(values, unit_registry.Quantity)
    -            and values.check(unit_registry.m)
    -            and unit != unit_registry.m
    -        ):
    -            raw_values = values.to(unit_registry.m).magnitude
    +        converted_values = (
    +            convert_units(values, {None: unit_registry.m})
    +            if is_compatible(unit, unit_registry.m)
    +            else values
    +        )
     
    -        expected = strip_units(ds).isin(raw_values)
    -        if not isinstance(values, unit_registry.Quantity) or not values.check(
    -            unit_registry.m
    -        ):
    +        expected = strip_units(ds).isin(strip_units(converted_values))
    +        # TODO: use `unit_registry.is_compatible_with(unit, unit_registry.m)` instead.
    +        # Needs `pint>=0.12.1`, though, so we probably should wait until that is released.
    +        if not is_compatible(unit, unit_registry.m):
                 expected.a[:] = False
                 expected.b[:] = False
    +
             actual = ds.isin(values)
     
    -        assert_equal_with_units(actual, expected)
    +        assert_units_equal(expected, actual)
    +        assert_equal(expected, actual)
     
         @pytest.mark.parametrize(
             "variant", ("masking", "replacing_scalar", "replacing_array", "dropping")
    @@ -4150,13 +4187,8 @@ def test_where(self, variant, unit, error, dtype):
             array1 = np.linspace(0, 1, 10).astype(dtype) * original_unit
             array2 = np.linspace(-1, 0, 10).astype(dtype) * original_unit
     
    -        ds = xr.Dataset(
    -            data_vars={
    -                "a": xr.DataArray(data=array1, dims="x"),
    -                "b": xr.DataArray(data=array2, dims="x"),
    -            },
    -            coords={"x": np.arange(len(array1))},
    -        )
    +        ds = xr.Dataset({"a": ("x", array1), "b": ("x", array2)})
    +        units = extract_units(ds)
     
             condition = ds < 0.5 * original_unit
             other = np.linspace(-2, -1, 10).astype(dtype) * unit
    @@ -4178,15 +4210,13 @@ def test_where(self, variant, unit, error, dtype):
                 for key, value in kwargs.items()
             }
     
    -        expected = attach_units(
    -            strip_units(ds).where(**kwargs_without_units),
    -            {"a": original_unit, "b": original_unit},
    -        )
    +        expected = attach_units(strip_units(ds).where(**kwargs_without_units), units)
             actual = ds.where(**kwargs)
     
    -        assert_equal_with_units(expected, actual)
    +        assert_units_equal(expected, actual)
    +        assert_equal(expected, actual)
     
    -    @pytest.mark.xfail(reason="interpolate strips units")
    +    @pytest.mark.xfail(reason="interpolate_na uses numpy.vectorize")
         def test_interpolate_na(self, dtype):
             array1 = (
                 np.array([1.4, np.nan, 2.3, np.nan, np.nan, 9.1]).astype(dtype)
    @@ -4196,24 +4226,15 @@ def test_interpolate_na(self, dtype):
                 np.array([4.3, 9.8, 7.5, np.nan, 8.2, np.nan]).astype(dtype)
                 * unit_registry.Pa
             )
    -        x = np.arange(len(array1))
    -        ds = xr.Dataset(
    -            data_vars={
    -                "a": xr.DataArray(data=array1, dims="x"),
    -                "b": xr.DataArray(data=array2, dims="x"),
    -            },
    -            coords={"x": x},
    -        )
    +        ds = xr.Dataset({"a": ("x", array1), "b": ("x", array2)})
    +        units = extract_units(ds)
     
    -        expected = attach_units(
    -            strip_units(ds).interpolate_na(dim="x"),
    -            {"a": unit_registry.degK, "b": unit_registry.Pa},
    -        )
    +        expected = attach_units(strip_units(ds).interpolate_na(dim="x"), units)
             actual = ds.interpolate_na(dim="x")
     
    -        assert_equal_with_units(expected, actual)
    +        assert_units_equal(expected, actual)
    +        assert_equal(expected, actual)
     
    -    @pytest.mark.xfail(reason="wrong argument order for `where`")
         @pytest.mark.parametrize(
             "unit,error",
             (
    @@ -4226,31 +4247,40 @@ def test_interpolate_na(self, dtype):
                 pytest.param(unit_registry.m, None, id="same_unit"),
             ),
         )
    -    def test_combine_first(self, unit, error, dtype):
    +    @pytest.mark.parametrize(
    +        "variant",
    +        (
    +            "data",
    +            pytest.param(
    +                "dims", marks=pytest.mark.xfail(reason="indexes don't support units"),
    +            ),
    +        ),
    +    )
    +    def test_combine_first(self, variant, unit, error, dtype):
    +        variants = {
    +            "data": (unit_registry.m, unit, 1, 1),
    +            "dims": (1, 1, unit_registry.m, unit),
    +        }
    +        data_unit, other_data_unit, dims_unit, other_dims_unit = variants.get(variant)
    +
             array1 = (
    -            np.array([1.4, np.nan, 2.3, np.nan, np.nan, 9.1]).astype(dtype)
    -            * unit_registry.m
    +            np.array([1.4, np.nan, 2.3, np.nan, np.nan, 9.1]).astype(dtype) * data_unit
             )
             array2 = (
    -            np.array([4.3, 9.8, 7.5, np.nan, 8.2, np.nan]).astype(dtype)
    -            * unit_registry.m
    +            np.array([4.3, 9.8, 7.5, np.nan, 8.2, np.nan]).astype(dtype) * data_unit
             )
    -        x = np.arange(len(array1))
    +        x = np.arange(len(array1)) * dims_unit
             ds = xr.Dataset(
    -            data_vars={
    -                "a": xr.DataArray(data=array1, dims="x"),
    -                "b": xr.DataArray(data=array2, dims="x"),
    -            },
    -            coords={"x": x},
    +            data_vars={"a": ("x", array1), "b": ("x", array2)}, coords={"x": x},
             )
    -        other_array1 = np.ones_like(array1) * unit
    -        other_array2 = -1 * np.ones_like(array2) * unit
    +        units = extract_units(ds)
    +
    +        other_array1 = np.ones_like(array1) * other_data_unit
    +        other_array2 = np.full_like(array2, fill_value=-1) * other_data_unit
    +        other_x = (np.arange(array1.shape[0]) + 5) * other_dims_unit
             other = xr.Dataset(
    -            data_vars={
    -                "a": xr.DataArray(data=other_array1, dims="x"),
    -                "b": xr.DataArray(data=other_array2, dims="x"),
    -            },
    -            coords={"x": np.arange(array1.shape[0])},
    +            data_vars={"a": ("x", other_array1), "b": ("x", other_array2)},
    +            coords={"x": other_x},
             )
     
             if error is not None:
    @@ -4260,16 +4290,13 @@ def test_combine_first(self, unit, error, dtype):
                 return
     
             expected = attach_units(
    -            strip_units(ds).combine_first(
    -                strip_units(
    -                    convert_units(other, {"a": unit_registry.m, "b": unit_registry.m})
    -                )
    -            ),
    -            {"a": unit_registry.m, "b": unit_registry.m},
    +            strip_units(ds).combine_first(strip_units(convert_units(other, units))),
    +            units,
             )
             actual = ds.combine_first(other)
     
    -        assert_equal_with_units(expected, actual)
    +        assert_units_equal(expected, actual)
    +        assert_equal(expected, actual)
     
         @pytest.mark.parametrize(
             "unit",
    @@ -4282,7 +4309,7 @@ def test_combine_first(self, unit, error, dtype):
             ),
         )
         @pytest.mark.parametrize(
    -        "variation",
    +        "variant",
             (
                 "data",
                 pytest.param(
    @@ -4291,50 +4318,67 @@ def test_combine_first(self, unit, error, dtype):
                 "coords",
             ),
         )
    -    @pytest.mark.parametrize("func", (method("equals"), method("identical")), ids=repr)
    -    def test_comparisons(self, func, variation, unit, dtype):
    -        def is_compatible(a, b):
    -            a = a if a is not None else 1
    -            b = b if b is not None else 1
    -            quantity = np.arange(5) * a
    -
    -            return a == b or quantity.check(b)
    -
    +    @pytest.mark.parametrize(
    +        "func",
    +        (
    +            method("equals"),
    +            pytest.param(
    +                method("identical"),
    +                marks=pytest.mark.skip("behaviour of identical is unclear"),
    +            ),
    +        ),
    +        ids=repr,
    +    )
    +    def test_comparisons(self, func, variant, unit, dtype):
             array1 = np.linspace(0, 5, 10).astype(dtype)
             array2 = np.linspace(-5, 0, 10).astype(dtype)
     
             coord = np.arange(len(array1)).astype(dtype)
     
    -        original_unit = unit_registry.m
    -        quantity1 = array1 * original_unit
    -        quantity2 = array2 * original_unit
    -        x = coord * original_unit
    -        y = coord * original_unit
    +        variants = {
    +            "data": (unit_registry.m, 1, 1),
    +            "dims": (1, unit_registry.m, 1),
    +            "coords": (1, 1, unit_registry.m),
    +        }
    +        data_unit, dim_unit, coord_unit = variants.get(variant)
     
    -        units = {"data": (unit, 1, 1), "dims": (1, unit, 1), "coords": (1, 1, unit)}
    -        data_unit, dim_unit, coord_unit = units.get(variation)
    +        a = array1 * data_unit
    +        b = array2 * data_unit
    +        x = coord * dim_unit
    +        y = coord * coord_unit
     
             ds = xr.Dataset(
    -            data_vars={
    -                "a": xr.DataArray(data=quantity1, dims="x"),
    -                "b": xr.DataArray(data=quantity2, dims="x"),
    -            },
    -            coords={"x": x, "y": ("x", y)},
    +            data_vars={"a": ("x", a), "b": ("x", b)}, coords={"x": x, "y": ("x", y)},
             )
    +        units = extract_units(ds)
    +
    +        other_variants = {
    +            "data": (unit, 1, 1),
    +            "dims": (1, unit, 1),
    +            "coords": (1, 1, unit),
    +        }
    +        other_data_unit, other_dim_unit, other_coord_unit = other_variants.get(variant)
     
             other_units = {
    -            "a": data_unit if quantity1.check(data_unit) else None,
    -            "b": data_unit if quantity2.check(data_unit) else None,
    -            "x": dim_unit if x.check(dim_unit) else None,
    -            "y": coord_unit if y.check(coord_unit) else None,
    +            "a": other_data_unit,
    +            "b": other_data_unit,
    +            "x": other_dim_unit,
    +            "y": other_coord_unit,
             }
    -        other = attach_units(strip_units(convert_units(ds, other_units)), other_units)
     
    -        units = extract_units(ds)
    +        to_convert = {
    +            key: unit if is_compatible(unit, reference) else None
    +            for key, (unit, reference) in zip_mappings(units, other_units)
    +        }
    +        # convert units where possible, then attach all units to the converted dataset
    +        other = attach_units(strip_units(convert_units(ds, to_convert)), other_units)
             other_units = extract_units(other)
     
    +        # make sure all units are compatible and only then try to
    +        # convert and compare values
             equal_ds = all(
    -            is_compatible(units[name], other_units[name]) for name in units.keys()
    +            is_compatible(unit, other_unit)
    +            for _, (unit, other_unit) in zip_mappings(units, other_units)
             ) and (strip_units(ds).equals(strip_units(convert_units(other, units))))
             equal_units = units == other_units
             expected = equal_ds and (func.name != "identical" or equal_units)
    @@ -4343,6 +4387,9 @@ def is_compatible(a, b):
     
             assert expected == actual
     
    +    # TODO: eventually use another decorator / wrapper function that
    +    # applies a filter to the parametrize combinations:
    +    # we only need a single test for data
         @pytest.mark.parametrize(
             "unit",
             (
    @@ -4353,14 +4400,29 @@ def is_compatible(a, b):
                 pytest.param(unit_registry.m, id="identical_unit"),
             ),
         )
    -    def test_broadcast_like(self, unit, dtype):
    -        array1 = np.linspace(1, 2, 2 * 1).reshape(2, 1).astype(dtype) * unit_registry.Pa
    -        array2 = np.linspace(0, 1, 2 * 3).reshape(2, 3).astype(dtype) * unit_registry.Pa
    +    @pytest.mark.parametrize(
    +        "variant",
    +        (
    +            "data",
    +            pytest.param(
    +                "dims", marks=pytest.mark.xfail(reason="indexes don't support units"),
    +            ),
    +        ),
    +    )
    +    def test_broadcast_like(self, variant, unit, dtype):
    +        variants = {
    +            "data": ((unit_registry.m, unit), (1, 1)),
    +            "dims": ((1, 1), (unit_registry.m, unit)),
    +        }
    +        (data_unit1, data_unit2), (dim_unit1, dim_unit2) = variants.get(variant)
     
    -        x1 = np.arange(2) * unit_registry.m
    -        x2 = np.arange(2) * unit
    -        y1 = np.array([0]) * unit_registry.m
    -        y2 = np.arange(3) * unit
    +        array1 = np.linspace(1, 2, 2 * 1).reshape(2, 1).astype(dtype) * data_unit1
    +        array2 = np.linspace(0, 1, 2 * 3).reshape(2, 3).astype(dtype) * data_unit2
    +
    +        x1 = np.arange(2) * dim_unit1
    +        x2 = np.arange(2) * dim_unit2
    +        y1 = np.array([0]) * dim_unit1
    +        y2 = np.arange(3) * dim_unit2
     
             ds1 = xr.Dataset(
                 data_vars={"a": (("x", "y"), array1)}, coords={"x": x1, "y": y1}
    @@ -4374,7 +4436,8 @@ def test_broadcast_like(self, unit, dtype):
             )
             actual = ds1.broadcast_like(ds2)
     
    -        assert_equal_with_units(expected, actual)
    +        assert_units_equal(expected, actual)
    +        assert_equal(expected, actual)
     
         @pytest.mark.parametrize(
             "unit",
    @@ -4387,32 +4450,25 @@ def test_broadcast_like(self, unit, dtype):
             ),
         )
         def test_broadcast_equals(self, unit, dtype):
    +        # TODO: does this use indexes?
             left_array1 = np.ones(shape=(2, 3), dtype=dtype) * unit_registry.m
             left_array2 = np.zeros(shape=(3, 6), dtype=dtype) * unit_registry.m
     
             right_array1 = np.ones(shape=(2,)) * unit
    -        right_array2 = np.ones(shape=(3,)) * unit
    +        right_array2 = np.zeros(shape=(3,)) * unit
     
             left = xr.Dataset(
    -            data_vars={
    -                "a": xr.DataArray(data=left_array1, dims=("x", "y")),
    -                "b": xr.DataArray(data=left_array2, dims=("y", "z")),
    -            }
    -        )
    -        right = xr.Dataset(
    -            data_vars={
    -                "a": xr.DataArray(data=right_array1, dims="x"),
    -                "b": xr.DataArray(data=right_array2, dims="y"),
    -            }
    +            {"a": (("x", "y"), left_array1), "b": (("y", "z"), left_array2)},
             )
    +        right = xr.Dataset({"a": ("x", right_array1), "b": ("y", right_array2)})
     
    -        units = {
    -            **extract_units(left),
    -            **({} if left_array1.check(unit) else {"a": None, "b": None}),
    -        }
    -        expected = strip_units(left).broadcast_equals(
    -            strip_units(convert_units(right, units))
    -        ) & left_array1.check(unit)
    +        units = merge_mappings(
    +            extract_units(left),
    +            {} if is_compatible(left_array1, unit) else {"a": None, "b": None},
    +        )
    +        expected = is_compatible(left_array1, unit) and strip_units(
    +            left
    +        ).broadcast_equals(strip_units(convert_units(right, units)))
             actual = left.broadcast_equals(right)
     
             assert expected == actual
    @@ -4422,68 +4478,74 @@ def test_broadcast_equals(self, unit, dtype):
             (method("unstack"), method("reset_index", "v"), method("reorder_levels")),
             ids=repr,
         )
    -    def test_stacking_stacked(self, func, dtype):
    -        array1 = (
    -            np.linspace(0, 10, 5 * 10).reshape(5, 10).astype(dtype) * unit_registry.m
    -        )
    +    @pytest.mark.parametrize(
    +        "variant",
    +        (
    +            "data",
    +            pytest.param(
    +                "dims", marks=pytest.mark.xfail(reason="indexes don't support units"),
    +            ),
    +        ),
    +    )
    +    def test_stacking_stacked(self, variant, func, dtype):
    +        variants = {
    +            "data": (unit_registry.m, 1),
    +            "dims": (1, unit_registry.m),
    +        }
    +        data_unit, dim_unit = variants.get(variant)
    +
    +        array1 = np.linspace(0, 10, 5 * 10).reshape(5, 10).astype(dtype) * data_unit
             array2 = (
                 np.linspace(-10, 0, 5 * 10 * 15).reshape(5, 10, 15).astype(dtype)
    -            * unit_registry.m
    +            * data_unit
             )
     
    -        x = np.arange(array1.shape[0])
    -        y = np.arange(array1.shape[1])
    -        z = np.arange(array2.shape[2])
    +        x = np.arange(array1.shape[0]) * dim_unit
    +        y = np.arange(array1.shape[1]) * dim_unit
    +        z = np.arange(array2.shape[2]) * dim_unit
     
             ds = xr.Dataset(
    -            data_vars={
    -                "a": xr.DataArray(data=array1, dims=("x", "y")),
    -                "b": xr.DataArray(data=array2, dims=("x", "y", "z")),
    -            },
    +            data_vars={"a": (("x", "y"), array1), "b": (("x", "y", "z"), array2)},
                 coords={"x": x, "y": y, "z": z},
             )
    +        units = extract_units(ds)
     
             stacked = ds.stack(v=("x", "y"))
     
    -        expected = attach_units(
    -            func(strip_units(stacked)), {"a": unit_registry.m, "b": unit_registry.m}
    -        )
    +        expected = attach_units(func(strip_units(stacked)), units)
             actual = func(stacked)
     
    -        assert_equal_with_units(expected, actual)
    +        assert_units_equal(expected, actual)
    +        assert_equal(expected, actual)
     
    -    @pytest.mark.xfail(reason="does not work with quantities yet")
    +    @pytest.mark.xfail(
    +        reason="stacked dimension's labels have to be hashable, but are numpy.arrays"
    +    )
         def test_to_stacked_array(self, dtype):
    -        labels = np.arange(5).astype(dtype) * unit_registry.s
    -        arrays = {name: np.linspace(0, 1, 10) * unit_registry.m for name in labels}
    +        labels = range(5) * unit_registry.s
    +        arrays = {
    +            name: np.linspace(0, 1, 10).astype(dtype) * unit_registry.m
    +            for name in labels
    +        }
     
    -        ds = xr.Dataset(
    -            data_vars={
    -                name: xr.DataArray(data=array, dims="x")
    -                for name, array in arrays.items()
    -            }
    -        )
    +        ds = xr.Dataset({name: ("x", array) for name, array in arrays.items()})
    +        units = {None: unit_registry.m, "y": unit_registry.s}
     
             func = method("to_stacked_array", "z", variable_dim="y", sample_dims=["x"])
     
             actual = func(ds).rename(None)
    -        expected = attach_units(
    -            func(strip_units(ds)).rename(None),
    -            {None: unit_registry.m, "y": unit_registry.s},
    -        )
    +        expected = attach_units(func(strip_units(ds)).rename(None), units)
     
    -        assert_equal_with_units(expected, actual)
    +        assert_units_equal(expected, actual)
    +        assert_equal(expected, actual)
     
         @pytest.mark.parametrize(
             "func",
             (
                 method("transpose", "y", "x", "z1", "z2"),
    -            method("stack", a=("x", "y")),
    +            method("stack", u=("x", "y")),
                 method("set_index", x="x2"),
    -            pytest.param(
    -                method("shift", x=2),
    -                marks=pytest.mark.xfail(reason="tries to concatenate nan arrays"),
    -            ),
    +            method("shift", x=2),
                 method("roll", x=2, roll_coords=False),
                 method("sortby", "x2"),
             ),
    @@ -4508,20 +4570,19 @@ def test_stacking_reordering(self, func, dtype):
     
             ds = xr.Dataset(
                 data_vars={
    -                "a": xr.DataArray(data=array1, dims=("x", "y", "z1")),
    -                "b": xr.DataArray(data=array2, dims=("x", "y", "z2")),
    +                "a": (("x", "y", "z1"), array1),
    +                "b": (("x", "y", "z2"), array2),
                 },
                 coords={"x": x, "y": y, "z1": z1, "z2": z2, "x2": ("x", x2)},
             )
    +        units = extract_units(ds)
     
    -        expected = attach_units(
    -            func(strip_units(ds)), {"a": unit_registry.Pa, "b": unit_registry.degK}
    -        )
    +        expected = attach_units(func(strip_units(ds)), units)
             actual = func(ds)
     
    -        assert_equal_with_units(expected, actual)
    +        assert_units_equal(expected, actual)
    +        assert_equal(expected, actual)
     
    -    @pytest.mark.xfail(reason="indexes strip units")
         @pytest.mark.parametrize(
             "indices",
             (
    @@ -4533,22 +4594,14 @@ def test_isel(self, indices, dtype):
             array1 = np.arange(10).astype(dtype) * unit_registry.s
             array2 = np.linspace(0, 1, 10).astype(dtype) * unit_registry.Pa
     
    -        x = np.arange(len(array1)) * unit_registry.m
    -        ds = xr.Dataset(
    -            data_vars={
    -                "a": xr.DataArray(data=array1, dims="x"),
    -                "b": xr.DataArray(data=array2, dims="x"),
    -            },
    -            coords={"x": x},
    -        )
    +        ds = xr.Dataset(data_vars={"a": ("x", array1), "b": ("x", array2)})
    +        units = extract_units(ds)
     
    -        expected = attach_units(
    -            strip_units(ds).isel(x=indices),
    -            {"a": unit_registry.s, "b": unit_registry.Pa, "x": unit_registry.m},
    -        )
    +        expected = attach_units(strip_units(ds).isel(x=indices), units)
             actual = ds.isel(x=indices)
     
    -        assert_equal_with_units(expected, actual)
    +        assert_units_equal(expected, actual)
    +        assert_equal(expected, actual)
     
         @pytest.mark.xfail(reason="indexes don't support units")
         @pytest.mark.parametrize(
    @@ -4565,7 +4618,7 @@ def test_isel(self, indices, dtype):
                 pytest.param(1, KeyError, id="no_units"),
                 pytest.param(unit_registry.dimensionless, KeyError, id="dimensionless"),
                 pytest.param(unit_registry.degree, KeyError, id="incompatible_unit"),
    -            pytest.param(unit_registry.dm, KeyError, id="compatible_unit"),
    +            pytest.param(unit_registry.mm, KeyError, id="compatible_unit"),
                 pytest.param(unit_registry.m, None, id="identical_unit"),
             ),
         )
    @@ -4584,20 +4637,24 @@ def test_sel(self, raw_values, unit, error, dtype):
     
             values = raw_values * unit
     
    -        if error is not None and not (
    -            isinstance(raw_values, (int, float)) and x.check(unit)
    -        ):
    +        # TODO: if we choose dm as compatible unit, single value keys
    +        # can be found. Should we check that?
    +        if error is not None:
                 with pytest.raises(error):
                     ds.sel(x=values)
     
                 return
     
             expected = attach_units(
    -            strip_units(ds).sel(x=strip_units(convert_units(values, {None: x.units}))),
    -            {"a": array1.units, "b": array2.units, "x": x.units},
    +            strip_units(ds).sel(
    +                x=strip_units(convert_units(values, {None: unit_registry.m}))
    +            ),
    +            extract_units(ds),
             )
             actual = ds.sel(x=values)
    -        assert_equal_with_units(expected, actual)
    +
    +        assert_units_equal(expected, actual)
    +        assert_equal(expected, actual)
     
         @pytest.mark.xfail(reason="indexes don't support units")
         @pytest.mark.parametrize(
    @@ -4614,7 +4671,7 @@ def test_sel(self, raw_values, unit, error, dtype):
                 pytest.param(1, KeyError, id="no_units"),
                 pytest.param(unit_registry.dimensionless, KeyError, id="dimensionless"),
                 pytest.param(unit_registry.degree, KeyError, id="incompatible_unit"),
    -            pytest.param(unit_registry.dm, KeyError, id="compatible_unit"),
    +            pytest.param(unit_registry.mm, KeyError, id="compatible_unit"),
                 pytest.param(unit_registry.m, None, id="identical_unit"),
             ),
         )
    @@ -4633,9 +4690,9 @@ def test_drop_sel(self, raw_values, unit, error, dtype):
     
             values = raw_values * unit
     
    -        if error is not None and not (
    -            isinstance(raw_values, (int, float)) and x.check(unit)
    -        ):
    +        # TODO: if we choose dm as compatible unit, single value keys
    +        # can be found. Should we check that?
    +        if error is not None:
                 with pytest.raises(error):
                     ds.drop_sel(x=values)
     
    @@ -4643,12 +4700,14 @@ def test_drop_sel(self, raw_values, unit, error, dtype):
     
             expected = attach_units(
                 strip_units(ds).drop_sel(
    -                x=strip_units(convert_units(values, {None: x.units}))
    +                x=strip_units(convert_units(values, {None: unit_registry.m}))
                 ),
                 extract_units(ds),
             )
             actual = ds.drop_sel(x=values)
    -        assert_equal_with_units(expected, actual)
    +
    +        assert_units_equal(expected, actual)
    +        assert_equal(expected, actual)
     
         @pytest.mark.xfail(reason="indexes don't support units")
         @pytest.mark.parametrize(
    @@ -4665,7 +4724,7 @@ def test_drop_sel(self, raw_values, unit, error, dtype):
                 pytest.param(1, KeyError, id="no_units"),
                 pytest.param(unit_registry.dimensionless, KeyError, id="dimensionless"),
                 pytest.param(unit_registry.degree, KeyError, id="incompatible_unit"),
    -            pytest.param(unit_registry.dm, KeyError, id="compatible_unit"),
    +            pytest.param(unit_registry.mm, KeyError, id="compatible_unit"),
                 pytest.param(unit_registry.m, None, id="identical_unit"),
             ),
         )
    @@ -4684,9 +4743,9 @@ def test_loc(self, raw_values, unit, error, dtype):
     
             values = raw_values * unit
     
    -        if error is not None and not (
    -            isinstance(raw_values, (int, float)) and x.check(unit)
    -        ):
    +        # TODO: if we choose dm as compatible unit, single value keys
    +        # can be found. Should we check that?
    +        if error is not None:
                 with pytest.raises(error):
                     ds.loc[{"x": values}]
     
    @@ -4694,12 +4753,14 @@ def test_loc(self, raw_values, unit, error, dtype):
     
             expected = attach_units(
                 strip_units(ds).loc[
    -                {"x": strip_units(convert_units(values, {None: x.units}))}
    +                {"x": strip_units(convert_units(values, {None: unit_registry.m}))}
                 ],
    -            {"a": array1.units, "b": array2.units, "x": x.units},
    +            extract_units(ds),
             )
             actual = ds.loc[{"x": values}]
    -        assert_equal_with_units(expected, actual)
    +
    +        assert_units_equal(expected, actual)
    +        assert_equal(expected, actual)
     
         @pytest.mark.parametrize(
             "func",
    @@ -4710,14 +4771,34 @@ def test_loc(self, raw_values, unit, error, dtype):
             ),
             ids=repr,
         )
    -    def test_head_tail_thin(self, func, dtype):
    -        array1 = np.linspace(1, 2, 10 * 5).reshape(10, 5) * unit_registry.degK
    -        array2 = np.linspace(1, 2, 10 * 8).reshape(10, 8) * unit_registry.Pa
    +    @pytest.mark.parametrize(
    +        "variant",
    +        (
    +            "data",
    +            pytest.param(
    +                "dims", marks=pytest.mark.xfail(reason="indexes don't support units")
    +            ),
    +            "coords",
    +        ),
    +    )
    +    def test_head_tail_thin(self, func, variant, dtype):
    +        variants = {
    +            "data": ((unit_registry.degK, unit_registry.Pa), 1, 1),
    +            "dims": ((1, 1), unit_registry.m, 1),
    +            "coords": ((1, 1), 1, unit_registry.m),
    +        }
    +        (unit_a, unit_b), dim_unit, coord_unit = variants.get(variant)
    +
    +        array1 = np.linspace(1, 2, 10 * 5).reshape(10, 5) * unit_a
    +        array2 = np.linspace(1, 2, 10 * 8).reshape(10, 8) * unit_b
     
             coords = {
    -            "x": np.arange(10) * unit_registry.m,
    -            "y": np.arange(5) * unit_registry.m,
    -            "z": np.arange(8) * unit_registry.m,
    +            "x": np.arange(10) * dim_unit,
    +            "y": np.arange(5) * dim_unit,
    +            "z": np.arange(8) * dim_unit,
    +            "u": ("x", np.linspace(0, 1, 10) * coord_unit),
    +            "v": ("y", np.linspace(1, 2, 5) * coord_unit),
    +            "w": ("z", np.linspace(-1, 0, 8) * coord_unit),
             }
     
             ds = xr.Dataset(
    @@ -4731,8 +4812,10 @@ def test_head_tail_thin(self, func, dtype):
             expected = attach_units(func(strip_units(ds)), extract_units(ds))
             actual = func(ds)
     
    -        assert_equal_with_units(expected, actual)
    +        assert_units_equal(expected, actual)
    +        assert_equal(expected, actual)
     
    +    @pytest.mark.parametrize("dim", ("x", "y", "z", "t", "all"))
         @pytest.mark.parametrize(
             "shape",
             (
    @@ -4743,13 +4826,9 @@ def test_head_tail_thin(self, func, dtype):
                 pytest.param((1, 10, 1, 20), id="first and last dimension squeezable"),
             ),
         )
    -    def test_squeeze(self, shape, dtype):
    +    def test_squeeze(self, shape, dim, dtype):
             names = "xyzt"
    -        coords = {
    -            name: np.arange(length).astype(dtype)
    -            * (unit_registry.m if name != "t" else unit_registry.s)
    -            for name, length in zip(names, shape)
    -        }
    +        dim_lengths = dict(zip(names, shape))
             array1 = (
                 np.linspace(0, 1, 10 * 20).astype(dtype).reshape(shape) * unit_registry.degK
             )
    @@ -4759,74 +4838,59 @@ def test_squeeze(self, shape, dtype):
     
             ds = xr.Dataset(
                 data_vars={
    -                "a": xr.DataArray(data=array1, dims=tuple(names[: len(shape)])),
    -                "b": xr.DataArray(data=array2, dims=tuple(names[: len(shape)])),
    +                "a": (tuple(names[: len(shape)]), array1),
    +                "b": (tuple(names[: len(shape)]), array2),
                 },
    -            coords=coords,
             )
             units = extract_units(ds)
     
    -        expected = attach_units(strip_units(ds).squeeze(), units)
    +        kwargs = {"dim": dim} if dim != "all" and dim_lengths.get(dim, 0) == 1 else {}
     
    -        actual = ds.squeeze()
    -        assert_equal_with_units(actual, expected)
    +        expected = attach_units(strip_units(ds).squeeze(**kwargs), units)
     
    -        # try squeezing the dimensions separately
    -        names = tuple(dim for dim, coord in coords.items() if len(coord) == 1)
    -        for name in names:
    -            expected = attach_units(strip_units(ds).squeeze(dim=name), units)
    -            actual = ds.squeeze(dim=name)
    -            assert_equal_with_units(actual, expected)
    +        actual = ds.squeeze(**kwargs)
    +
    +        assert_units_equal(expected, actual)
    +        assert_equal(expected, actual)
     
    -    @pytest.mark.xfail(reason="ignores units")
    +    @pytest.mark.parametrize("variant", ("data", "coords"))
         @pytest.mark.parametrize(
    -        "unit,error",
    +        "func",
             (
    -            pytest.param(1, DimensionalityError, id="no_unit"),
                 pytest.param(
    -                unit_registry.dimensionless, DimensionalityError, id="dimensionless"
    +                method("interp"), marks=pytest.mark.xfail(reason="uses scipy")
                 ),
    -            pytest.param(unit_registry.s, DimensionalityError, id="incompatible_unit"),
    -            pytest.param(unit_registry.cm, None, id="compatible_unit"),
    -            pytest.param(unit_registry.m, None, id="identical_unit"),
    +            method("reindex"),
             ),
    +        ids=repr,
         )
    -    def test_interp(self, unit, error):
    -        array1 = np.linspace(1, 2, 10 * 5).reshape(10, 5) * unit_registry.degK
    -        array2 = np.linspace(1, 2, 10 * 8).reshape(10, 8) * unit_registry.Pa
    -
    -        coords = {
    -            "x": np.arange(10) * unit_registry.m,
    -            "y": np.arange(5) * unit_registry.m,
    -            "z": np.arange(8) * unit_registry.s,
    +    def test_interp_reindex(self, func, variant, dtype):
    +        variants = {
    +            "data": (unit_registry.m, 1),
    +            "coords": (1, unit_registry.m),
             }
    +        data_unit, coord_unit = variants.get(variant)
     
    -        ds = xr.Dataset(
    -            data_vars={
    -                "a": xr.DataArray(data=array1, dims=("x", "y")),
    -                "b": xr.DataArray(data=array2, dims=("x", "z")),
    -            },
    -            coords=coords,
    -        )
    +        array1 = np.linspace(-1, 0, 10).astype(dtype) * data_unit
    +        array2 = np.linspace(0, 1, 10).astype(dtype) * data_unit
     
    -        new_coords = (np.arange(10) + 0.5) * unit
    +        y = np.arange(10) * coord_unit
     
    -        if error is not None:
    -            with pytest.raises(error):
    -                ds.interp(x=new_coords)
    -
    -            return
    +        x = np.arange(10)
    +        new_x = np.arange(8) + 0.5
     
    -        units = extract_units(ds)
    -        expected = attach_units(
    -            strip_units(ds).interp(x=strip_units(convert_units(new_coords, units))),
    -            units,
    +        ds = xr.Dataset(
    +            {"a": ("x", array1), "b": ("x", array2)}, coords={"x": x, "y": ("x", y)}
             )
    -        actual = ds.interp(x=new_coords)
    +        units = extract_units(ds)
    +
    +        expected = attach_units(func(strip_units(ds), x=new_x), units)
    +        actual = func(ds, x=new_x)
     
    -        assert_equal_with_units(actual, expected)
    +        assert_units_equal(expected, actual)
    +        assert_equal(expected, actual)
     
    -    @pytest.mark.xfail(reason="ignores units")
    +    @pytest.mark.xfail(reason="indexes don't support units")
         @pytest.mark.parametrize(
             "unit,error",
             (
    @@ -4839,106 +4903,67 @@ def test_interp(self, unit, error):
                 pytest.param(unit_registry.m, None, id="identical_unit"),
             ),
         )
    -    def test_interp_like(self, unit, error, dtype):
    -        array1 = (
    -            np.linspace(0, 10, 10 * 5).reshape(10, 5).astype(dtype) * unit_registry.degK
    -        )
    -        array2 = (
    -            np.linspace(10, 20, 10 * 8).reshape(10, 8).astype(dtype) * unit_registry.Pa
    -        )
    +    @pytest.mark.parametrize("func", (method("interp"), method("reindex")), ids=repr)
    +    def test_interp_reindex_indexing(self, func, unit, error, dtype):
    +        array1 = np.linspace(-1, 0, 10).astype(dtype)
    +        array2 = np.linspace(0, 1, 10).astype(dtype)
     
    -        coords = {
    -            "x": np.arange(10) * unit_registry.m,
    -            "y": np.arange(5) * unit_registry.m,
    -            "z": np.arange(8) * unit_registry.m,
    -        }
    -
    -        ds = xr.Dataset(
    -            data_vars={
    -                "a": xr.DataArray(data=array1, dims=("x", "y")),
    -                "b": xr.DataArray(data=array2, dims=("x", "z")),
    -            },
    -            coords=coords,
    -        )
    +        x = np.arange(10) * unit_registry.m
    +        new_x = (np.arange(8) + 0.5) * unit
     
    -        other = xr.Dataset(
    -            data_vars={
    -                "c": xr.DataArray(data=np.empty((20, 10)), dims=("x", "y")),
    -                "d": xr.DataArray(data=np.empty((20, 15)), dims=("x", "z")),
    -            },
    -            coords={
    -                "x": (np.arange(20) + 0.3) * unit,
    -                "y": (np.arange(10) - 0.2) * unit,
    -                "z": (np.arange(15) + 0.4) * unit,
    -            },
    -        )
    +        ds = xr.Dataset({"a": ("x", array1), "b": ("x", array2)}, coords={"x": x})
    +        units = extract_units(ds)
     
             if error is not None:
                 with pytest.raises(error):
    -                ds.interp_like(other)
    +                func(ds, x=new_x)
     
                 return
     
    -        units = extract_units(ds)
    -        expected = attach_units(
    -            strip_units(ds).interp_like(strip_units(convert_units(other, units))), units
    -        )
    -        actual = ds.interp_like(other)
    +        expected = attach_units(func(strip_units(ds), x=new_x), units)
    +        actual = func(ds, x=new_x)
     
    -        assert_equal_with_units(actual, expected)
    +        assert_units_equal(expected, actual)
    +        assert_equal(expected, actual)
     
    -    @pytest.mark.xfail(reason="indexes don't support units")
    +    @pytest.mark.parametrize("variant", ("data", "coords"))
         @pytest.mark.parametrize(
    -        "unit,error",
    +        "func",
             (
    -            pytest.param(1, DimensionalityError, id="no_unit"),
                 pytest.param(
    -                unit_registry.dimensionless, DimensionalityError, id="dimensionless"
    +                method("interp_like"), marks=pytest.mark.xfail(reason="uses scipy")
                 ),
    -            pytest.param(unit_registry.s, DimensionalityError, id="incompatible_unit"),
    -            pytest.param(unit_registry.cm, None, id="compatible_unit"),
    -            pytest.param(unit_registry.m, None, id="identical_unit"),
    +            method("reindex_like"),
             ),
    +        ids=repr,
         )
    -    def test_reindex(self, unit, error, dtype):
    -        array1 = (
    -            np.linspace(1, 2, 10 * 5).reshape(10, 5).astype(dtype) * unit_registry.degK
    -        )
    -        array2 = (
    -            np.linspace(1, 2, 10 * 8).reshape(10, 8).astype(dtype) * unit_registry.Pa
    -        )
    -
    -        coords = {
    -            "x": np.arange(10) * unit_registry.m,
    -            "y": np.arange(5) * unit_registry.m,
    -            "z": np.arange(8) * unit_registry.s,
    +    def test_interp_reindex_like(self, func, variant, dtype):
    +        variants = {
    +            "data": (unit_registry.m, 1),
    +            "coords": (1, unit_registry.m),
             }
    +        data_unit, coord_unit = variants.get(variant)
     
    -        ds = xr.Dataset(
    -            data_vars={
    -                "a": xr.DataArray(data=array1, dims=("x", "y")),
    -                "b": xr.DataArray(data=array2, dims=("x", "z")),
    -            },
    -            coords=coords,
    -        )
    +        array1 = np.linspace(-1, 0, 10).astype(dtype) * data_unit
    +        array2 = np.linspace(0, 1, 10).astype(dtype) * data_unit
     
    -        new_coords = (np.arange(10) + 0.5) * unit
    +        y = np.arange(10) * coord_unit
     
    -        if error is not None:
    -            with pytest.raises(error):
    -                ds.reindex(x=new_coords)
    -
    -            return
    +        x = np.arange(10)
    +        new_x = np.arange(8) + 0.5
     
    -        expected = attach_units(
    -            strip_units(ds).reindex(
    -                x=strip_units(convert_units(new_coords, {None: coords["x"].units}))
    -            ),
    -            extract_units(ds),
    +        ds = xr.Dataset(
    +            {"a": ("x", array1), "b": ("x", array2)}, coords={"x": x, "y": ("x", y)}
             )
    -        actual = ds.reindex(x=new_coords)
    +        units = extract_units(ds)
    +
    +        other = xr.Dataset({"a": ("x", np.empty_like(new_x))}, coords={"x": new_x})
    +
    +        expected = attach_units(func(strip_units(ds), other), units)
    +        actual = func(ds, other)
     
    -        assert_equal_with_units(actual, expected)
    +        assert_units_equal(expected, actual)
    +        assert_equal(expected, actual)
     
         @pytest.mark.xfail(reason="indexes don't support units")
         @pytest.mark.parametrize(
    @@ -4953,54 +4978,32 @@ def test_reindex(self, unit, error, dtype):
                 pytest.param(unit_registry.m, None, id="identical_unit"),
             ),
         )
    -    def test_reindex_like(self, unit, error, dtype):
    -        array1 = (
    -            np.linspace(0, 10, 10 * 5).reshape(10, 5).astype(dtype) * unit_registry.degK
    -        )
    -        array2 = (
    -            np.linspace(10, 20, 10 * 8).reshape(10, 8).astype(dtype) * unit_registry.Pa
    -        )
    +    @pytest.mark.parametrize(
    +        "func", (method("interp_like"), method("reindex_like")), ids=repr
    +    )
    +    def test_interp_reindex_like_indexing(self, func, unit, error, dtype):
    +        array1 = np.linspace(-1, 0, 10).astype(dtype)
    +        array2 = np.linspace(0, 1, 10).astype(dtype)
     
    -        coords = {
    -            "x": np.arange(10) * unit_registry.m,
    -            "y": np.arange(5) * unit_registry.m,
    -            "z": np.arange(8) * unit_registry.m,
    -        }
    +        x = np.arange(10) * unit_registry.m
    +        new_x = (np.arange(8) + 0.5) * unit
     
    -        ds = xr.Dataset(
    -            data_vars={
    -                "a": xr.DataArray(data=array1, dims=("x", "y")),
    -                "b": xr.DataArray(data=array2, dims=("x", "z")),
    -            },
    -            coords=coords,
    -        )
    +        ds = xr.Dataset({"a": ("x", array1), "b": ("x", array2)}, coords={"x": x})
    +        units = extract_units(ds)
     
    -        other = xr.Dataset(
    -            data_vars={
    -                "c": xr.DataArray(data=np.empty((20, 10)), dims=("x", "y")),
    -                "d": xr.DataArray(data=np.empty((20, 15)), dims=("x", "z")),
    -            },
    -            coords={
    -                "x": (np.arange(20) + 0.3) * unit,
    -                "y": (np.arange(10) - 0.2) * unit,
    -                "z": (np.arange(15) + 0.4) * unit,
    -            },
    -        )
    +        other = xr.Dataset({"a": ("x", np.empty_like(new_x))}, coords={"x": new_x})
     
             if error is not None:
                 with pytest.raises(error):
    -                ds.reindex_like(other)
    +                func(ds, other)
     
                 return
     
    -        units = extract_units(ds)
    -        expected = attach_units(
    -            strip_units(ds).reindex_like(strip_units(convert_units(other, units))),
    -            units,
    -        )
    -        actual = ds.reindex_like(other)
    +        expected = attach_units(func(strip_units(ds), other), units)
    +        actual = func(ds, other)
     
    -        assert_equal_with_units(expected, actual)
    +        assert_units_equal(expected, actual)
    +        assert_equal(expected, actual)
     
         @pytest.mark.parametrize(
             "func",
    @@ -5010,30 +5013,46 @@ def test_reindex_like(self, unit, error, dtype):
                 method("integrate", coord="x"),
                 pytest.param(
                     method("quantile", q=[0.25, 0.75]),
    -                marks=pytest.mark.xfail(reason="nanquantile not implemented"),
    +                marks=pytest.mark.xfail(
    +                    LooseVersion(pint.__version__) <= "0.12",
    +                    reason="nanquantile not implemented yet",
    +                ),
                 ),
                 method("reduce", func=np.sum, dim="x"),
                 method("map", np.fabs),
             ),
             ids=repr,
         )
    -    def test_computation(self, func, dtype):
    -        array1 = (
    -            np.linspace(-5, 5, 10 * 5).reshape(10, 5).astype(dtype) * unit_registry.degK
    -        )
    -        array2 = (
    -            np.linspace(10, 20, 10 * 8).reshape(10, 8).astype(dtype) * unit_registry.Pa
    -        )
    -        x = np.arange(10) * unit_registry.m
    -        y = np.arange(5) * unit_registry.m
    -        z = np.arange(8) * unit_registry.m
    +    @pytest.mark.parametrize(
    +        "variant",
    +        (
    +            "data",
    +            pytest.param(
    +                "dims", marks=pytest.mark.xfail(reason="indexes don't support units")
    +            ),
    +            "coords",
    +        ),
    +    )
    +    def test_computation(self, func, variant, dtype):
    +        variants = {
    +            "data": ((unit_registry.degK, unit_registry.Pa), 1, 1),
    +            "dims": ((1, 1), unit_registry.m, 1),
    +            "coords": ((1, 1), 1, unit_registry.m),
    +        }
    +        (unit1, unit2), dim_unit, coord_unit = variants.get(variant)
    +
    +        array1 = np.linspace(-5, 5, 4 * 5).reshape(4, 5).astype(dtype) * unit1
    +        array2 = np.linspace(10, 20, 4 * 3).reshape(4, 3).astype(dtype) * unit2
    +        x = np.arange(4) * dim_unit
    +        y = np.arange(5) * dim_unit
    +        z = np.arange(3) * dim_unit
     
             ds = xr.Dataset(
                 data_vars={
                     "a": xr.DataArray(data=array1, dims=("x", "y")),
                     "b": xr.DataArray(data=array2, dims=("x", "z")),
                 },
    -            coords={"x": x, "y": y, "z": z},
    +            coords={"x": x, "y": y, "z": z, "y2": ("y", np.arange(5) * coord_unit)},
             )
     
             units = extract_units(ds)
    @@ -5041,69 +5060,105 @@ def test_computation(self, func, dtype):
             expected = attach_units(func(strip_units(ds)), units)
             actual = func(ds)
     
    -        assert_equal_with_units(expected, actual)
    +        assert_units_equal(expected, actual)
    +        assert_equal(expected, actual)
     
         @pytest.mark.parametrize(
             "func",
             (
                 method("groupby", "x"),
    -            method("groupby_bins", "x", bins=4),
    +            pytest.param(
    +                method("groupby_bins", "x", bins=2),
    +                marks=pytest.mark.xfail(
    +                    LooseVersion(pint.__version__) <= "0.12",
    +                    reason="needs assert_allclose but that does not work with pint",
    +                ),
    +            ),
                 method("coarsen", x=2),
                 pytest.param(
                     method("rolling", x=3), marks=pytest.mark.xfail(reason="strips units")
                 ),
                 pytest.param(
                     method("rolling_exp", x=3),
    -                marks=pytest.mark.xfail(reason="uses numbagg which strips units"),
    +                marks=pytest.mark.xfail(
    +                    reason="numbagg functions are not supported by pint"
    +                ),
                 ),
             ),
             ids=repr,
         )
    -    def test_computation_objects(self, func, dtype):
    -        array1 = (
    -            np.linspace(-5, 5, 10 * 5).reshape(10, 5).astype(dtype) * unit_registry.degK
    -        )
    -        array2 = (
    -            np.linspace(10, 20, 10 * 5 * 8).reshape(10, 5, 8).astype(dtype)
    -            * unit_registry.Pa
    -        )
    -        x = np.arange(10) * unit_registry.m
    -        y = np.arange(5) * unit_registry.m
    -        z = np.arange(8) * unit_registry.m
    +    @pytest.mark.parametrize(
    +        "variant",
    +        (
    +            "data",
    +            pytest.param(
    +                "dims", marks=pytest.mark.xfail(reason="indexes don't support units")
    +            ),
    +            "coords",
    +        ),
    +    )
    +    def test_computation_objects(self, func, variant, dtype):
    +        variants = {
    +            "data": ((unit_registry.degK, unit_registry.Pa), 1, 1),
    +            "dims": ((1, 1), unit_registry.m, 1),
    +            "coords": ((1, 1), 1, unit_registry.m),
    +        }
    +        (unit1, unit2), dim_unit, coord_unit = variants.get(variant)
    +
    +        array1 = np.linspace(-5, 5, 4 * 5).reshape(4, 5).astype(dtype) * unit1
    +        array2 = np.linspace(10, 20, 4 * 3).reshape(4, 3).astype(dtype) * unit2
    +        x = np.arange(4) * dim_unit
    +        y = np.arange(5) * dim_unit
    +        z = np.arange(3) * dim_unit
     
             ds = xr.Dataset(
    -            data_vars={
    -                "a": xr.DataArray(data=array1, dims=("x", "y")),
    -                "b": xr.DataArray(data=array2, dims=("x", "y", "z")),
    -            },
    -            coords={"x": x, "y": y, "z": z},
    +            data_vars={"a": (("x", "y"), array1), "b": (("x", "z"), array2)},
    +            coords={"x": x, "y": y, "z": z, "y2": ("y", np.arange(5) * coord_unit)},
             )
             units = extract_units(ds)
     
             args = [] if func.name != "groupby" else ["y"]
    -        reduce_func = method("mean", *args)
    -        expected = attach_units(reduce_func(func(strip_units(ds))), units)
    -        actual = reduce_func(func(ds))
    +        expected = attach_units(func(strip_units(ds)).mean(*args), units)
    +        actual = func(ds).mean(*args)
     
    -        assert_equal_with_units(expected, actual)
    +        assert_units_equal(expected, actual)
    +        # TODO: remove once pint 0.12 has been released
    +        if LooseVersion(pint.__version__) <= "0.12":
    +            assert_equal(expected, actual)
    +        else:
    +            assert_allclose(expected, actual)
    +
    +    @pytest.mark.parametrize(
    +        "variant",
    +        (
    +            "data",
    +            pytest.param(
    +                "dims", marks=pytest.mark.xfail(reason="indexes don't support units")
    +            ),
    +            "coords",
    +        ),
    +    )
    +    def test_resample(self, variant, dtype):
    +        # TODO: move this to test_computation_objects
    +        variants = {
    +            "data": ((unit_registry.degK, unit_registry.Pa), 1, 1),
    +            "dims": ((1, 1), unit_registry.m, 1),
    +            "coords": ((1, 1), 1, unit_registry.m),
    +        }
    +        (unit1, unit2), dim_unit, coord_unit = variants.get(variant)
    +
    +        array1 = np.linspace(-5, 5, 10 * 5).reshape(10, 5).astype(dtype) * unit1
    +        array2 = np.linspace(10, 20, 10 * 8).reshape(10, 8).astype(dtype) * unit2
     
    -    def test_resample(self, dtype):
    -        array1 = (
    -            np.linspace(-5, 5, 10 * 5).reshape(10, 5).astype(dtype) * unit_registry.degK
    -        )
    -        array2 = (
    -            np.linspace(10, 20, 10 * 8).reshape(10, 8).astype(dtype) * unit_registry.Pa
    -        )
             t = pd.date_range("10-09-2010", periods=array1.shape[0], freq="1y")
    -        y = np.arange(5) * unit_registry.m
    -        z = np.arange(8) * unit_registry.m
    +        y = np.arange(5) * dim_unit
    +        z = np.arange(8) * dim_unit
    +
    +        u = np.linspace(-1, 0, 5) * coord_unit
     
             ds = xr.Dataset(
    -            data_vars={
    -                "a": xr.DataArray(data=array1, dims=("time", "y")),
    -                "b": xr.DataArray(data=array2, dims=("time", "z")),
    -            },
    -            coords={"time": t, "y": y, "z": z},
    +            data_vars={"a": (("time", "y"), array1), "b": (("time", "z"), array2)},
    +            coords={"time": t, "y": y, "z": z, "u": ("y", u)},
             )
             units = extract_units(ds)
     
    @@ -5112,43 +5167,59 @@ def test_resample(self, dtype):
             expected = attach_units(func(strip_units(ds)).mean(), units)
             actual = func(ds).mean()
     
    -        assert_equal_with_units(expected, actual)
    +        assert_units_equal(expected, actual)
    +        assert_equal(expected, actual)
     
         @pytest.mark.parametrize(
             "func",
             (
                 method("assign", c=lambda ds: 10 * ds.b),
    -            method("assign_coords", v=("x", np.arange(10) * unit_registry.s)),
    +            method("assign_coords", v=("x", np.arange(5) * unit_registry.s)),
                 method("first"),
                 method("last"),
                 pytest.param(
                     method("quantile", q=[0.25, 0.5, 0.75], dim="x"),
    -                marks=pytest.mark.xfail(reason="nanquantile not implemented"),
    +                marks=pytest.mark.xfail(
    +                    LooseVersion(pint.__version__) <= "0.12",
    +                    reason="nanquantile not implemented",
    +                ),
                 ),
             ),
             ids=repr,
         )
    -    def test_grouped_operations(self, func, dtype):
    -        array1 = (
    -            np.linspace(-5, 5, 10 * 5).reshape(10, 5).astype(dtype) * unit_registry.degK
    -        )
    -        array2 = (
    -            np.linspace(10, 20, 10 * 5 * 8).reshape(10, 5, 8).astype(dtype)
    -            * unit_registry.Pa
    -        )
    -        x = np.arange(10) * unit_registry.m
    -        y = np.arange(5) * unit_registry.m
    -        z = np.arange(8) * unit_registry.m
    +    @pytest.mark.parametrize(
    +        "variant",
    +        (
    +            "data",
    +            pytest.param(
    +                "dims", marks=pytest.mark.xfail(reason="indexes don't support units")
    +            ),
    +            "coords",
    +        ),
    +    )
    +    def test_grouped_operations(self, func, variant, dtype):
    +        variants = {
    +            "data": ((unit_registry.degK, unit_registry.Pa), 1, 1),
    +            "dims": ((1, 1), unit_registry.m, 1),
    +            "coords": ((1, 1), 1, unit_registry.m),
    +        }
    +        (unit1, unit2), dim_unit, coord_unit = variants.get(variant)
    +
    +        array1 = np.linspace(-5, 5, 5 * 4).reshape(5, 4).astype(dtype) * unit1
    +        array2 = np.linspace(10, 20, 5 * 4 * 3).reshape(5, 4, 3).astype(dtype) * unit2
    +        x = np.arange(5) * dim_unit
    +        y = np.arange(4) * dim_unit
    +        z = np.arange(3) * dim_unit
    +
    +        u = np.linspace(-1, 0, 4) * coord_unit
     
             ds = xr.Dataset(
    -            data_vars={
    -                "a": xr.DataArray(data=array1, dims=("x", "y")),
    -                "b": xr.DataArray(data=array2, dims=("x", "y", "z")),
    -            },
    -            coords={"x": x, "y": y, "z": z},
    +            data_vars={"a": (("x", "y"), array1), "b": (("x", "y", "z"), array2)},
    +            coords={"x": x, "y": y, "z": z, "u": ("y", u)},
             )
    -        units = extract_units(ds)
    -        units.update({"c": unit_registry.Pa, "v": unit_registry.s})
    +
    +        assigned_units = {"c": unit2, "v": unit_registry.s}
    +        units = merge_mappings(extract_units(ds), assigned_units)
     
             stripped_kwargs = {
                 name: strip_units(value) for name, value in func.kwargs.items()
    @@ -5158,20 +5229,26 @@ def test_grouped_operations(self, func, dtype):
             )
             actual = func(ds.groupby("y"))
     
    -        assert_equal_with_units(expected, actual)
    +        assert_units_equal(expected, actual)
    +        assert_equal(expected, actual)
     
         @pytest.mark.parametrize(
             "func",
             (
                 method("pipe", lambda ds: ds * 10),
                 method("assign", d=lambda ds: ds.b * 10),
    -            method("assign_coords", y2=("y", np.arange(5) * unit_registry.mm)),
    +            method("assign_coords", y2=("y", np.arange(4) * unit_registry.mm)),
                 method("assign_attrs", attr1="value"),
                 method("rename", x2="x_mm"),
                 method("rename_vars", c="temperature"),
                 method("rename_dims", x="offset_x"),
    -            method("swap_dims", {"x": "x2"}),
    -            method("expand_dims", v=np.linspace(10, 20, 12) * unit_registry.s, axis=1),
    +            method("swap_dims", {"x": "u"}),
    +            pytest.param(
    +                method(
    +                    "expand_dims", v=np.linspace(10, 20, 12) * unit_registry.s, axis=1
    +                ),
    +                marks=pytest.mark.xfail(reason="indexes don't support units"),
    +            ),
                 method("drop_vars", "x"),
                 method("drop_dims", "z"),
                 method("set_coords", names="c"),
    @@ -5180,40 +5257,55 @@ def test_grouped_operations(self, func, dtype):
             ),
             ids=repr,
         )
    -    def test_content_manipulation(self, func, dtype):
    -        array1 = (
    -            np.linspace(-5, 5, 10 * 5).reshape(10, 5).astype(dtype)
    -            * unit_registry.m ** 3
    -        )
    -        array2 = (
    -            np.linspace(10, 20, 10 * 5 * 8).reshape(10, 5, 8).astype(dtype)
    -            * unit_registry.Pa
    -        )
    -        array3 = np.linspace(0, 10, 10).astype(dtype) * unit_registry.degK
    +    @pytest.mark.parametrize(
    +        "variant",
    +        (
    +            "data",
    +            pytest.param(
    +                "dims", marks=pytest.mark.xfail(reason="indexes don't support units")
    +            ),
    +            "coords",
    +        ),
    +    )
    +    def test_content_manipulation(self, func, variant, dtype):
    +        variants = {
    +            "data": (
    +                (unit_registry.m ** 3, unit_registry.Pa, unit_registry.degK),
    +                1,
    +                1,
    +            ),
    +            "dims": ((1, 1, 1), unit_registry.m, 1),
    +            "coords": ((1, 1, 1), 1, unit_registry.m),
    +        }
    +        (unit1, unit2, unit3), dim_unit, coord_unit = variants.get(variant)
     
    -        x = np.arange(10) * unit_registry.m
    -        x2 = x.to(unit_registry.mm)
    -        y = np.arange(5) * unit_registry.m
    -        z = np.arange(8) * unit_registry.m
    +        array1 = np.linspace(-5, 5, 5 * 4).reshape(5, 4).astype(dtype) * unit1
    +        array2 = np.linspace(10, 20, 5 * 4 * 3).reshape(5, 4, 3).astype(dtype) * unit2
    +        array3 = np.linspace(0, 10, 5).astype(dtype) * unit3
    +
    +        x = np.arange(5) * dim_unit
    +        y = np.arange(4) * dim_unit
    +        z = np.arange(3) * dim_unit
    +
    +        x2 = np.linspace(-1, 0, 5) * coord_unit
     
             ds = xr.Dataset(
                 data_vars={
    -                "a": xr.DataArray(data=array1, dims=("x", "y")),
    -                "b": xr.DataArray(data=array2, dims=("x", "y", "z")),
    -                "c": xr.DataArray(data=array3, dims="x"),
    +                "a": (("x", "y"), array1),
    +                "b": (("x", "y", "z"), array2),
    +                "c": ("x", array3),
                 },
                 coords={"x": x, "y": y, "z": z, "x2": ("x", x2)},
             )
    -        units = {
    -            **extract_units(ds),
    -            **{
    -                "y2": unit_registry.mm,
    -                "x_mm": unit_registry.mm,
    -                "offset_x": unit_registry.m,
    -                "d": unit_registry.Pa,
    -                "temperature": unit_registry.degK,
    -            },
    +
    +        new_units = {
    +            "y2": unit_registry.mm,
    +            "x_mm": coord_unit,
    +            "offset_x": unit_registry.m,
    +            "d": unit2,
    +            "temperature": unit3,
             }
    +        units = merge_mappings(extract_units(ds), new_units)
     
             stripped_kwargs = {
                 key: strip_units(value) for key, value in func.kwargs.items()
    @@ -5221,7 +5313,8 @@ def test_content_manipulation(self, func, dtype):
             expected = attach_units(func(strip_units(ds), **stripped_kwargs), units)
             actual = func(ds)
     
    -        assert_equal_with_units(expected, actual)
    +        assert_units_equal(expected, actual)
    +        assert_equal(expected, actual)
     
         @pytest.mark.parametrize(
             "unit,error",
    @@ -5246,25 +5339,29 @@ def test_content_manipulation(self, func, dtype):
             ),
         )
         def test_merge(self, variant, unit, error, dtype):
    -        original_data_unit = unit_registry.m
    -        original_dim_unit = unit_registry.m
    -        original_coord_unit = unit_registry.m
    +        left_variants = {
    +            "data": (unit_registry.m, 1, 1),
    +            "dims": (1, unit_registry.m, 1),
    +            "coords": (1, 1, unit_registry.m),
    +        }
     
    -        variants = {
    -            "data": (unit, original_dim_unit, original_coord_unit),
    -            "dims": (original_data_unit, unit, original_coord_unit),
    -            "coords": (original_data_unit, original_dim_unit, unit),
    +        left_data_unit, left_dim_unit, left_coord_unit = left_variants.get(variant)
    +
    +        right_variants = {
    +            "data": (unit, 1, 1),
    +            "dims": (1, unit, 1),
    +            "coords": (1, 1, unit),
             }
    -        data_unit, dim_unit, coord_unit = variants.get(variant)
    +        right_data_unit, right_dim_unit, right_coord_unit = right_variants.get(variant)
     
    -        left_array = np.arange(10).astype(dtype) * original_data_unit
    -        right_array = np.arange(-5, 5).astype(dtype) * data_unit
    +        left_array = np.arange(10).astype(dtype) * left_data_unit
    +        right_array = np.arange(-5, 5).astype(dtype) * right_data_unit
     
    -        left_dim = np.arange(10, 20) * original_dim_unit
    -        right_dim = np.arange(5, 15) * dim_unit
    +        left_dim = np.arange(10, 20) * left_dim_unit
    +        right_dim = np.arange(5, 15) * right_dim_unit
     
    -        left_coord = np.arange(-10, 0) * original_coord_unit
    -        right_coord = np.arange(-15, -5) * coord_unit
    +        left_coord = np.arange(-10, 0) * left_coord_unit
    +        right_coord = np.arange(-15, -5) * right_coord_unit
     
             left = xr.Dataset(
                 data_vars={"a": ("x", left_array)},
    @@ -5287,4 +5384,5 @@ def test_merge(self, variant, unit, error, dtype):
             expected = attach_units(strip_units(left).merge(strip_units(converted)), units)
             actual = left.merge(right)
     
    -        assert_equal_with_units(expected, actual)
    +        assert_units_equal(expected, actual)
    +        assert_equal(expected, actual)
    diff --git a/xarray/tests/test_variable.py b/xarray/tests/test_variable.py
    index 78e3848b8fb..d79d40d67c0 100644
    --- a/xarray/tests/test_variable.py
    +++ b/xarray/tests/test_variable.py
    @@ -1657,7 +1657,7 @@ def test_reduce_funcs(self):
             assert_identical(v.all(dim="x"), Variable([], False))
     
             v = Variable("t", pd.date_range("2000-01-01", periods=3))
    -        assert v.argmax(skipna=True) == 2
    +        assert v.argmax(skipna=True, dim="t") == 2
     
             assert_identical(v.max(), Variable([], pd.Timestamp("2000-01-03")))
     
    @@ -2213,6 +2213,10 @@ def test_full_like(self):
             assert expect.dtype == bool
             assert_identical(expect, full_like(orig, True, dtype=bool))
     
    +        # raise error on non-scalar fill_value
    +        with raises_regex(ValueError, "must be scalar"):
    +            full_like(orig, [1.0, 2.0])
    +
         @requires_dask
         def test_full_like_dask(self):
             orig = Variable(
    diff --git a/xarray/tests/test_weighted.py b/xarray/tests/test_weighted.py
    index 24531215dfb..1bf685cc95d 100644
    --- a/xarray/tests/test_weighted.py
    +++ b/xarray/tests/test_weighted.py
    @@ -59,6 +59,18 @@ def test_weighted_sum_of_weights_nan(weights, expected):
         assert_equal(expected, result)
     
     
    +def test_weighted_sum_of_weights_bool():
    +    # https://github.com/pydata/xarray/issues/4074
    +
    +    da = DataArray([1, 2])
    +    weights = DataArray([True, True])
    +    result = da.weighted(weights).sum_of_weights()
    +
    +    expected = DataArray(2)
    +
    +    assert_equal(expected, result)
    +
    +
     @pytest.mark.parametrize("da", ([1.0, 2], [1, np.nan], [np.nan, np.nan]))
     @pytest.mark.parametrize("factor", [0, 1, 3.14])
     @pytest.mark.parametrize("skipna", (True, False))
    @@ -158,6 +170,17 @@ def test_weighted_mean_nan(weights, expected, skipna):
         assert_equal(expected, result)
     
     
    +def test_weighted_mean_bool():
    +    # https://github.com/pydata/xarray/issues/4074
    +    da = DataArray([1, 1])
    +    weights = DataArray([True, True])
    +    expected = DataArray(1)
    +
    +    result = da.weighted(weights).mean()
    +
    +    assert_equal(expected, result)
    +
    +
     def expected_weighted(da, weights, dim, skipna, operation):
         """
         Generate expected result using ``*`` and ``sum``. This is checked against
    diff --git a/xarray/util/print_versions.py b/xarray/util/print_versions.py
    index 32051bb6843..96983c83aab 100755
    --- a/xarray/util/print_versions.py
    +++ b/xarray/util/print_versions.py
    @@ -129,7 +129,7 @@ def show_versions(file=sys.stdout):
             ("sphinx", lambda mod: mod.__version__),
         ]
     
    -    deps_blob = list()
    +    deps_blob = []
         for (modname, ver_f) in deps:
             try:
                 if modname in sys.modules: