diff --git a/.deepsource.toml b/.deepsource.toml new file mode 100644 index 00000000000..e37b41de303 --- /dev/null +++ b/.deepsource.toml @@ -0,0 +1,18 @@ +version = 1 + +test_patterns = [ + "*/tests/**", + "*/test_*.py" +] + +exclude_patterns = [ + "doc/**", + "ci/**" +] + +[[analyzers]] +name = "python" +enabled = true + + [analyzers.meta] + runtime_version = "3.x.x" \ No newline at end of file diff --git a/.github/ISSUE_TEMPLATE/bug-report.md b/.github/ISSUE_TEMPLATE/bug-report.md new file mode 100644 index 00000000000..02bc5d0f7b0 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/bug-report.md @@ -0,0 +1,39 @@ +--- +name: Bug report +about: Create a report to help us improve +title: '' +labels: '' +assignees: '' + +--- + + + +**What happened**: + +**What you expected to happen**: + +**Minimal Complete Verifiable Example**: + +```python +# Put your MCVE code here +``` + +**Anything else we need to know?**: + +**Environment**: + +
<details><summary>Output of xr.show_versions()</summary> + + + +</details>
diff --git a/.github/ISSUE_TEMPLATE/bug_report.md b/.github/ISSUE_TEMPLATE/bug_report.md deleted file mode 100644 index c712cf27979..00000000000 --- a/.github/ISSUE_TEMPLATE/bug_report.md +++ /dev/null @@ -1,35 +0,0 @@ ---- -name: Bug report / Feature request -about: 'Post a problem or idea' -title: '' -labels: '' -assignees: '' - ---- - - - - -#### MCVE Code Sample - - -```python -# Your code here - -``` - -#### Expected Output - - -#### Problem Description - - - -#### Versions - -
<details><summary>Output of xr.show_versions()</summary> - - - - -</details>
diff --git a/.github/ISSUE_TEMPLATE/config.yml b/.github/ISSUE_TEMPLATE/config.yml new file mode 100644 index 00000000000..3389fbfe071 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/config.yml @@ -0,0 +1,5 @@ +blank_issues_enabled: true +contact_links: + - name: General Question + url: https://stackoverflow.com/questions/tagged/python-xarray + about: "If you have a question like *How do I append to an xarray.Dataset?* then please ask on Stack Overflow using the #python-xarray tag." diff --git a/.github/ISSUE_TEMPLATE/feature-request.md b/.github/ISSUE_TEMPLATE/feature-request.md new file mode 100644 index 00000000000..7021fe490aa --- /dev/null +++ b/.github/ISSUE_TEMPLATE/feature-request.md @@ -0,0 +1,22 @@ +--- +name: Feature request +about: Suggest an idea for this project +title: '' +labels: '' +assignees: '' + +--- + + + +**Is your feature request related to a problem? Please describe.** +A clear and concise description of what the problem is. Ex. I'm always frustrated when [...] + +**Describe the solution you'd like** +A clear and concise description of what you want to happen. + +**Describe alternatives you've considered** +A clear and concise description of any alternative solutions or features you've considered. + +**Additional context** +Add any other context about the feature request here. diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md index a921bddaa23..c9c0b720c35 100644 --- a/.github/PULL_REQUEST_TEMPLATE.md +++ b/.github/PULL_REQUEST_TEMPLATE.md @@ -3,4 +3,5 @@ - [ ] Closes #xxxx - [ ] Tests added - [ ] Passes `isort -rc . && black . && mypy . && flake8` - - [ ] Fully documented, including `whats-new.rst` for all changes and `api.rst` for new API + - [ ] User visible changes (including notable bug fixes) are documented in `whats-new.rst` + - [ ] New functions/methods are listed in `api.rst` diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 26bf4803ef6..447f0007fc2 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -11,12 +11,16 @@ repos: rev: stable hooks: - id: black + - repo: https://github.com/keewis/blackdoc + rev: stable + hooks: + - id: blackdoc - repo: https://gitlab.com/pycqa/flake8 rev: 3.7.9 hooks: - id: flake8 - repo: https://github.com/pre-commit/mirrors-mypy - rev: v0.761 # Must match ci/requirements/*.yml + rev: v0.780 # Must match ci/requirements/*.yml hooks: - id: mypy # run this occasionally, ref discussion https://github.com/pydata/xarray/pull/3194 diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md new file mode 100644 index 00000000000..7a909aefd08 --- /dev/null +++ b/CONTRIBUTING.md @@ -0,0 +1 @@ +Xarray's contributor guidelines [can be found in our online documentation](http://xarray.pydata.org/en/stable/contributing.html) diff --git a/HOW_TO_RELEASE.md b/HOW_TO_RELEASE.md index 3fdd1d7236d..c890d61d966 100644 --- a/HOW_TO_RELEASE.md +++ b/HOW_TO_RELEASE.md @@ -1,4 +1,4 @@ -How to issue an xarray release in 16 easy steps +# How to issue an xarray release in 17 easy steps Time required: about an hour. @@ -6,7 +6,16 @@ Time required: about an hour. ``` git pull upstream master ``` - 2. Look over whats-new.rst and the docs. Make sure "What's New" is complete + 2. Get a list of contributors with: + ``` + git log "$(git tag --sort="v:refname" | sed -n 'x;$p').." --format=%aN | sort -u | perl -pe 's/\n/$1, /' + ``` + or by substituting the _previous_ release in: + ``` + git log v0.X.Y-1.. --format=%aN | sort -u | perl -pe 's/\n/$1, /' + ``` + Add these into `whats-new.rst` somewhere :) + 3. 
Look over whats-new.rst and the docs. Make sure "What's New" is complete (check the date!) and consider adding a brief summary note describing the release at the top. Things to watch out for: @@ -16,41 +25,41 @@ Time required: about an hour. due to a bad merge. Check for these before a release by using git diff, e.g., `git diff v0.X.Y whats-new.rst` where 0.X.Y is the previous release. - 3. If you have any doubts, run the full test suite one final time! + 4. If you have any doubts, run the full test suite one final time! ``` pytest ``` - 4. Check that the ReadTheDocs build is passing. - 5. On the master branch, commit the release in git: + 5. Check that the ReadTheDocs build is passing. + 6. On the master branch, commit the release in git: ``` git commit -am 'Release v0.X.Y' ``` - 6. Tag the release: + 7. Tag the release: ``` git tag -a v0.X.Y -m 'v0.X.Y' ``` - 7. Build source and binary wheels for pypi: + 8. Build source and binary wheels for pypi: ``` git clean -xdf # this deletes all uncommitted changes! python setup.py bdist_wheel sdist ``` - 8. Use twine to check the package build: + 9. Use twine to check the package build: ``` twine check dist/xarray-0.X.Y* ``` - 9. Use twine to register and upload the release on pypi. Be careful, you can't +10. Use twine to register and upload the release on pypi. Be careful, you can't take this back! ``` twine upload dist/xarray-0.X.Y* ``` You will need to be listed as a package owner at https://pypi.python.org/pypi/xarray for this to work. -10. Push your changes to master: +11. Push your changes to master: ``` git push upstream master git push upstream --tags ``` -11. Update the stable branch (used by ReadTheDocs) and switch back to master: +12. Update the stable branch (used by ReadTheDocs) and switch back to master: ``` git checkout stable git rebase master @@ -60,7 +69,7 @@ Time required: about an hour. It's OK to force push to 'stable' if necessary. (We also update the stable branch with `git cherry-pick` for documentation-only fixes that apply to the current released version.) -12. Add a section for the next release (v.X.Y+1) to doc/whats-new.rst: +13. Add a section for the next release (v.X.Y+1) to doc/whats-new.rst: ``` .. _whats-new.0.X.Y+1: @@ -86,19 +95,19 @@ Time required: about an hour. Internal Changes ~~~~~~~~~~~~~~~~ ``` -13. Commit your changes and push to master again: +14. Commit your changes and push to master again: ``` git commit -am 'New whatsnew section' git push upstream master ``` You're done pushing to master! -14. Issue the release on GitHub. Click on "Draft a new release" at +15. Issue the release on GitHub. Click on "Draft a new release" at https://github.com/pydata/xarray/releases. Type in the version number, but don't bother to describe it -- we maintain that on the docs instead. -15. Update the docs. Login to https://readthedocs.org/projects/xray/versions/ +16. Update the docs. Login to https://readthedocs.org/projects/xray/versions/ and switch your new release tag (at the bottom) from "Inactive" to "Active". It should now build automatically. -16. Issue the release announcement! For bug fix releases, I usually only email +17. Issue the release announcement! For bug fix releases, I usually only email xarray@googlegroups.com. For major/feature releases, I will email a broader list (no more than once every 3-6 months): - pydata@googlegroups.com @@ -109,18 +118,8 @@ Time required: about an hour. Google search will turn up examples of prior release announcements (look for "ANN xarray").
- You can get a list of contributors with: - ``` - git log "$(git tag --sort="v:refname" | sed -n 'x;$p').." --format="%aN" | sort -u - ``` - or by substituting the _previous_ release in: - ``` - git log v0.X.Y-1.. --format="%aN" | sort -u - ``` - NB: copying this output into a Google Groups form can cause - [issues](https://groups.google.com/forum/#!topic/xarray/hK158wAviPs) with line breaks, so take care -Note on version numbering: +## Note on version numbering We follow a rough approximation of semantic version. Only major releases (0.X.0) should include breaking changes. Minor releases (0.X.Y) are for bug fixes and diff --git a/azure-pipelines.yml b/azure-pipelines.yml index ff85501c555..e04c8f74f68 100644 --- a/azure-pipelines.yml +++ b/azure-pipelines.yml @@ -108,21 +108,3 @@ jobs: python ci/min_deps_check.py ci/requirements/py36-bare-minimum.yml python ci/min_deps_check.py ci/requirements/py36-min-all-deps.yml displayName: minimum versions policy - -- job: Docs - pool: - vmImage: 'ubuntu-16.04' - steps: - - template: ci/azure/install.yml - parameters: - env_file: ci/requirements/doc.yml - - bash: | - source activate xarray-tests - # Replicate the exact environment created by the readthedocs CI - conda install --yes --quiet -c pkgs/main mock pillow sphinx sphinx_rtd_theme - displayName: Replicate readthedocs CI environment - - bash: | - source activate xarray-tests - cd doc - sphinx-build -W --keep-going -j auto -b html -d _build/doctrees . _build/html - displayName: Build HTML docs diff --git a/ci/azure/install.yml b/ci/azure/install.yml index 60559dd2064..83895eebe01 100644 --- a/ci/azure/install.yml +++ b/ci/azure/install.yml @@ -10,16 +10,37 @@ steps: conda env create -n xarray-tests --file ${{ parameters.env_file }} displayName: Install conda dependencies +# TODO: add sparse back in, once Numba works with the development version of +# NumPy again: https://github.com/pydata/xarray/issues/4146 - bash: | source activate xarray-tests + conda uninstall -y --force \ + numpy \ + scipy \ + pandas \ + matplotlib \ + dask \ + distributed \ + zarr \ + cftime \ + rasterio \ + pint \ + bottleneck \ + sparse python -m pip install \ - -f https://7933911d6844c6c53a7d-47bd50c35cd79bd838daf386af554a83.ssl.cf2.rackcdn.com \ + -i https://pypi.anaconda.org/scipy-wheels-nightly/simple \ --no-deps \ --pre \ --upgrade \ - matplotlib \ numpy \ - scipy + scipy \ + pandas + python -m pip install \ + -f https://7933911d6844c6c53a7d-47bd50c35cd79bd838daf386af554a83.ssl.cf2.rackcdn.com \ + --no-deps \ + --pre \ + --upgrade \ + matplotlib python -m pip install \ --no-deps \ --upgrade \ @@ -29,8 +50,7 @@ steps: git+https://github.com/Unidata/cftime \ git+https://github.com/mapbox/rasterio \ git+https://github.com/hgrecco/pint \ - git+https://github.com/pydata/bottleneck \ - git+https://github.com/pandas-dev/pandas + git+https://github.com/pydata/bottleneck condition: eq(variables['UPSTREAM_DEV'], 'true') displayName: Install upstream dev dependencies diff --git a/ci/requirements/doc.yml b/ci/requirements/doc.yml index 2987303c92a..6caebc46cdf 100644 --- a/ci/requirements/doc.yml +++ b/ci/requirements/doc.yml @@ -17,11 +17,10 @@ dependencies: - netcdf4>=1.5 - numba - numpy>=1.17 - - numpydoc - pandas>=1.0 - rasterio>=1.1 - seaborn - setuptools - sphinx>=2.3 - sphinx_rtd_theme>=0.4 - - zarr>=2.4 \ No newline at end of file + - zarr>=2.4 diff --git a/ci/requirements/py36-min-all-deps.yml b/ci/requirements/py36-min-all-deps.yml index 86540197dcc..a72cd000680 100644 --- a/ci/requirements/py36-min-all-deps.yml +++ 
b/ci/requirements/py36-min-all-deps.yml @@ -15,8 +15,8 @@ dependencies: - cfgrib=0.9 - cftime=1.0 - coveralls - - dask=2.2 - - distributed=2.2 + - dask=2.5 + - distributed=2.5 - flake8 - h5netcdf=0.7 - h5py=2.9 # Policy allows for 2.10, but it's a conflict-fest diff --git a/ci/requirements/py36-min-nep18.yml b/ci/requirements/py36-min-nep18.yml index a5eded49cd4..cd2b1a18c77 100644 --- a/ci/requirements/py36-min-nep18.yml +++ b/ci/requirements/py36-min-nep18.yml @@ -6,12 +6,11 @@ dependencies: # require drastically newer packages than everything else - python=3.6 - coveralls - - dask=2.4 - - distributed=2.4 + - dask=2.5 + - distributed=2.5 - msgpack-python=0.6 # remove once distributed is bumped. distributed GH3491 - numpy=1.17 - pandas=0.25 - - pint=0.11 - pip - pytest - pytest-cov @@ -19,3 +18,5 @@ dependencies: - scipy=1.2 - setuptools=41.2 - sparse=0.8 + - pip: + - pint==0.13 diff --git a/ci/requirements/py36.yml b/ci/requirements/py36.yml index a500173f277..aa2baf9dcce 100644 --- a/ci/requirements/py36.yml +++ b/ci/requirements/py36.yml @@ -28,7 +28,6 @@ dependencies: - numba - numpy - pandas - - pint - pip - pseudonetcdf - pydap @@ -45,3 +44,4 @@ dependencies: - zarr - pip: - numbagg + - pint diff --git a/ci/requirements/py37-windows.yml b/ci/requirements/py37-windows.yml index e9e5c7a900a..8b12704d644 100644 --- a/ci/requirements/py37-windows.yml +++ b/ci/requirements/py37-windows.yml @@ -28,7 +28,6 @@ dependencies: - numba - numpy - pandas - - pint - pip - pseudonetcdf - pydap @@ -45,3 +44,4 @@ dependencies: - zarr - pip: - numbagg + - pint diff --git a/ci/requirements/py37.yml b/ci/requirements/py37.yml index dba3926596e..70c453e8776 100644 --- a/ci/requirements/py37.yml +++ b/ci/requirements/py37.yml @@ -28,7 +28,6 @@ dependencies: - numba - numpy - pandas - - pint - pip - pseudonetcdf - pydap @@ -45,3 +44,4 @@ dependencies: - zarr - pip: - numbagg + - pint diff --git a/ci/requirements/py38-all-but-dask.yml b/ci/requirements/py38-all-but-dask.yml index a375d9e1e5a..6d76eecbd6a 100644 --- a/ci/requirements/py38-all-but-dask.yml +++ b/ci/requirements/py38-all-but-dask.yml @@ -25,7 +25,6 @@ dependencies: - numba - numpy - pandas - - pint - pip - pseudonetcdf - pydap @@ -42,3 +41,4 @@ dependencies: - zarr - pip: - numbagg + - pint diff --git a/ci/requirements/py38.yml b/ci/requirements/py38.yml index 24602f884e9..6f35138978c 100644 --- a/ci/requirements/py38.yml +++ b/ci/requirements/py38.yml @@ -22,13 +22,12 @@ dependencies: - isort - lxml # Optional dep of pydap - matplotlib - - mypy=0.761 # Must match .pre-commit-config.yaml + - mypy=0.780 # Must match .pre-commit-config.yaml - nc-time-axis - netcdf4 - numba - numpy - pandas - - pint - pip - pseudonetcdf - pydap @@ -45,3 +44,4 @@ dependencies: - zarr - pip: - numbagg + - pint diff --git a/doc/_templates/autosummary/accessor.rst b/doc/_templates/autosummary/accessor.rst new file mode 100644 index 00000000000..4ba745cd6fd --- /dev/null +++ b/doc/_templates/autosummary/accessor.rst @@ -0,0 +1,6 @@ +{{ fullname }} +{{ underline }} + +.. currentmodule:: {{ module.split('.')[0] }} + +.. autoaccessor:: {{ (module.split('.')[1:] + [objname]) | join('.') }} diff --git a/doc/_templates/autosummary/accessor_attribute.rst b/doc/_templates/autosummary/accessor_attribute.rst new file mode 100644 index 00000000000..b5ad65d6a73 --- /dev/null +++ b/doc/_templates/autosummary/accessor_attribute.rst @@ -0,0 +1,6 @@ +{{ fullname }} +{{ underline }} + +.. currentmodule:: {{ module.split('.')[0] }} + +.. 
autoaccessorattribute:: {{ (module.split('.')[1:] + [objname]) | join('.') }} diff --git a/doc/_templates/autosummary/accessor_callable.rst b/doc/_templates/autosummary/accessor_callable.rst new file mode 100644 index 00000000000..7a3301814f5 --- /dev/null +++ b/doc/_templates/autosummary/accessor_callable.rst @@ -0,0 +1,6 @@ +{{ fullname }} +{{ underline }} + +.. currentmodule:: {{ module.split('.')[0] }} + +.. autoaccessorcallable:: {{ (module.split('.')[1:] + [objname]) | join('.') }}.__call__ diff --git a/doc/_templates/autosummary/accessor_method.rst b/doc/_templates/autosummary/accessor_method.rst new file mode 100644 index 00000000000..aefbba6ef1b --- /dev/null +++ b/doc/_templates/autosummary/accessor_method.rst @@ -0,0 +1,6 @@ +{{ fullname }} +{{ underline }} + +.. currentmodule:: {{ module.split('.')[0] }} + +.. autoaccessormethod:: {{ (module.split('.')[1:] + [objname]) | join('.') }} diff --git a/doc/api-hidden.rst b/doc/api-hidden.rst index 313428c29d2..efef4259b74 100644 --- a/doc/api-hidden.rst +++ b/doc/api-hidden.rst @@ -9,8 +9,6 @@ .. autosummary:: :toctree: generated/ - auto_combine - Dataset.nbytes Dataset.chunks @@ -43,8 +41,6 @@ core.rolling.DatasetCoarsen.all core.rolling.DatasetCoarsen.any - core.rolling.DatasetCoarsen.argmax - core.rolling.DatasetCoarsen.argmin core.rolling.DatasetCoarsen.count core.rolling.DatasetCoarsen.max core.rolling.DatasetCoarsen.mean @@ -70,8 +66,6 @@ core.groupby.DatasetGroupBy.where core.groupby.DatasetGroupBy.all core.groupby.DatasetGroupBy.any - core.groupby.DatasetGroupBy.argmax - core.groupby.DatasetGroupBy.argmin core.groupby.DatasetGroupBy.count core.groupby.DatasetGroupBy.max core.groupby.DatasetGroupBy.mean @@ -87,8 +81,6 @@ core.resample.DatasetResample.all core.resample.DatasetResample.any core.resample.DatasetResample.apply - core.resample.DatasetResample.argmax - core.resample.DatasetResample.argmin core.resample.DatasetResample.assign core.resample.DatasetResample.assign_coords core.resample.DatasetResample.bfill @@ -112,8 +104,6 @@ core.resample.DatasetResample.dims core.resample.DatasetResample.groups - core.rolling.DatasetRolling.argmax - core.rolling.DatasetRolling.argmin core.rolling.DatasetRolling.count core.rolling.DatasetRolling.max core.rolling.DatasetRolling.mean @@ -187,8 +177,6 @@ core.rolling.DataArrayCoarsen.all core.rolling.DataArrayCoarsen.any - core.rolling.DataArrayCoarsen.argmax - core.rolling.DataArrayCoarsen.argmin core.rolling.DataArrayCoarsen.count core.rolling.DataArrayCoarsen.max core.rolling.DataArrayCoarsen.mean @@ -213,8 +201,6 @@ core.groupby.DataArrayGroupBy.where core.groupby.DataArrayGroupBy.all core.groupby.DataArrayGroupBy.any - core.groupby.DataArrayGroupBy.argmax - core.groupby.DataArrayGroupBy.argmin core.groupby.DataArrayGroupBy.count core.groupby.DataArrayGroupBy.max core.groupby.DataArrayGroupBy.mean @@ -230,8 +216,6 @@ core.resample.DataArrayResample.all core.resample.DataArrayResample.any core.resample.DataArrayResample.apply - core.resample.DataArrayResample.argmax - core.resample.DataArrayResample.argmin core.resample.DataArrayResample.assign_coords core.resample.DataArrayResample.bfill core.resample.DataArrayResample.count @@ -254,8 +238,6 @@ core.resample.DataArrayResample.dims core.resample.DataArrayResample.groups - core.rolling.DataArrayRolling.argmax - core.rolling.DataArrayRolling.argmin core.rolling.DataArrayRolling.count core.rolling.DataArrayRolling.max core.rolling.DataArrayRolling.mean @@ -425,8 +407,6 @@ IndexVariable.all IndexVariable.any - IndexVariable.argmax - 
IndexVariable.argmin IndexVariable.argsort IndexVariable.astype IndexVariable.broadcast_equals @@ -566,8 +546,6 @@ CFTimeIndex.all CFTimeIndex.any CFTimeIndex.append - CFTimeIndex.argmax - CFTimeIndex.argmin CFTimeIndex.argsort CFTimeIndex.asof CFTimeIndex.asof_locs diff --git a/doc/api.rst b/doc/api.rst index b37c84e7a81..603e3e8f6cf 100644 --- a/doc/api.rst +++ b/doc/api.rst @@ -21,14 +21,16 @@ Top-level functions broadcast concat merge - auto_combine combine_by_coords combine_nested where set_options + infer_freq full_like zeros_like ones_like + cov + corr dot polyval map_blocks @@ -173,6 +175,7 @@ Computation Dataset.quantile Dataset.differentiate Dataset.integrate + Dataset.map_blocks Dataset.polyfit **Aggregation**: @@ -229,6 +232,15 @@ Reshaping and reorganizing Dataset.sortby Dataset.broadcast_like +Plotting +-------- + +.. autosummary:: + :toctree: generated/ + :template: autosummary/accessor_method.rst + + Dataset.plot.scatter + DataArray ========= @@ -358,6 +370,8 @@ Computation DataArray.integrate DataArray.polyfit DataArray.str + DataArray.map_blocks + **Aggregation**: :py:attr:`~DataArray.all` @@ -397,6 +411,122 @@ Computation :py:attr:`~core.groupby.DataArrayGroupBy.where` :py:attr:`~core.groupby.DataArrayGroupBy.quantile` + +String manipulation +------------------- + +.. autosummary:: + :toctree: generated/ + :template: autosummary/accessor_method.rst + + DataArray.str.capitalize + DataArray.str.center + DataArray.str.contains + DataArray.str.count + DataArray.str.decode + DataArray.str.encode + DataArray.str.endswith + DataArray.str.find + DataArray.str.get + DataArray.str.index + DataArray.str.isalnum + DataArray.str.isalpha + DataArray.str.isdecimal + DataArray.str.isdigit + DataArray.str.isnumeric + DataArray.str.isspace + DataArray.str.istitle + DataArray.str.isupper + DataArray.str.len + DataArray.str.ljust + DataArray.str.lower + DataArray.str.lstrip + DataArray.str.match + DataArray.str.pad + DataArray.str.repeat + DataArray.str.replace + DataArray.str.rfind + DataArray.str.rindex + DataArray.str.rjust + DataArray.str.rstrip + DataArray.str.slice + DataArray.str.slice_replace + DataArray.str.startswith + DataArray.str.strip + DataArray.str.swapcase + DataArray.str.title + DataArray.str.translate + DataArray.str.upper + DataArray.str.wrap + DataArray.str.zfill + +Datetimelike properties +----------------------- + +**Datetime properties**: + +.. autosummary:: + :toctree: generated/ + :template: autosummary/accessor_attribute.rst + + DataArray.dt.year + DataArray.dt.month + DataArray.dt.day + DataArray.dt.hour + DataArray.dt.minute + DataArray.dt.second + DataArray.dt.microsecond + DataArray.dt.nanosecond + DataArray.dt.weekofyear + DataArray.dt.week + DataArray.dt.dayofweek + DataArray.dt.weekday + DataArray.dt.weekday_name + DataArray.dt.dayofyear + DataArray.dt.quarter + DataArray.dt.days_in_month + DataArray.dt.daysinmonth + DataArray.dt.season + DataArray.dt.time + DataArray.dt.is_month_start + DataArray.dt.is_month_end + DataArray.dt.is_quarter_end + DataArray.dt.is_year_start + DataArray.dt.is_leap_year + +**Datetime methods**: + +.. autosummary:: + :toctree: generated/ + :template: autosummary/accessor_method.rst + + DataArray.dt.floor + DataArray.dt.ceil + DataArray.dt.round + DataArray.dt.strftime + +**Timedelta properties**: + +.. autosummary:: + :toctree: generated/ + :template: autosummary/accessor_attribute.rst + + DataArray.dt.days + DataArray.dt.seconds + DataArray.dt.microseconds + DataArray.dt.nanoseconds + +**Timedelta methods**: + +.. 
autosummary:: + :toctree: generated/ + :template: autosummary/accessor_method.rst + + DataArray.dt.floor + DataArray.dt.ceil + DataArray.dt.round + + Reshaping and reorganizing -------------------------- @@ -413,6 +543,27 @@ Reshaping and reorganizing DataArray.sortby DataArray.broadcast_like +Plotting +-------- + +.. autosummary:: + :toctree: generated/ + :template: autosummary/accessor_callable.rst + + DataArray.plot + +.. autosummary:: + :toctree: generated/ + :template: autosummary/accessor_method.rst + + DataArray.plot.contourf + DataArray.plot.contour + DataArray.plot.hist + DataArray.plot.imshow + DataArray.plot.line + DataArray.plot.pcolormesh + DataArray.plot.step + .. _api.ufuncs: Universal functions @@ -518,7 +669,6 @@ Dataset methods Dataset.load Dataset.chunk Dataset.unify_chunks - Dataset.map_blocks Dataset.filter_by_attrs Dataset.info @@ -550,7 +700,6 @@ DataArray methods DataArray.load DataArray.chunk DataArray.unify_chunks - DataArray.map_blocks Coordinates objects =================== @@ -660,25 +809,6 @@ Creating custom indexes cftime_range -Plotting -======== - -.. autosummary:: - :toctree: generated/ - - Dataset.plot - plot.scatter - DataArray.plot - plot.plot - plot.contourf - plot.contour - plot.hist - plot.imshow - plot.line - plot.pcolormesh - plot.step - plot.FacetGrid - Faceting -------- .. autosummary:: diff --git a/doc/combining.rst b/doc/combining.rst index 05b7f2efc50..ffc6575c579 100644 --- a/doc/combining.rst +++ b/doc/combining.rst @@ -4,11 +4,12 @@ Combining data -------------- .. ipython:: python - :suppress: + :suppress: import numpy as np import pandas as pd import xarray as xr + np.random.seed(123456) * For combining datasets or data arrays along a single dimension, see concatenate_. @@ -28,11 +29,10 @@ that dimension: .. ipython:: python - arr = xr.DataArray(np.random.randn(2, 3), - [('x', ['a', 'b']), ('y', [10, 20, 30])]) + arr = xr.DataArray(np.random.randn(2, 3), [("x", ["a", "b"]), ("y", [10, 20, 30])]) arr[:, :1] # this resembles how you would use np.concatenate - xr.concat([arr[:, :1], arr[:, 1:]], dim='y') + xr.concat([arr[:, :1], arr[:, 1:]], dim="y") In addition to combining along an existing dimension, ``concat`` can create a new dimension by stacking lower dimensional arrays together: @@ -41,7 +41,7 @@ new dimension by stacking lower dimensional arrays together: arr[0] # to combine these 1d arrays into a 2d array in numpy, you would use np.array - xr.concat([arr[0], arr[1]], 'x') + xr.concat([arr[0], arr[1]], "x") If the second argument to ``concat`` is a new dimension name, the arrays will be concatenated along that new dimension, which is always inserted as the first @@ -49,7 +49,7 @@ dimension: .. ipython:: python - xr.concat([arr[0], arr[1]], 'new_dim') + xr.concat([arr[0], arr[1]], "new_dim") The second argument to ``concat`` can also be an :py:class:`~pandas.Index` or :py:class:`~xarray.DataArray` object as well as a string, in which case it is @@ -57,14 +57,14 @@ used to label the values along the new dimension: .. ipython:: python - xr.concat([arr[0], arr[1]], pd.Index([-90, -100], name='new_dim')) + xr.concat([arr[0], arr[1]], pd.Index([-90, -100], name="new_dim")) Of course, ``concat`` also works on ``Dataset`` objects: .. 
ipython:: python - ds = arr.to_dataset(name='foo') - xr.concat([ds.sel(x='a'), ds.sel(x='b')], 'x') + ds = arr.to_dataset(name="foo") + xr.concat([ds.sel(x="a"), ds.sel(x="b")], "x") :py:func:`~xarray.concat` has a number of options which provide deeper control over which variables are concatenated and how it handles conflicting variables @@ -84,8 +84,8 @@ To combine variables and coordinates between multiple ``DataArray`` and/or .. ipython:: python - xr.merge([ds, ds.rename({'foo': 'bar'})]) - xr.merge([xr.DataArray(n, name='var%d' % n) for n in range(5)]) + xr.merge([ds, ds.rename({"foo": "bar"})]) + xr.merge([xr.DataArray(n, name="var%d" % n) for n in range(5)]) If you merge another dataset (or a dictionary including data array objects), by default the resulting dataset will be aligned on the **union** of all index @@ -93,7 +93,7 @@ coordinates: .. ipython:: python - other = xr.Dataset({'bar': ('x', [1, 2, 3, 4]), 'x': list('abcd')}) + other = xr.Dataset({"bar": ("x", [1, 2, 3, 4]), "x": list("abcd")}) xr.merge([ds, other]) This ensures that ``merge`` is non-destructive. ``xarray.MergeError`` is raised @@ -116,7 +116,7 @@ used in the :py:class:`~xarray.Dataset` constructor: .. ipython:: python - xr.Dataset({'a': arr[:-1], 'b': arr[1:]}) + xr.Dataset({"a": arr[:-1], "b": arr[1:]}) .. _combine: @@ -131,8 +131,8 @@ are filled with ``NaN``. For example: .. ipython:: python - ar0 = xr.DataArray([[0, 0], [0, 0]], [('x', ['a', 'b']), ('y', [-1, 0])]) - ar1 = xr.DataArray([[1, 1], [1, 1]], [('x', ['b', 'c']), ('y', [0, 1])]) + ar0 = xr.DataArray([[0, 0], [0, 0]], [("x", ["a", "b"]), ("y", [-1, 0])]) + ar1 = xr.DataArray([[1, 1], [1, 1]], [("x", ["b", "c"]), ("y", [0, 1])]) ar0.combine_first(ar1) ar1.combine_first(ar0) @@ -152,7 +152,7 @@ variables with new values: .. ipython:: python - ds.update({'space': ('space', [10.2, 9.4, 3.9])}) + ds.update({"space": ("space", [10.2, 9.4, 3.9])}) However, dimensions are still required to be consistent between different Dataset variables, so you cannot change the size of a dimension unless you @@ -170,7 +170,7 @@ syntax: .. ipython:: python - ds['baz'] = xr.DataArray([9, 9, 9, 9, 9], coords=[('x', list('abcde'))]) + ds["baz"] = xr.DataArray([9, 9, 9, 9, 9], coords=[("x", list("abcde"))]) ds.baz Equals and identical @@ -193,7 +193,7 @@ object: .. ipython:: python - arr.identical(arr.rename('bar')) + arr.identical(arr.rename("bar")) :py:attr:`~xarray.Dataset.broadcast_equals` does a more relaxed form of equality check that allows variables to have different dimensions, as long as values @@ -201,8 +201,8 @@ are constant along those new dimensions: .. ipython:: python - left = xr.Dataset(coords={'x': 0}) - right = xr.Dataset({'x': [0, 0, 0]}) + left = xr.Dataset(coords={"x": 0}) + right = xr.Dataset({"x": [0, 0, 0]}) left.broadcast_equals(right) Like pandas objects, two xarray objects are still equal or identical if they have @@ -231,9 +231,9 @@ coordinates as long as any non-missing values agree or are disjoint: .. 
ipython:: python - ds1 = xr.Dataset({'a': ('x', [10, 20, 30, np.nan])}, {'x': [1, 2, 3, 4]}) - ds2 = xr.Dataset({'a': ('x', [np.nan, 30, 40, 50])}, {'x': [2, 3, 4, 5]}) - xr.merge([ds1, ds2], compat='no_conflicts') + ds1 = xr.Dataset({"a": ("x", [10, 20, 30, np.nan])}, {"x": [1, 2, 3, 4]}) + ds2 = xr.Dataset({"a": ("x", [np.nan, 30, 40, 50])}, {"x": [2, 3, 4, 5]}) + xr.merge([ds1, ds2], compat="no_conflicts") Note that due to the underlying representation of missing values as floating point numbers (``NaN``), variable data type is not always preserved when merging @@ -273,10 +273,12 @@ datasets into a doubly-nested list, e.g: .. ipython:: python - arr = xr.DataArray(name='temperature', data=np.random.randint(5, size=(2, 2)), dims=['x', 'y']) + arr = xr.DataArray( + name="temperature", data=np.random.randint(5, size=(2, 2)), dims=["x", "y"] + ) arr ds_grid = [[arr, arr], [arr, arr]] - xr.combine_nested(ds_grid, concat_dim=['x', 'y']) + xr.combine_nested(ds_grid, concat_dim=["x", "y"]) :py:func:`~xarray.combine_nested` can also be used to explicitly merge datasets with different variables. For example if we have 4 datasets, which are divided @@ -286,10 +288,10 @@ we wish to use ``merge`` instead of ``concat``: .. ipython:: python - temp = xr.DataArray(name='temperature', data=np.random.randn(2), dims=['t']) - precip = xr.DataArray(name='precipitation', data=np.random.randn(2), dims=['t']) + temp = xr.DataArray(name="temperature", data=np.random.randn(2), dims=["t"]) + precip = xr.DataArray(name="precipitation", data=np.random.randn(2), dims=["t"]) ds_grid = [[temp, precip], [temp, precip]] - xr.combine_nested(ds_grid, concat_dim=['t', None]) + xr.combine_nested(ds_grid, concat_dim=["t", None]) :py:func:`~xarray.combine_by_coords` is for combining objects which have dimension coordinates which specify their relationship to and order relative to one @@ -302,8 +304,8 @@ coordinates, not on their position in the list passed to ``combine_by_coords``. .. ipython:: python :okwarning: - x1 = xr.DataArray(name='foo', data=np.random.randn(3), coords=[('x', [0, 1, 2])]) - x2 = xr.DataArray(name='foo', data=np.random.randn(3), coords=[('x', [3, 4, 5])]) + x1 = xr.DataArray(name="foo", data=np.random.randn(3), coords=[("x", [0, 1, 2])]) + x2 = xr.DataArray(name="foo", data=np.random.randn(3), coords=[("x", [3, 4, 5])]) xr.combine_by_coords([x2, x1]) These functions can be used by :py:func:`~xarray.open_mfdataset` to open many diff --git a/doc/computation.rst b/doc/computation.rst index 4b8014c4782..3660aed93ed 100644 --- a/doc/computation.rst +++ b/doc/computation.rst @@ -18,17 +18,19 @@ Arithmetic operations with a single DataArray automatically vectorize (like numpy) over all array values: .. ipython:: python - :suppress: + :suppress: import numpy as np import pandas as pd import xarray as xr + np.random.seed(123456) .. ipython:: python - arr = xr.DataArray(np.random.RandomState(0).randn(2, 3), - [('x', ['a', 'b']), ('y', [10, 20, 30])]) + arr = xr.DataArray( + np.random.RandomState(0).randn(2, 3), [("x", ["a", "b"]), ("y", [10, 20, 30])] + ) arr - 3 abs(arr) @@ -45,7 +47,7 @@ Use :py:func:`~xarray.where` to conditionally switch between values: .. ipython:: python - xr.where(arr > 0, 'positive', 'negative') + xr.where(arr > 0, "positive", "negative") Use `@` to perform matrix multiplication: @@ -73,14 +75,14 @@ methods for working with missing data from pandas: .. 
ipython:: python - x = xr.DataArray([0, 1, np.nan, np.nan, 2], dims=['x']) + x = xr.DataArray([0, 1, np.nan, np.nan, 2], dims=["x"]) x.isnull() x.notnull() x.count() - x.dropna(dim='x') + x.dropna(dim="x") x.fillna(-1) - x.ffill('x') - x.bfill('x') + x.ffill("x") + x.bfill("x") Like pandas, xarray uses the float value ``np.nan`` (not-a-number) to represent missing values. @@ -90,9 +92,12 @@ for filling missing values via 1D interpolation. .. ipython:: python - x = xr.DataArray([0, 1, np.nan, np.nan, 2], dims=['x'], - coords={'xx': xr.Variable('x', [0, 1, 1.1, 1.9, 3])}) - x.interpolate_na(dim='x', method='linear', use_coordinate='xx') + x = xr.DataArray( + [0, 1, np.nan, np.nan, 2], + dims=["x"], + coords={"xx": xr.Variable("x", [0, 1, 1.1, 1.9, 3])}, + ) + x.interpolate_na(dim="x", method="linear", use_coordinate="xx") Note that xarray slightly diverges from the pandas ``interpolate`` syntax by providing the ``use_coordinate`` keyword which facilitates a clear specification @@ -110,8 +115,8 @@ applied along particular dimension(s): .. ipython:: python - arr.sum(dim='x') - arr.std(['x', 'y']) + arr.sum(dim="x") + arr.std(["x", "y"]) arr.min() @@ -121,7 +126,7 @@ for wrapping code designed to work with numpy arrays), you can use the .. ipython:: python - arr.get_axis_num('y') + arr.get_axis_num("y") These operations automatically skip missing values, like in pandas: @@ -142,8 +147,7 @@ method supports rolling window aggregation: .. ipython:: python - arr = xr.DataArray(np.arange(0, 7.5, 0.5).reshape(3, 5), - dims=('x', 'y')) + arr = xr.DataArray(np.arange(0, 7.5, 0.5).reshape(3, 5), dims=("x", "y")) arr :py:meth:`~xarray.DataArray.rolling` is applied along one dimension using the @@ -194,8 +198,9 @@ We can also manually iterate through ``Rolling`` objects: .. code:: python - for label, arr_window in r: - # arr_window is a view of x + for label, arr_window in r: + # arr_window is a view of x + ... .. _comput.rolling_exp: @@ -222,9 +227,9 @@ windowed rolling, convolution, short-time FFT etc. .. ipython:: python # rolling with 2-point stride - rolling_da = r.construct('window_dim', stride=2) + rolling_da = r.construct("window_dim", stride=2) rolling_da - rolling_da.mean('window_dim', skipna=False) + rolling_da.mean("window_dim", skipna=False) Because the ``DataArray`` given by ``r.construct('window_dim')`` is a view of the original array, it is memory efficient. @@ -232,8 +237,8 @@ You can also use ``construct`` to compute a weighted rolling sum: .. ipython:: python - weight = xr.DataArray([0.25, 0.5, 0.25], dims=['window']) - arr.rolling(y=3).construct('window').dot(weight) + weight = xr.DataArray([0.25, 0.5, 0.25], dims=["window"]) + arr.rolling(y=3).construct("window").dot(weight) .. note:: numpy's Nan-aggregation functions such as ``nansum`` copy the original array. @@ -254,52 +259,52 @@ support weighted ``sum`` and weighted ``mean``. .. ipython:: python - coords = dict(month=('month', [1, 2, 3])) + coords = dict(month=("month", [1, 2, 3])) - prec = xr.DataArray([1.1, 1.0, 0.9], dims=('month', ), coords=coords) - weights = xr.DataArray([31, 28, 31], dims=('month', ), coords=coords) + prec = xr.DataArray([1.1, 1.0, 0.9], dims=("month",), coords=coords) + weights = xr.DataArray([31, 28, 31], dims=("month",), coords=coords) Create a weighted object: .. ipython:: python - weighted_prec = prec.weighted(weights) - weighted_prec + weighted_prec = prec.weighted(weights) + weighted_prec Calculate the weighted sum: .. 
ipython:: python - weighted_prec.sum() + weighted_prec.sum() Calculate the weighted mean: .. ipython:: python - weighted_prec.mean(dim="month") + weighted_prec.mean(dim="month") The weighted sum corresponds to: .. ipython:: python - weighted_sum = (prec * weights).sum() - weighted_sum + weighted_sum = (prec * weights).sum() + weighted_sum and the weighted mean to: .. ipython:: python - weighted_mean = weighted_sum / weights.sum() - weighted_mean + weighted_mean = weighted_sum / weights.sum() + weighted_mean However, the functions also take missing values in the data into account: .. ipython:: python - data = xr.DataArray([np.NaN, 2, 4]) - weights = xr.DataArray([8, 1, 1]) + data = xr.DataArray([np.NaN, 2, 4]) + weights = xr.DataArray([8, 1, 1]) - data.weighted(weights).mean() + data.weighted(weights).mean() Using ``(data * weights).sum() / weights.sum()`` would (incorrectly) result in 0.6. @@ -309,16 +314,16 @@ If the weights add up to 0, ``sum`` returns 0: .. ipython:: python - data = xr.DataArray([1.0, 1.0]) - weights = xr.DataArray([-1.0, 1.0]) + data = xr.DataArray([1.0, 1.0]) + weights = xr.DataArray([-1.0, 1.0]) - data.weighted(weights).sum() + data.weighted(weights).sum() and ``mean`` returns ``NaN``: .. ipython:: python - data.weighted(weights).mean() + data.weighted(weights).mean() .. note:: @@ -336,18 +341,21 @@ methods. This supports the block aggregation along multiple dimensions, .. ipython:: python - x = np.linspace(0, 10, 300) - t = pd.date_range('15/12/1999', periods=364) - da = xr.DataArray(np.sin(x) * np.cos(np.linspace(0, 1, 364)[:, np.newaxis]), - dims=['time', 'x'], coords={'time': t, 'x': x}) - da + x = np.linspace(0, 10, 300) + t = pd.date_range("15/12/1999", periods=364) + da = xr.DataArray( + np.sin(x) * np.cos(np.linspace(0, 1, 364)[:, np.newaxis]), + dims=["time", "x"], + coords={"time": t, "x": x}, + ) + da In order to take a block mean for every 7 days along ``time`` dimension and every 2 points along ``x`` dimension, .. ipython:: python - da.coarsen(time=7, x=2).mean() + da.coarsen(time=7, x=2).mean() :py:meth:`~xarray.DataArray.coarsen` raises a ``ValueError`` if the data length is not a multiple of the corresponding window size. @@ -356,14 +364,14 @@ the excess entries or padding ``nan`` to insufficient entries, .. ipython:: python - da.coarsen(time=30, x=2, boundary='trim').mean() + da.coarsen(time=30, x=2, boundary="trim").mean() If you want to apply a specific function to a coordinate, you can pass the function or method name to the ``coord_func`` option, .. ipython:: python - da.coarsen(time=7, x=2, coord_func={'time': 'min'}).mean() + da.coarsen(time=7, x=2, coord_func={"time": "min"}).mean() .. _compute.using_coordinates: @@ -377,24 +385,25 @@ central finite differences using their coordinates, .. ipython:: python - a = xr.DataArray([0, 1, 2, 3], dims=['x'], coords=[[0.1, 0.11, 0.2, 0.3]]) + a = xr.DataArray([0, 1, 2, 3], dims=["x"], coords=[[0.1, 0.11, 0.2, 0.3]]) a - a.differentiate('x') + a.differentiate("x") This method can also be used for multidimensional arrays, .. ipython:: python - a = xr.DataArray(np.arange(8).reshape(4, 2), dims=['x', 'y'], - coords={'x': [0.1, 0.11, 0.2, 0.3]}) - a.differentiate('x') + a = xr.DataArray( + np.arange(8).reshape(4, 2), dims=["x", "y"], coords={"x": [0.1, 0.11, 0.2, 0.3]} + ) + a.differentiate("x") :py:meth:`~xarray.DataArray.integrate` computes integration based on the trapezoidal rule using their coordinates, .. ipython:: python - a.integrate('x') + a.integrate("x") .. 
note:: These methods are limited to simple cartesian geometry. Differentiation @@ -412,9 +421,9 @@ best fitting coefficients along a given dimension and for a given order, .. ipython:: python - x = xr.DataArray(np.arange(10), dims=['x'], name='x') - a = xr.DataArray(3 + 4 * x, dims=['x'], coords={'x': x}) - out = a.polyfit(dim='x', deg=1, full=True) + x = xr.DataArray(np.arange(10), dims=["x"], name="x") + a = xr.DataArray(3 + 4 * x, dims=["x"], coords={"x": x}) + out = a.polyfit(dim="x", deg=1, full=True) out The method outputs a dataset containing the coefficients (and more if `full=True`). @@ -443,9 +452,9 @@ arrays with different sizes aligned along different dimensions: .. ipython:: python - a = xr.DataArray([1, 2], [('x', ['a', 'b'])]) + a = xr.DataArray([1, 2], [("x", ["a", "b"])]) a - b = xr.DataArray([-1, -2, -3], [('y', [10, 20, 30])]) + b = xr.DataArray([-1, -2, -3], [("y", [10, 20, 30])]) b With xarray, we can apply binary mathematical operations to these arrays, and @@ -460,7 +469,7 @@ appeared: .. ipython:: python - c = xr.DataArray(np.arange(6).reshape(3, 2), [b['y'], a['x']]) + c = xr.DataArray(np.arange(6).reshape(3, 2), [b["y"], a["x"]]) c a + c @@ -494,7 +503,7 @@ operations. The default result of a binary operation is by the *intersection* .. ipython:: python - arr = xr.DataArray(np.arange(3), [('x', range(3))]) + arr = xr.DataArray(np.arange(3), [("x", range(3))]) arr + arr[:-1] If coordinate values for a dimension are missing on either argument, all @@ -503,7 +512,7 @@ matching dimensions must have the same size: .. ipython:: :verbatim: - In [1]: arr + xr.DataArray([1, 2], dims='x') + In [1]: arr + xr.DataArray([1, 2], dims="x") ValueError: arguments without labels along dimension 'x' cannot be aligned because they have different dimension size(s) {2} than the size of the aligned dimension labels: 3 @@ -562,16 +571,20 @@ variables: .. ipython:: python - ds = xr.Dataset({'x_and_y': (('x', 'y'), np.random.randn(3, 5)), - 'x_only': ('x', np.random.randn(3))}, - coords=arr.coords) + ds = xr.Dataset( + { + "x_and_y": (("x", "y"), np.random.randn(3, 5)), + "x_only": ("x", np.random.randn(3)), + }, + coords=arr.coords, + ) ds > 0 Datasets support most of the same methods found on data arrays: .. ipython:: python - ds.mean(dim='x') + ds.mean(dim="x") abs(ds) Datasets also support NumPy ufuncs (requires NumPy v1.13 or newer), or @@ -594,7 +607,7 @@ Arithmetic between two datasets matches data variables of the same name: .. ipython:: python - ds2 = xr.Dataset({'x_and_y': 0, 'x_only': 100}) + ds2 = xr.Dataset({"x_and_y": 0, "x_only": 100}) ds - ds2 Similarly to index based alignment, the result has the intersection of all @@ -638,7 +651,7 @@ any additional arguments: .. ipython:: python squared_error = lambda x, y: (x - y) ** 2 - arr1 = xr.DataArray([0, 1, 2, 3], dims='x') + arr1 = xr.DataArray([0, 1, 2, 3], dims="x") xr.apply_ufunc(squared_error, arr1, 1) For using more complex operations that consider some array values collectively, @@ -658,21 +671,21 @@ to set ``axis=-1``. As an example, here is how we would wrap .. code-block:: python def vector_norm(x, dim, ord=None): - return xr.apply_ufunc(np.linalg.norm, x, - input_core_dims=[[dim]], - kwargs={'ord': ord, 'axis': -1}) + return xr.apply_ufunc( + np.linalg.norm, x, input_core_dims=[[dim]], kwargs={"ord": ord, "axis": -1} + ) .. 
ipython:: python - :suppress: + :suppress: def vector_norm(x, dim, ord=None): - return xr.apply_ufunc(np.linalg.norm, x, - input_core_dims=[[dim]], - kwargs={'ord': ord, 'axis': -1}) + return xr.apply_ufunc( + np.linalg.norm, x, input_core_dims=[[dim]], kwargs={"ord": ord, "axis": -1} + ) .. ipython:: python - vector_norm(arr1, dim='x') + vector_norm(arr1, dim="x") Because ``apply_ufunc`` follows a standard convention for ufuncs, it plays nicely with tools for building vectorized functions, like diff --git a/doc/conf.py b/doc/conf.py index 578f9cf550d..d3d126cb33f 100644 --- a/doc/conf.py +++ b/doc/conf.py @@ -20,6 +20,12 @@ import sys from contextlib import suppress +# --------- autosummary templates ------------------ +# TODO: eventually replace this with a sphinx.ext.auto_accessor module +import sphinx +from sphinx.ext.autodoc import AttributeDocumenter, Documenter, MethodDocumenter +from sphinx.util import rpartition + # make sure the source version is preferred (#3567) root = pathlib.Path(__file__).absolute().parent.parent os.environ["PYTHONPATH"] = str(root) @@ -79,7 +85,6 @@ "sphinx.ext.extlinks", "sphinx.ext.mathjax", "sphinx.ext.napoleon", - "numpydoc", "IPython.sphinxext.ipython_directive", "IPython.sphinxext.ipython_console_highlighting", "nbsphinx", @@ -352,10 +357,120 @@ "python": ("https://docs.python.org/3/", None), "pandas": ("https://pandas.pydata.org/pandas-docs/stable", None), "iris": ("https://scitools.org.uk/iris/docs/latest", None), - "numpy": ("https://docs.scipy.org/doc/numpy", None), + "numpy": ("https://numpy.org/doc/stable", None), "scipy": ("https://docs.scipy.org/doc/scipy/reference", None), "numba": ("https://numba.pydata.org/numba-doc/latest", None), "matplotlib": ("https://matplotlib.org", None), "dask": ("https://docs.dask.org/en/latest", None), "cftime": ("https://unidata.github.io/cftime", None), } + + +# --------- autosummary templates ------------------ +# TODO: eventually replace this with a sphinx.ext.auto_accessor module +class AccessorDocumenter(MethodDocumenter): + """ + Specialized Documenter subclass for accessors. + """ + + objtype = "accessor" + directivetype = "method" + + # lower than MethodDocumenter so this is not chosen for normal methods + priority = 0.6 + + def format_signature(self): + # this method gives an error/warning for the accessors, therefore + # overriding it (accessor has no arguments) + return "" + + +class AccessorLevelDocumenter(Documenter): + """ + Specialized Documenter subclass for objects on accessor level (methods, + attributes). + """ + + # This is the simple straightforward version + # modname is None, base the last elements (eg 'hour') + # and path the part before (eg 'Series.dt') + # def resolve_name(self, modname, parents, path, base): + # modname = 'pandas' + # mod_cls = path.rstrip('.') + # mod_cls = mod_cls.split('.') + # + # return modname, mod_cls + [base] + + def resolve_name(self, modname, parents, path, base): + if modname is None: + if path: + mod_cls = path.rstrip(".") + else: + mod_cls = None + # if documenting a class-level object without path, + # there must be a current class, either from a parent + # auto directive ... + mod_cls = self.env.temp_data.get("autodoc:class") + # ... or from a class directive + if mod_cls is None: + mod_cls = self.env.temp_data.get("py:class") + # ... 
if still None, there's no way to know + if mod_cls is None: + return None, [] + # HACK: this is added in comparison to ClassLevelDocumenter + # mod_cls still consists of class.accessor, so an extra + # rpartition is needed + modname, accessor = rpartition(mod_cls, ".") + modname, cls = rpartition(modname, ".") + parents = [cls, accessor] + # if the module name is still missing, get it like above + if not modname: + modname = self.env.temp_data.get("autodoc:module") + if not modname: + if sphinx.__version__ > "1.3": + modname = self.env.ref_context.get("py:module") + else: + modname = self.env.temp_data.get("py:module") + # ... else, it stays None, which means invalid + return modname, parents + [base] + + +class AccessorAttributeDocumenter(AccessorLevelDocumenter, AttributeDocumenter): + + objtype = "accessorattribute" + directivetype = "attribute" + + # lower than AttributeDocumenter so this is not chosen for normal attributes + priority = 0.6 + + +class AccessorMethodDocumenter(AccessorLevelDocumenter, MethodDocumenter): + + objtype = "accessormethod" + directivetype = "method" + + # lower than MethodDocumenter so this is not chosen for normal methods + priority = 0.6 + + +class AccessorCallableDocumenter(AccessorLevelDocumenter, MethodDocumenter): + """ + This documenter lets us remove .__call__ from the method signature for + callable accessors like Series.plot + """ + + objtype = "accessorcallable" + directivetype = "method" + + # lower than MethodDocumenter; otherwise the doc build prints warnings + priority = 0.5 + + def format_name(self): + return MethodDocumenter.format_name(self).rstrip(".__call__") + + +def setup(app): + app.add_autodocumenter(AccessorDocumenter) + app.add_autodocumenter(AccessorAttributeDocumenter) + app.add_autodocumenter(AccessorMethodDocumenter) + app.add_autodocumenter(AccessorCallableDocumenter) diff --git a/doc/contributing.rst b/doc/contributing.rst index f581bcd9741..9e6a3c250e9 100644 --- a/doc/contributing.rst +++ b/doc/contributing.rst @@ -148,7 +148,7 @@ We'll now kick off a two-step process: 1. Install the build dependencies 2. Build and install xarray -.. code-block:: none +.. code-block:: sh # Create and activate the build environment # This is for Linux and MacOS. On Windows, use py37-windows.yml instead. @@ -162,7 +162,10 @@ We'll now kick off a two-step process: # Build and install xarray pip install -e . -At this point you should be able to import *xarray* from your locally built version:: +At this point you should be able to import *xarray* from your locally +built version: + +.. code-block:: sh $ python # start an interpreter >>> import xarray @@ -256,18 +259,20 @@ Some other important things to know about the docs: - The tutorials make heavy use of the `ipython directive `_ sphinx extension. This directive lets you put code in the documentation which will be run - during the doc build. For example:: + during the doc build. For example: + + .. code:: rst .. ipython:: python x = 2 - x**3 + x ** 3 will be rendered as:: In [1]: x = 2 - In [2]: x**3 + In [2]: x ** 3 Out[2]: 8 Almost all code examples in the docs are run (and the output saved) during the @@ -290,7 +295,7 @@ Requirements Make sure to follow the instructions on :ref:`creating a development environment above `, but to build the docs you need to use the environment file ``ci/requirements/doc.yml``. -.. code-block:: none +.. 
code-block:: sh # Create and activate the docs environment conda env create -f ci/requirements/doc.yml @@ -347,7 +352,10 @@ Code Formatting xarray uses several tools to ensure a consistent code format throughout the project: -- `Black `_ for standardized code formatting +- `Black `_ for standardized + code formatting +- `blackdoc `_ for + standardized code formatting in documentation - `Flake8 `_ for general code quality - `isort `_ for standardized order in imports. See also `flake8-isort `_. @@ -356,12 +364,13 @@ xarray uses several tools to ensure a consistent code format throughout the proj ``pip``:: - pip install black flake8 isort mypy + pip install black flake8 isort mypy blackdoc and then run from the root of the Xarray repository:: isort -rc . black -t py36 . + blackdoc -t py36 . flake8 mypy . @@ -467,7 +476,7 @@ typically find tests wrapped in a class. .. code-block:: python class TestReallyCoolFeature: - .... + ... Going forward, we are moving to a more *functional* style using the `pytest `__ framework, which offers a richer @@ -477,7 +486,7 @@ writing test classes, we will write test functions like this: .. code-block:: python def test_really_cool_feature(): - .... + ... Using ``pytest`` ~~~~~~~~~~~~~~~~ @@ -508,17 +517,23 @@ We would name this file ``test_cool_feature.py`` and put in an appropriate place from xarray.testing import assert_equal - @pytest.mark.parametrize('dtype', ['int8', 'int16', 'int32', 'int64']) + @pytest.mark.parametrize("dtype", ["int8", "int16", "int32", "int64"]) def test_dtypes(dtype): assert str(np.dtype(dtype)) == dtype - @pytest.mark.parametrize('dtype', ['float32', - pytest.param('int16', marks=pytest.mark.skip), - pytest.param('int32', marks=pytest.mark.xfail( - reason='to show how it works'))]) + @pytest.mark.parametrize( + "dtype", + [ + "float32", + pytest.param("int16", marks=pytest.mark.skip), + pytest.param( + "int32", marks=pytest.mark.xfail(reason="to show how it works") + ), + ], + ) def test_mark(dtype): - assert str(np.dtype(dtype)) == 'float32' + assert str(np.dtype(dtype)) == "float32" @pytest.fixture @@ -526,7 +541,7 @@ We would name this file ``test_cool_feature.py`` and put in an appropriate place return xr.DataArray([1, 2, 3]) - @pytest.fixture(params=['int8', 'int16', 'int32', 'int64']) + @pytest.fixture(params=["int8", "int16", "int32", "int64"]) def dtype(request): return request.param diff --git a/doc/dask.rst b/doc/dask.rst index 07b3939af6e..de25ee2200e 100644 --- a/doc/dask.rst +++ b/doc/dask.rst @@ -56,19 +56,26 @@ argument to :py:func:`~xarray.open_dataset` or using the import numpy as np import pandas as pd import xarray as xr + np.random.seed(123456) np.set_printoptions(precision=3, linewidth=100, threshold=100, edgeitems=3) - ds = xr.Dataset({'temperature': (('time', 'latitude', 'longitude'), - np.random.randn(30, 180, 180)), - 'time': pd.date_range('2015-01-01', periods=30), - 'longitude': np.arange(180), - 'latitude': np.arange(89.5, -90.5, -1)}) - ds.to_netcdf('example-data.nc') + ds = xr.Dataset( + { + "temperature": ( + ("time", "latitude", "longitude"), + np.random.randn(30, 180, 180), + ), + "time": pd.date_range("2015-01-01", periods=30), + "longitude": np.arange(180), + "latitude": np.arange(89.5, -90.5, -1), + } + ) + ds.to_netcdf("example-data.nc") .. ipython:: python - ds = xr.open_dataset('example-data.nc', chunks={'time': 10}) + ds = xr.open_dataset("example-data.nc", chunks={"time": 10}) ds In this example ``latitude`` and ``longitude`` do not appear in the ``chunks`` @@ -106,7 +113,7 @@ usual way. .. 
ipython:: python - ds.to_netcdf('manipulated-example-data.nc') + ds.to_netcdf("manipulated-example-data.nc") By setting the ``compute`` argument to ``False``, :py:meth:`~xarray.Dataset.to_netcdf` will return a ``dask.delayed`` object that can be computed later. @@ -114,8 +121,9 @@ will return a ``dask.delayed`` object that can be computed later. .. ipython:: python from dask.diagnostics import ProgressBar + # or distributed.progress when using the distributed scheduler - delayed_obj = ds.to_netcdf('manipulated-example-data.nc', compute=False) + delayed_obj = ds.to_netcdf("manipulated-example-data.nc", compute=False) with ProgressBar(): results = delayed_obj.compute() @@ -141,8 +149,9 @@ Dask DataFrames do not support multi-indexes so the coordinate variables from th :suppress: import os - os.remove('example-data.nc') - os.remove('manipulated-example-data.nc') + + os.remove("example-data.nc") + os.remove("manipulated-example-data.nc") Using Dask with xarray ---------------------- @@ -199,7 +208,7 @@ Dask arrays using the :py:meth:`~xarray.Dataset.persist` method: .. ipython:: python - ds = ds.persist() + ds = ds.persist() :py:meth:`~xarray.Dataset.persist` is particularly useful when using a distributed cluster because the data will be loaded into distributed memory @@ -224,11 +233,11 @@ sizes of Dask arrays is done with the :py:meth:`~xarray.Dataset.chunk` method: .. ipython:: python :suppress: - ds = ds.chunk({'time': 10}) + ds = ds.chunk({"time": 10}) .. ipython:: python - rechunked = ds.chunk({'latitude': 100, 'longitude': 100}) + rechunked = ds.chunk({"latitude": 100, "longitude": 100}) You can view the size of existing chunks on an array by viewing the :py:attr:`~xarray.Dataset.chunks` attribute: @@ -256,6 +265,7 @@ lazy Dask arrays, in the :ref:`xarray.ufuncs ` module: .. ipython:: python import xarray.ufuncs as xu + xu.sin(rechunked) To access Dask arrays directly, use the new @@ -274,12 +284,21 @@ loaded into Dask or not: .. _dask.automatic-parallelization: -Automatic parallelization -------------------------- +Automatic parallelization with ``apply_ufunc`` and ``map_blocks`` +----------------------------------------------------------------- Almost all of xarray's built-in operations work on Dask arrays. If you want to -use a function that isn't wrapped by xarray, one option is to extract Dask -arrays from xarray objects (``.data``) and use Dask directly. +use a function that isn't wrapped by xarray, and have it applied in parallel on +each block of your xarray object, you have three options: + +1. Extract Dask arrays from xarray objects (``.data``) and use Dask directly. +2. Use :py:func:`~xarray.apply_ufunc` to apply functions that consume and return NumPy arrays. +3. Use :py:func:`~xarray.map_blocks`, :py:meth:`Dataset.map_blocks` or :py:meth:`DataArray.map_blocks` + to apply functions that consume and return xarray objects. + + +``apply_ufunc`` +~~~~~~~~~~~~~~~ Another option is to use xarray's :py:func:`~xarray.apply_ufunc`, which can automate `embarrassingly parallel @@ -302,24 +321,32 @@ we use to calculate `Spearman's rank-correlation coefficient ` and @@ -453,15 +470,15 @@ dataset variables: .. ipython:: python - ds.rename({'temperature': 'temp', 'precipitation': 'precip'}) + ds.rename({"temperature": "temp", "precipitation": "precip"}) The related :py:meth:`~xarray.Dataset.swap_dims` method allows you do to swap dimension and non-dimension variables: .. 
ipython:: python - ds.coords['day'] = ('time', [6, 7, 8]) - ds.swap_dims({'time': 'day'}) + ds.coords["day"] = ("time", [6, 7, 8]) + ds.swap_dims({"time": "day"}) .. _coordinates: @@ -519,8 +536,8 @@ To convert back and forth between data and coordinates, you can use the .. ipython:: python ds.reset_coords() - ds.set_coords(['temperature', 'precipitation']) - ds['temperature'].reset_coords(drop=True) + ds.set_coords(["temperature", "precipitation"]) + ds["temperature"].reset_coords(drop=True) Notice that these operations skip coordinates with names given by dimensions, as used for indexing. This mostly because we are not entirely sure how to @@ -544,7 +561,7 @@ logic used for merging coordinates in arithmetic operations .. ipython:: python - alt = xr.Dataset(coords={'z': [10], 'lat': 0, 'lon': 0}) + alt = xr.Dataset(coords={"z": [10], "lat": 0, "lon": 0}) ds.coords.merge(alt.coords) The ``coords.merge`` method may be useful if you want to implement your own @@ -560,7 +577,7 @@ To convert a coordinate (or any ``DataArray``) into an actual .. ipython:: python - ds['time'].to_index() + ds["time"].to_index() A useful shortcut is the ``indexes`` property (on both ``DataArray`` and ``Dataset``), which lazily constructs a dictionary whose keys are given by each @@ -577,9 +594,10 @@ Xarray supports labeling coordinate values with a :py:class:`pandas.MultiIndex`: .. ipython:: python - midx = pd.MultiIndex.from_arrays([['R', 'R', 'V', 'V'], [.1, .2, .7, .9]], - names=('band', 'wn')) - mda = xr.DataArray(np.random.rand(4), coords={'spec': midx}, dims='spec') + midx = pd.MultiIndex.from_arrays( + [["R", "R", "V", "V"], [0.1, 0.2, 0.7, 0.9]], names=("band", "wn") + ) + mda = xr.DataArray(np.random.rand(4), coords={"spec": midx}, dims="spec") mda For convenience multi-index levels are directly accessible as "virtual" or @@ -587,8 +605,8 @@ For convenience multi-index levels are directly accessible as "virtual" or .. ipython:: python - mda['band'] - mda.wn + mda["band"] + mda.wn Indexing with multi-index levels is also possible using the ``sel`` method (see :ref:`multi-level indexing`). diff --git a/doc/faq.rst b/doc/faq.rst index 576cec5c2b1..a2b8be47e06 100644 --- a/doc/faq.rst +++ b/doc/faq.rst @@ -4,11 +4,12 @@ Frequently Asked Questions ========================== .. ipython:: python - :suppress: + :suppress: import numpy as np import pandas as pd import xarray as xr + np.random.seed(123456) @@ -103,21 +104,21 @@ code fragment .. ipython:: python arr = xr.DataArray([1, 2, 3]) - pd.Series({'x': arr[0], 'mean': arr.mean(), 'std': arr.std()}) + pd.Series({"x": arr[0], "mean": arr.mean(), "std": arr.std()}) does not yield the pandas DataFrame we expected. We need to specify the type conversion ourselves: .. ipython:: python - pd.Series({'x': arr[0], 'mean': arr.mean(), 'std': arr.std()}, dtype=float) + pd.Series({"x": arr[0], "mean": arr.mean(), "std": arr.std()}, dtype=float) Alternatively, we could use the ``item`` method or the ``float`` constructor to convert values one at a time .. ipython:: python - pd.Series({'x': arr[0].item(), 'mean': float(arr.mean())}) + pd.Series({"x": arr[0].item(), "mean": float(arr.mean())}) .. _approach to metadata: diff --git a/doc/groupby.rst b/doc/groupby.rst index 223185bd0d5..c72a26c45ea 100644 --- a/doc/groupby.rst +++ b/doc/groupby.rst @@ -26,11 +26,12 @@ Split Let's create a simple example dataset: .. ipython:: python - :suppress: + :suppress: import numpy as np import pandas as pd import xarray as xr + np.random.seed(123456) .. 
ipython:: python @@ -47,20 +48,20 @@ use a DataArray directly), we get back a ``GroupBy`` object: .. ipython:: python - ds.groupby('letters') + ds.groupby("letters") This object works very similarly to a pandas GroupBy object. You can view the group indices with the ``groups`` attribute: .. ipython:: python - ds.groupby('letters').groups + ds.groupby("letters").groups You can also iterate over groups in ``(label, group)`` pairs: .. ipython:: python - list(ds.groupby('letters')) + list(ds.groupby("letters")) Just like in pandas, creating a GroupBy object is cheap: it does not actually split the data until you access particular values. @@ -75,8 +76,8 @@ a customized coordinate, but xarray facilitates this via the .. ipython:: python - x_bins = [0,25,50] - ds.groupby_bins('x', x_bins).groups + x_bins = [0, 25, 50] + ds.groupby_bins("x", x_bins).groups The binning is implemented via :func:`pandas.cut`, whose documentation details how the bins are assigned. As seen in the example above, by default, the bins are @@ -86,8 +87,8 @@ choose `float` labels which identify the bin centers: .. ipython:: python - x_bin_labels = [12.5,37.5] - ds.groupby_bins('x', x_bins, labels=x_bin_labels).groups + x_bin_labels = [12.5, 37.5] + ds.groupby_bins("x", x_bins, labels=x_bin_labels).groups Apply @@ -102,7 +103,8 @@ concatenated back together along the group axis: def standardize(x): return (x - x.mean()) / x.std() - arr.groupby('letters').map(standardize) + + arr.groupby("letters").map(standardize) GroupBy objects also have a :py:meth:`~xarray.core.groupby.DatasetGroupBy.reduce` method and methods like :py:meth:`~xarray.core.groupby.DatasetGroupBy.mean` as shortcuts for applying an @@ -110,14 +112,14 @@ aggregation function: .. ipython:: python - arr.groupby('letters').mean(dim='x') + arr.groupby("letters").mean(dim="x") Using a groupby is thus also a convenient shortcut for aggregating over all dimensions *other than* the provided one: .. ipython:: python - ds.groupby('x').std(...) + ds.groupby("x").std(...) .. note:: @@ -134,7 +136,7 @@ values for group along the grouped dimension: .. ipython:: python - ds.groupby('letters').first(...) + ds.groupby("letters").first(...) By default, they skip missing values (control this with ``skipna``). @@ -149,9 +151,9 @@ coordinates. For example: .. ipython:: python - alt = arr.groupby('letters').mean(...) + alt = arr.groupby("letters").mean(...) alt - ds.groupby('letters') - alt + ds.groupby("letters") - alt This last line is roughly equivalent to the following:: @@ -169,11 +171,11 @@ the ``squeeze`` parameter: .. ipython:: python - next(iter(arr.groupby('x'))) + next(iter(arr.groupby("x"))) .. ipython:: python - next(iter(arr.groupby('x', squeeze=False))) + next(iter(arr.groupby("x", squeeze=False))) Although xarray will attempt to automatically :py:attr:`~xarray.DataArray.transpose` dimensions back into their original order @@ -197,13 +199,17 @@ __ http://cfconventions.org/cf-conventions/v1.6.0/cf-conventions.html#_two_dimen .. ipython:: python - da = xr.DataArray([[0,1],[2,3]], - coords={'lon': (['ny','nx'], [[30,40],[40,50]] ), - 'lat': (['ny','nx'], [[10,10],[20,20]] ),}, - dims=['ny','nx']) + da = xr.DataArray( + [[0, 1], [2, 3]], + coords={ + "lon": (["ny", "nx"], [[30, 40], [40, 50]]), + "lat": (["ny", "nx"], [[10, 10], [20, 20]]), + }, + dims=["ny", "nx"], + ) da - da.groupby('lon').sum(...) - da.groupby('lon').map(lambda x: x - x.mean(), shortcut=False) + da.groupby("lon").sum(...) 
+ da.groupby("lon").map(lambda x: x - x.mean(), shortcut=False) Because multidimensional groups have the ability to generate a very large number of bins, coarse-binning via :py:meth:`~xarray.Dataset.groupby_bins` @@ -211,7 +217,7 @@ may be desirable: .. ipython:: python - da.groupby_bins('lon', [0,45,50]).sum() + da.groupby_bins("lon", [0, 45, 50]).sum() These methods group by `lon` values. It is also possible to groupby each cell in a grid, regardless of value, by stacking multiple dimensions, @@ -219,5 +225,5 @@ applying your function, and then unstacking the result: .. ipython:: python - stacked = da.stack(gridcell=['ny', 'nx']) - stacked.groupby('gridcell').sum(...).unstack('gridcell') + stacked = da.stack(gridcell=["ny", "nx"]) + stacked.groupby("gridcell").sum(...).unstack("gridcell") \ No newline at end of file diff --git a/doc/indexing.rst b/doc/indexing.rst index cfbb84a8343..af8e44fb80b 100644 --- a/doc/indexing.rst +++ b/doc/indexing.rst @@ -4,11 +4,12 @@ Indexing and selecting data =========================== .. ipython:: python - :suppress: + :suppress: import numpy as np import pandas as pd import xarray as xr + np.random.seed(123456) xarray offers extremely flexible indexing routines that combine the best @@ -60,9 +61,13 @@ DataArray: .. ipython:: python - da = xr.DataArray(np.random.rand(4, 3), - [('time', pd.date_range('2000-01-01', periods=4)), - ('space', ['IA', 'IL', 'IN'])]) + da = xr.DataArray( + np.random.rand(4, 3), + [ + ("time", pd.date_range("2000-01-01", periods=4)), + ("space", ["IA", "IL", "IN"]), + ], + ) da[:2] da[0, 0] da[:, [2, 1]] @@ -81,7 +86,7 @@ fast. To do label based indexing, use the :py:attr:`~xarray.DataArray.loc` attri .. ipython:: python - da.loc['2000-01-01':'2000-01-02', 'IA'] + da.loc["2000-01-01":"2000-01-02", "IA"] In this example, the selected is a subpart of the array in the range '2000-01-01':'2000-01-02' along the first coordinate `time` @@ -98,7 +103,7 @@ Setting values with label based indexing is also supported: .. ipython:: python - da.loc['2000-01-01', ['IL', 'IN']] = -10 + da.loc["2000-01-01", ["IL", "IN"]] = -10 da @@ -117,7 +122,7 @@ use them explicitly to slice data. There are two ways to do this: da[dict(space=0, time=slice(None, 2))] # index by dimension coordinate labels - da.loc[dict(time=slice('2000-01-01', '2000-01-02'))] + da.loc[dict(time=slice("2000-01-01", "2000-01-02"))] 2. Use the :py:meth:`~xarray.DataArray.sel` and :py:meth:`~xarray.DataArray.isel` convenience methods: @@ -128,7 +133,7 @@ use them explicitly to slice data. There are two ways to do this: da.isel(space=0, time=slice(None, 2)) # index by dimension coordinate labels - da.sel(time=slice('2000-01-01', '2000-01-02')) + da.sel(time=slice("2000-01-01", "2000-01-02")) The arguments to these methods can be any objects that could index the array along the dimension given by the keyword, e.g., labels for an individual value, @@ -156,16 +161,16 @@ enabling nearest neighbor (inexact) lookups by use of the methods ``'pad'``, .. ipython:: python - da = xr.DataArray([1, 2, 3], [('x', [0, 1, 2])]) - da.sel(x=[1.1, 1.9], method='nearest') - da.sel(x=0.1, method='backfill') - da.reindex(x=[0.5, 1, 1.5, 2, 2.5], method='pad') + da = xr.DataArray([1, 2, 3], [("x", [0, 1, 2])]) + da.sel(x=[1.1, 1.9], method="nearest") + da.sel(x=0.1, method="backfill") + da.reindex(x=[0.5, 1, 1.5, 2, 2.5], method="pad") Tolerance limits the maximum distance for valid matches with an inexact lookup: .. 
ipython:: python - da.reindex(x=[1.1, 1.5], method='nearest', tolerance=0.2) + da.reindex(x=[1.1, 1.5], method="nearest", tolerance=0.2) The method parameter is not yet supported if any of the arguments to ``.sel()`` is a ``slice`` object: @@ -173,7 +178,7 @@ to ``.sel()`` is a ``slice`` object: .. ipython:: :verbatim: - In [1]: da.sel(x=slice(1, 3), method='nearest') + In [1]: da.sel(x=slice(1, 3), method="nearest") NotImplementedError However, you don't need to use ``method`` to do inexact slicing. Slicing @@ -182,15 +187,15 @@ labels are monotonic increasing: .. ipython:: python - da.sel(x=slice(0.9, 3.1)) + da.sel(x=slice(0.9, 3.1)) Indexing axes with monotonic decreasing labels also works, as long as the ``slice`` or ``.loc`` arguments are also decreasing: .. ipython:: python - reversed_da = da[::-1] - reversed_da.loc[3.1:0.9] + reversed_da = da[::-1] + reversed_da.loc[3.1:0.9] .. note:: @@ -227,7 +232,7 @@ arrays). However, you can do normal indexing with dimension names: .. ipython:: python ds[dict(space=[0], time=[0])] - ds.loc[dict(time='2000-01-01')] + ds.loc[dict(time="2000-01-01")] Using indexing to *assign* values to a subset of dataset (e.g., ``ds[dict(space=0)] = 1``) is not yet supported. @@ -240,7 +245,7 @@ index labels along a dimension dropped: .. ipython:: python - ds.drop_sel(space=['IN', 'IL']) + ds.drop_sel(space=["IN", "IL"]) ``drop_sel`` is both a ``Dataset`` and ``DataArray`` method. @@ -249,7 +254,7 @@ Any variables with these dimensions are also dropped: .. ipython:: python - ds.drop_dims('time') + ds.drop_dims("time") .. _masking with where: @@ -263,7 +268,7 @@ xarray, use :py:meth:`~xarray.DataArray.where`: .. ipython:: python - da = xr.DataArray(np.arange(16).reshape(4, 4), dims=['x', 'y']) + da = xr.DataArray(np.arange(16).reshape(4, 4), dims=["x", "y"]) da.where(da.x + da.y < 4) This is particularly useful for ragged indexing of multi-dimensional data, @@ -296,7 +301,7 @@ multiple values, use :py:meth:`~xarray.DataArray.isin`: .. ipython:: python - da = xr.DataArray([1, 2, 3, 4, 5], dims=['x']) + da = xr.DataArray([1, 2, 3, 4, 5], dims=["x"]) da.isin([2, 4]) :py:meth:`~xarray.DataArray.isin` works particularly well with @@ -305,7 +310,7 @@ already labels of an array: .. ipython:: python - lookup = xr.DataArray([-1, -2, -3, -4, -5], dims=['x']) + lookup = xr.DataArray([-1, -2, -3, -4, -5], dims=["x"]) da.where(lookup.isin([-2, -4]), drop=True) However, some caution is in order: when done repeatedly, this type of indexing @@ -328,7 +333,6 @@ MATLAB, or after using the :py:func:`numpy.ix_` helper: .. ipython:: python - da = xr.DataArray( np.arange(12).reshape((3, 4)), dims=["x", "y"], @@ -344,8 +348,8 @@ dimensions: .. ipython:: python - ind_x = xr.DataArray([0, 1], dims=['x']) - ind_y = xr.DataArray([0, 1], dims=['y']) + ind_x = xr.DataArray([0, 1], dims=["x"]) + ind_y = xr.DataArray([0, 1], dims=["y"]) da[ind_x, ind_y] # orthogonal indexing da[ind_x, ind_x] # vectorized indexing @@ -364,7 +368,7 @@ indexers' dimension: .. ipython:: python - ind = xr.DataArray([[0, 1], [0, 1]], dims=['a', 'b']) + ind = xr.DataArray([[0, 1], [0, 1]], dims=["a", "b"]) da[ind] Similar to how NumPy's `advanced indexing`_ works, vectorized @@ -378,18 +382,18 @@ Vectorized indexing also works with ``isel``, ``loc``, and ``sel``: .. 
ipython:: python - ind = xr.DataArray([[0, 1], [0, 1]], dims=['a', 'b']) + ind = xr.DataArray([[0, 1], [0, 1]], dims=["a", "b"]) da.isel(y=ind) # same as da[:, ind] - ind = xr.DataArray([['a', 'b'], ['b', 'a']], dims=['a', 'b']) + ind = xr.DataArray([["a", "b"], ["b", "a"]], dims=["a", "b"]) da.loc[:, ind] # same as da.sel(y=ind) These methods may also be applied to ``Dataset`` objects .. ipython:: python - ds = da.to_dataset(name='bar') - ds.isel(x=xr.DataArray([0, 1, 2], dims=['points'])) + ds = da.to_dataset(name="bar") + ds.isel(x=xr.DataArray([0, 1, 2], dims=["points"])) .. tip:: @@ -476,8 +480,8 @@ Like ``numpy.ndarray``, value assignment sometimes works differently from what o .. ipython:: python - da = xr.DataArray([0, 1, 2, 3], dims=['x']) - ind = xr.DataArray([0, 0, 0], dims=['x']) + da = xr.DataArray([0, 1, 2, 3], dims=["x"]) + ind = xr.DataArray([0, 0, 0], dims=["x"]) da[ind] -= 1 da @@ -511,7 +515,7 @@ __ https://docs.scipy.org/doc/numpy/user/basics.indexing.html#assigning-values-t .. ipython:: python - da = xr.DataArray([0, 1, 2, 3], dims=['x']) + da = xr.DataArray([0, 1, 2, 3], dims=["x"]) # DO NOT do this da.isel(x=[0, 1, 2])[1] = -1 da @@ -581,15 +585,15 @@ To reindex a particular dimension, use :py:meth:`~xarray.DataArray.reindex`: .. ipython:: python - da.reindex(space=['IA', 'CA']) + da.reindex(space=["IA", "CA"]) The :py:meth:`~xarray.DataArray.reindex_like` method is a useful shortcut. To demonstrate, we will make a subset DataArray with new values: .. ipython:: python - foo = da.rename('foo') - baz = (10 * da[:2, :2]).rename('baz') + foo = da.rename("foo") + baz = (10 * da[:2, :2]).rename("baz") baz Reindexing ``foo`` with ``baz`` selects out the first two values along each @@ -611,8 +615,8 @@ The :py:func:`~xarray.align` function lets us perform more flexible database-lik .. ipython:: python - xr.align(foo, baz, join='inner') - xr.align(foo, baz, join='outer') + xr.align(foo, baz, join="inner") + xr.align(foo, baz, join="outer") Both ``reindex_like`` and ``align`` work interchangeably between :py:class:`~xarray.DataArray` and :py:class:`~xarray.Dataset` objects, and with any number of matching dimension names: @@ -621,7 +625,7 @@ Both ``reindex_like`` and ``align`` work interchangeably between ds ds.reindex_like(baz) - other = xr.DataArray(['a', 'b', 'c'], dims='other') + other = xr.DataArray(["a", "b", "c"], dims="other") # this is a no-op, because there are no shared dimension names ds.reindex_like(other) @@ -636,7 +640,7 @@ integer-based indexing as a fallback for dimensions without a coordinate label: .. ipython:: python - da = xr.DataArray([1, 2, 3], dims='x') + da = xr.DataArray([1, 2, 3], dims="x") da.sel(x=[0, -1]) Alignment between xarray objects where one or both do not have coordinate labels @@ -675,9 +679,9 @@ labels: .. ipython:: python - da = xr.DataArray([1, 2, 3], dims='x') + da = xr.DataArray([1, 2, 3], dims="x") da - da.get_index('x') + da.get_index("x") .. _copies_vs_views: @@ -721,7 +725,6 @@ pandas: .. ipython:: python - midx = pd.MultiIndex.from_product([list("abc"), [0, 1]], names=("one", "two")) mda = xr.DataArray(np.random.rand(6, 3), [("x", midx), ("y", range(3))]) mda @@ -732,20 +735,20 @@ a slice of tuples: .. ipython:: python - mda.sel(x=[('a', 0), ('b', 1)]) + mda.sel(x=[("a", 0), ("b", 1)]) Additionally, xarray supports dictionaries: .. ipython:: python - mda.sel(x={'one': 'a', 'two': 0}) + mda.sel(x={"one": "a", "two": 0}) For convenience, ``sel`` also accepts multi-index levels directly as keyword arguments: .. 
ipython:: python - mda.sel(one='a', two=0) + mda.sel(one="a", two=0) Note that using ``sel`` it is not possible to mix a dimension indexer with level indexers for that dimension @@ -757,7 +760,7 @@ multi-index is reduced to a single index. .. ipython:: python - mda.loc[{'one': 'a'}, ...] + mda.loc[{"one": "a"}, ...] Unlike pandas, xarray does not guess whether you provide index levels or dimensions when using ``loc`` in some ambiguous cases. For example, for diff --git a/doc/internals.rst b/doc/internals.rst index a4870f2316a..46c117e312b 100644 --- a/doc/internals.rst +++ b/doc/internals.rst @@ -46,11 +46,12 @@ Extending xarray ---------------- .. ipython:: python - :suppress: + :suppress: import numpy as np import pandas as pd import xarray as xr + np.random.seed(123456) xarray is designed as a general purpose library, and hence tries to avoid @@ -87,11 +88,12 @@ defined that returns an instance of your class: .. code-block:: python - class Dataset: - ... - @property - def geo(self) - return GeoAccessor(self) + class Dataset: + ... + + @property + def geo(self): + return GeoAccessor(self) However, using the register accessor decorators is preferable to simply adding your own ad-hoc property (i.e., ``Dataset.geo = property(...)``), for several @@ -116,14 +118,13 @@ reasons: Back in an interactive IPython session, we can use these properties: .. ipython:: python - :suppress: + :suppress: - exec(open("examples/_code/accessor_example.py").read()) + exec(open("examples/_code/accessor_example.py").read()) .. ipython:: python - ds = xr.Dataset({'longitude': np.linspace(0, 10), - 'latitude': np.linspace(0, 20)}) + ds = xr.Dataset({"longitude": np.linspace(0, 10), "latitude": np.linspace(0, 20)}) ds.geo.center ds.geo.plot() @@ -137,3 +138,54 @@ To help users keep things straight, please `let us know `_ if you plan to write a new accessor for an open source library. In the future, we will maintain a list of accessors and the libraries that implement them on this page. + +.. _zarr_encoding: + +Zarr Encoding Specification +--------------------------- + +In implementing support for the `Zarr `_ storage +format, Xarray developers made some *ad hoc* choices about how to store +NetCDF data in Zarr. +Future versions of the Zarr spec will likely include a more formal convention +for the storage of the NetCDF data model in Zarr; see +`Zarr spec repo `_ for ongoing +discussion. + +First, Xarray can only read and write Zarr groups. There is currently no support +for reading / writting individual Zarr arrays. Zarr groups are mapped to +Xarray ``Dataset`` objects. + +Second, from Xarray's point of view, the key difference between +NetCDF and Zarr is that all NetCDF arrays have *dimension names* while Zarr +arrays do not. Therefore, in order to store NetCDF data in Zarr, Xarray must +somehow encode and decode the name of each array's dimensions. + +To accomplish this, Xarray developers decided to define a special Zarr array +attribute: ``_ARRAY_DIMENSIONS``. The value of this attribute is a list of +dimension names (strings), for example ``["time", "lon", "lat"]``. When writing +data to Zarr, Xarray sets this attribute on all variables based on the variable +dimensions. When reading a Zarr group, Xarray looks for this attribute on all +arrays, raising an error if it can't be found. The attribute is used to define +the variable dimension names and then removed from the attributes dictionary +returned to the user. 
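As a purely illustrative sketch (the store name ``plain.zarr`` and the variable name ``tair`` are made up here, and the zarr v2 Python API is assumed), a Zarr array written outside of Xarray could be made readable by setting this attribute by hand:

.. code-block:: python

    import numpy as np
    import xarray as xr
    import zarr

    # Hypothetical store and variable names, assuming the zarr v2 Python API.
    root = zarr.open_group("plain.zarr", mode="w")
    tair = root.create_dataset("tair", data=np.zeros((2, 3, 4)), chunks=(1, 3, 4))

    # Without this attribute, xarray would raise an error when opening the store.
    tair.attrs["_ARRAY_DIMENSIONS"] = ["time", "lat", "lon"]

    ds = xr.open_zarr("plain.zarr")

Opening ``plain.zarr`` with :py:func:`~xarray.open_zarr` should then yield a ``tair`` variable with dimensions ``time``, ``lat`` and ``lon``, with the ``_ARRAY_DIMENSIONS`` attribute stripped from the attributes returned to the user.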
+ +Because of these choices, Xarray cannot read arbitrary array data, but only +Zarr data with valid ``_ARRAY_DIMENSIONS`` attributes on each array. + +After decoding the ``_ARRAY_DIMENSIONS`` attribute and assigning the variable +dimensions, Xarray proceeds to [optionally] decode each variable using its +standard CF decoding machinery used for NetCDF data (see :py:func:`decode_cf`). + +As a concrete example, here we write a tutorial dataset to Zarr and then +re-open it directly with Zarr: + +.. ipython:: python + + ds = xr.tutorial.load_dataset("rasm") + ds.to_zarr("rasm.zarr", mode="w") + import zarr + + zgroup = zarr.open("rasm.zarr") + print(zgroup.tree()) + dict(zgroup["Tair"].attrs) \ No newline at end of file diff --git a/doc/interpolation.rst b/doc/interpolation.rst index 4cf39807e5a..c2922813e15 100644 --- a/doc/interpolation.rst +++ b/doc/interpolation.rst @@ -4,11 +4,12 @@ Interpolating data ================== .. ipython:: python - :suppress: + :suppress: import numpy as np import pandas as pd import xarray as xr + np.random.seed(123456) xarray offers flexible interpolation routines, which have a similar interface @@ -27,9 +28,10 @@ indexing of a :py:class:`~xarray.DataArray`, .. ipython:: python - da = xr.DataArray(np.sin(0.3 * np.arange(12).reshape(4, 3)), - [('time', np.arange(4)), - ('space', [0.1, 0.2, 0.3])]) + da = xr.DataArray( + np.sin(0.3 * np.arange(12).reshape(4, 3)), + [("time", np.arange(4)), ("space", [0.1, 0.2, 0.3])], + ) # label lookup da.sel(time=3) @@ -52,16 +54,17 @@ To interpolate data with a :py:doc:`numpy.datetime64 .. ipython:: python - da_dt64 = xr.DataArray([1, 3], - [('time', pd.date_range('1/1/2000', '1/3/2000', periods=2))]) - da_dt64.interp(time='2000-01-02') + da_dt64 = xr.DataArray( + [1, 3], [("time", pd.date_range("1/1/2000", "1/3/2000", periods=2))] + ) + da_dt64.interp(time="2000-01-02") The interpolated data can be merged into the original :py:class:`~xarray.DataArray` by specifying the time periods required. .. ipython:: python - da_dt64.interp(time=pd.date_range('1/1/2000', '1/3/2000', periods=3)) + da_dt64.interp(time=pd.date_range("1/1/2000", "1/3/2000", periods=3)) Interpolation of data indexed by a :py:class:`~xarray.CFTimeIndex` is also allowed. See :ref:`CFTimeIndex` for examples. @@ -108,9 +111,10 @@ different coordinates, .. ipython:: python - other = xr.DataArray(np.sin(0.4 * np.arange(9).reshape(3, 3)), - [('time', [0.9, 1.9, 2.9]), - ('space', [0.15, 0.25, 0.35])]) + other = xr.DataArray( + np.sin(0.4 * np.arange(9).reshape(3, 3)), + [("time", [0.9, 1.9, 2.9]), ("space", [0.15, 0.25, 0.35])], + ) it might be a good idea to first interpolate ``da`` so that it will stay on the same coordinates of ``other``, and then subtract it. @@ -118,9 +122,9 @@ same coordinates of ``other``, and then subtract it. .. ipython:: python - # interpolate da along other's coordinates - interpolated = da.interp_like(other) - interpolated + # interpolate da along other's coordinates + interpolated = da.interp_like(other) + interpolated It is now possible to safely compute the difference ``other - interpolated``. @@ -135,12 +139,15 @@ The interpolation method can be specified by the optional ``method`` argument. .. 
ipython:: python - da = xr.DataArray(np.sin(np.linspace(0, 2 * np.pi, 10)), dims='x', - coords={'x': np.linspace(0, 1, 10)}) + da = xr.DataArray( + np.sin(np.linspace(0, 2 * np.pi, 10)), + dims="x", + coords={"x": np.linspace(0, 1, 10)}, + ) - da.plot.line('o', label='original') - da.interp(x=np.linspace(0, 1, 100)).plot.line(label='linear (default)') - da.interp(x=np.linspace(0, 1, 100), method='cubic').plot.line(label='cubic') + da.plot.line("o", label="original") + da.interp(x=np.linspace(0, 1, 100)).plot.line(label="linear (default)") + da.interp(x=np.linspace(0, 1, 100), method="cubic").plot.line(label="cubic") @savefig interpolation_sample1.png width=4in plt.legend() @@ -149,15 +156,16 @@ Additional keyword arguments can be passed to scipy's functions. .. ipython:: python # fill 0 for the outside of the original coordinates. - da.interp(x=np.linspace(-0.5, 1.5, 10), kwargs={'fill_value': 0.0}) + da.interp(x=np.linspace(-0.5, 1.5, 10), kwargs={"fill_value": 0.0}) # 1-dimensional extrapolation - da.interp(x=np.linspace(-0.5, 1.5, 10), kwargs={'fill_value': 'extrapolate'}) + da.interp(x=np.linspace(-0.5, 1.5, 10), kwargs={"fill_value": "extrapolate"}) # multi-dimensional extrapolation - da = xr.DataArray(np.sin(0.3 * np.arange(12).reshape(4, 3)), - [('time', np.arange(4)), - ('space', [0.1, 0.2, 0.3])]) + da = xr.DataArray( + np.sin(0.3 * np.arange(12).reshape(4, 3)), + [("time", np.arange(4)), ("space", [0.1, 0.2, 0.3])], + ) - da.interp(time=4, space=np.linspace(-0.1, 0.5, 10), kwargs={'fill_value': None}) + da.interp(time=4, space=np.linspace(-0.1, 0.5, 10), kwargs={"fill_value": None}) Advanced Interpolation @@ -181,17 +189,18 @@ For example: .. ipython:: python - da = xr.DataArray(np.sin(0.3 * np.arange(20).reshape(5, 4)), - [('x', np.arange(5)), - ('y', [0.1, 0.2, 0.3, 0.4])]) + da = xr.DataArray( + np.sin(0.3 * np.arange(20).reshape(5, 4)), + [("x", np.arange(5)), ("y", [0.1, 0.2, 0.3, 0.4])], + ) # advanced indexing - x = xr.DataArray([0, 2, 4], dims='z') - y = xr.DataArray([0.1, 0.2, 0.3], dims='z') + x = xr.DataArray([0, 2, 4], dims="z") + y = xr.DataArray([0.1, 0.2, 0.3], dims="z") da.sel(x=x, y=y) # advanced interpolation - x = xr.DataArray([0.5, 1.5, 2.5], dims='z') - y = xr.DataArray([0.15, 0.25, 0.35], dims='z') + x = xr.DataArray([0.5, 1.5, 2.5], dims="z") + y = xr.DataArray([0.15, 0.25, 0.35], dims="z") da.interp(x=x, y=y) where values on the original coordinates @@ -203,9 +212,8 @@ If you want to add a coordinate to the new dimension ``z``, you can supply .. ipython:: python - x = xr.DataArray([0.5, 1.5, 2.5], dims='z', coords={'z': ['a', 'b','c']}) - y = xr.DataArray([0.15, 0.25, 0.35], dims='z', - coords={'z': ['a', 'b','c']}) + x = xr.DataArray([0.5, 1.5, 2.5], dims="z", coords={"z": ["a", "b", "c"]}) + y = xr.DataArray([0.15, 0.25, 0.35], dims="z", coords={"z": ["a", "b", "c"]}) da.interp(x=x, y=y) For the details of the advanced indexing, @@ -224,19 +232,18 @@ while other methods such as ``cubic`` or ``quadratic`` return all NaN arrays. .. ipython:: python - da = xr.DataArray([0, 2, np.nan, 3, 3.25], dims='x', - coords={'x': range(5)}) + da = xr.DataArray([0, 2, np.nan, 3, 3.25], dims="x", coords={"x": range(5)}) da.interp(x=[0.5, 1.5, 2.5]) - da.interp(x=[0.5, 1.5, 2.5], method='cubic') + da.interp(x=[0.5, 1.5, 2.5], method="cubic") To avoid this, you can drop NaN by :py:meth:`~xarray.DataArray.dropna`, and then make the interpolation .. 
ipython:: python - dropped = da.dropna('x') + dropped = da.dropna("x") dropped - dropped.interp(x=[0.5, 1.5, 2.5], method='cubic') + dropped.interp(x=[0.5, 1.5, 2.5], method="cubic") If NaNs are distributed randomly in your multidimensional array, dropping all the columns containing more than one NaNs by @@ -246,7 +253,7 @@ which is similar to :py:meth:`pandas.Series.interpolate`. .. ipython:: python - filled = da.interpolate_na(dim='x') + filled = da.interpolate_na(dim="x") filled This fills NaN by interpolating along the specified dimension. @@ -254,7 +261,7 @@ After filling NaNs, you can interpolate: .. ipython:: python - filled.interp(x=[0.5, 1.5, 2.5], method='cubic') + filled.interp(x=[0.5, 1.5, 2.5], method="cubic") For the details of :py:meth:`~xarray.DataArray.interpolate_na`, see :ref:`Missing values `. @@ -268,18 +275,18 @@ Let's see how :py:meth:`~xarray.DataArray.interp` works on real data. .. ipython:: python # Raw data - ds = xr.tutorial.open_dataset('air_temperature').isel(time=0) + ds = xr.tutorial.open_dataset("air_temperature").isel(time=0) fig, axes = plt.subplots(ncols=2, figsize=(10, 4)) ds.air.plot(ax=axes[0]) - axes[0].set_title('Raw data') + axes[0].set_title("Raw data") # Interpolated data - new_lon = np.linspace(ds.lon[0], ds.lon[-1], ds.dims['lon'] * 4) - new_lat = np.linspace(ds.lat[0], ds.lat[-1], ds.dims['lat'] * 4) + new_lon = np.linspace(ds.lon[0], ds.lon[-1], ds.dims["lon"] * 4) + new_lat = np.linspace(ds.lat[0], ds.lat[-1], ds.dims["lat"] * 4) dsi = ds.interp(lat=new_lat, lon=new_lon) dsi.air.plot(ax=axes[1]) @savefig interpolation_sample3.png width=8in - axes[1].set_title('Interpolated data') + axes[1].set_title("Interpolated data") Our advanced interpolation can be used to remap the data to the new coordinate. Consider the new coordinates x and z on the two dimensional plane. @@ -291,20 +298,23 @@ The remapping can be done as follows x = np.linspace(240, 300, 100) z = np.linspace(20, 70, 100) # relation between new and original coordinates - lat = xr.DataArray(z, dims=['z'], coords={'z': z}) - lon = xr.DataArray((x[:, np.newaxis]-270)/np.cos(z*np.pi/180)+270, - dims=['x', 'z'], coords={'x': x, 'z': z}) + lat = xr.DataArray(z, dims=["z"], coords={"z": z}) + lon = xr.DataArray( + (x[:, np.newaxis] - 270) / np.cos(z * np.pi / 180) + 270, + dims=["x", "z"], + coords={"x": x, "z": z}, + ) fig, axes = plt.subplots(ncols=2, figsize=(10, 4)) ds.air.plot(ax=axes[0]) # draw the new coordinate on the original coordinates. for idx in [0, 33, 66, 99]: - axes[0].plot(lon.isel(x=idx), lat, '--k') + axes[0].plot(lon.isel(x=idx), lat, "--k") for idx in [0, 33, 66, 99]: - axes[0].plot(*xr.broadcast(lon.isel(z=idx), lat.isel(z=idx)), '--k') - axes[0].set_title('Raw data') + axes[0].plot(*xr.broadcast(lon.isel(z=idx), lat.isel(z=idx)), "--k") + axes[0].set_title("Raw data") dsi = ds.interp(lon=lon, lat=lat) dsi.air.plot(ax=axes[1]) @savefig interpolation_sample4.png width=8in - axes[1].set_title('Remapped data') + axes[1].set_title("Remapped data") \ No newline at end of file diff --git a/doc/io.rst b/doc/io.rst index 0c666099df8..4aac5e0b6f7 100644 --- a/doc/io.rst +++ b/doc/io.rst @@ -9,11 +9,12 @@ simple :ref:`io.pickle` files to the more flexible :ref:`io.netcdf` format (recommended). .. ipython:: python - :suppress: + :suppress: import numpy as np import pandas as pd import xarray as xr + np.random.seed(123456) .. _io.netcdf: @@ -52,12 +53,16 @@ We can save a Dataset to disk using the .. 
ipython:: python - ds = xr.Dataset({'foo': (('x', 'y'), np.random.rand(4, 5))}, - coords={'x': [10, 20, 30, 40], - 'y': pd.date_range('2000-01-01', periods=5), - 'z': ('x', list('abcd'))}) + ds = xr.Dataset( + {"foo": (("x", "y"), np.random.rand(4, 5))}, + coords={ + "x": [10, 20, 30, 40], + "y": pd.date_range("2000-01-01", periods=5), + "z": ("x", list("abcd")), + }, + ) - ds.to_netcdf('saved_on_disk.nc') + ds.to_netcdf("saved_on_disk.nc") By default, the file is saved as netCDF4 (assuming netCDF4-Python is installed). You can control the format and engine used to write the file with @@ -76,7 +81,7 @@ We can load netCDF files to create a new Dataset using .. ipython:: python - ds_disk = xr.open_dataset('saved_on_disk.nc') + ds_disk = xr.open_dataset("saved_on_disk.nc") ds_disk Similarly, a DataArray can be saved to disk using the @@ -117,7 +122,7 @@ netCDF file. However, it's often cleaner to use a ``with`` statement: .. ipython:: python # this automatically closes the dataset after use - with xr.open_dataset('saved_on_disk.nc') as ds: + with xr.open_dataset("saved_on_disk.nc") as ds: print(ds.keys()) Although xarray provides reasonable support for incremental reads of files on @@ -171,7 +176,7 @@ You can view this encoding information (among others) in the .. ipython:: :verbatim: - In [1]: ds_disk['y'].encoding + In [1]: ds_disk["y"].encoding Out[1]: {'zlib': False, 'shuffle': False, @@ -458,7 +463,7 @@ This is not CF-compliant but again facilitates roundtripping of xarray datasets. Invalid netCDF files ~~~~~~~~~~~~~~~~~~~~ -The library ``h5netcdf`` allows writing some dtypes (booleans, complex, ...) that aren't +The library ``h5netcdf`` allows writing some dtypes (booleans, complex, ...) that aren't allowed in netCDF4 (see `h5netcdf documentation `_). This feature is availabe through :py:meth:`DataArray.to_netcdf` and @@ -469,7 +474,7 @@ and currently raises a warning unless ``invalid_netcdf=True`` is set: :okwarning: # Writing complex valued data - da = xr.DataArray([1.+1.j, 2.+2.j, 3.+3.j]) + da = xr.DataArray([1.0 + 1.0j, 2.0 + 2.0j, 3.0 + 3.0j]) da.to_netcdf("complex.nc", engine="h5netcdf", invalid_netcdf=True) # Reading it back @@ -479,7 +484,8 @@ and currently raises a warning unless ``invalid_netcdf=True`` is set: :suppress: import os - os.remove('complex.nc') + + os.remove("complex.nc") .. warning:: @@ -499,9 +505,11 @@ installed xarray can convert a ``DataArray`` into a ``Cube`` using .. ipython:: python - da = xr.DataArray(np.random.rand(4, 5), dims=['x', 'y'], - coords=dict(x=[10, 20, 30, 40], - y=pd.date_range('2000-01-01', periods=5))) + da = xr.DataArray( + np.random.rand(4, 5), + dims=["x", "y"], + coords=dict(x=[10, 20, 30, 40], y=pd.date_range("2000-01-01", periods=5)), + ) cube = da.to_iris() cube @@ -548,8 +556,9 @@ __ http://iri.columbia.edu/ :verbatim: In [3]: remote_data = xr.open_dataset( - ...: 'http://iridl.ldeo.columbia.edu/SOURCES/.OSU/.PRISM/.monthly/dods', - ...: decode_times=False) + ...: "http://iridl.ldeo.columbia.edu/SOURCES/.OSU/.PRISM/.monthly/dods", + ...: decode_times=False, + ...: ) In [4]: remote_data Out[4]: @@ -587,7 +596,7 @@ over the network until we look at particular values: .. ipython:: :verbatim: - In [4]: tmax = remote_data['tmax'][:500, ::3, ::3] + In [4]: tmax = remote_data["tmax"][:500, ::3, ::3] In [5]: tmax Out[5]: @@ -715,7 +724,8 @@ search indices or other automated data discovery tools. :suppress: import os - os.remove('saved_on_disk.nc') + + os.remove("saved_on_disk.nc") .. _io.rasterio: @@ -729,7 +739,7 @@ rasterio is installed. 
Here is an example of how to use .. ipython:: :verbatim: - In [7]: rio = xr.open_rasterio('RGB.byte.tif') + In [7]: rio = xr.open_rasterio("RGB.byte.tif") In [8]: rio Out[8]: @@ -769,7 +779,7 @@ GDAL readable raster data using `rasterio`_ as well as for exporting to a geoTIF In [1]: import rioxarray - In [2]: rds = rioxarray.open_rasterio('RGB.byte.tif') + In [2]: rds = rioxarray.open_rasterio("RGB.byte.tif") In [3]: rds Out[3]: @@ -794,12 +804,12 @@ GDAL readable raster data using `rasterio`_ as well as for exporting to a geoTIF In [4]: rds.rio.crs Out[4]: CRS.from_epsg(32618) - In [5]: rds4326 = rio.rio.reproject("epsg:4326") + In [5]: rds4326 = rds.rio.reproject("epsg:4326") In [6]: rds4326.rio.crs Out[6]: CRS.from_epsg(4326) - In [7]: rds4326.rio.to_raster('RGB.byte.4326.tif') + In [7]: rds4326.rio.to_raster("RGB.byte.4326.tif") .. _rasterio: https://rasterio.readthedocs.io/en/latest/ @@ -827,21 +837,27 @@ Xarray's Zarr backend allows xarray to leverage these capabilities. Xarray can't open just any zarr dataset, because xarray requires special metadata (attributes) describing the dataset dimensions and coordinates. At this time, xarray can only open zarr datasets that have been written by -xarray. To write a dataset with zarr, we use the :py:attr:`Dataset.to_zarr` method. +xarray. For implementation details, see :ref:`zarr_encoding`. + +To write a dataset with zarr, we use the :py:attr:`Dataset.to_zarr` method. To write to a local directory, we pass a path to a directory .. ipython:: python - :suppress: + :suppress: ! rm -rf path/to/directory.zarr .. ipython:: python - ds = xr.Dataset({'foo': (('x', 'y'), np.random.rand(4, 5))}, - coords={'x': [10, 20, 30, 40], - 'y': pd.date_range('2000-01-01', periods=5), - 'z': ('x', list('abcd'))}) - ds.to_zarr('path/to/directory.zarr') + ds = xr.Dataset( + {"foo": (("x", "y"), np.random.rand(4, 5))}, + coords={ + "x": [10, 20, 30, 40], + "y": pd.date_range("2000-01-01", periods=5), + "z": ("x", list("abcd")), + }, + ) + ds.to_zarr("path/to/directory.zarr") (The suffix ``.zarr`` is optional--just a reminder that a zarr store lives there.) If the directory does not exist, it will be created. If a zarr @@ -854,22 +870,30 @@ It is also possible to append to an existing store. For that, set can be omitted as it will internally be set to ``'a'``. .. ipython:: python - :suppress: + :suppress: ! rm -rf path/to/directory.zarr .. ipython:: python - ds1 = xr.Dataset({'foo': (('x', 'y', 't'), np.random.rand(4, 5, 2))}, - coords={'x': [10, 20, 30, 40], - 'y': [1,2,3,4,5], - 't': pd.date_range('2001-01-01', periods=2)}) - ds1.to_zarr('path/to/directory.zarr') - ds2 = xr.Dataset({'foo': (('x', 'y', 't'), np.random.rand(4, 5, 2))}, - coords={'x': [10, 20, 30, 40], - 'y': [1,2,3,4,5], - 't': pd.date_range('2001-01-03', periods=2)}) - ds2.to_zarr('path/to/directory.zarr', append_dim='t') + ds1 = xr.Dataset( + {"foo": (("x", "y", "t"), np.random.rand(4, 5, 2))}, + coords={ + "x": [10, 20, 30, 40], + "y": [1, 2, 3, 4, 5], + "t": pd.date_range("2001-01-01", periods=2), + }, + ) + ds1.to_zarr("path/to/directory.zarr") + ds2 = xr.Dataset( + {"foo": (("x", "y", "t"), np.random.rand(4, 5, 2))}, + coords={ + "x": [10, 20, 30, 40], + "y": [1, 2, 3, 4, 5], + "t": pd.date_range("2001-01-03", periods=2), + }, + ) + ds2.to_zarr("path/to/directory.zarr", append_dim="t") To store variable length strings use ``dtype=object``. @@ -878,7 +902,7 @@ To read back a zarr dataset that has been created this way, we use the .. 
ipython:: python - ds_zarr = xr.open_zarr('path/to/directory.zarr') + ds_zarr = xr.open_zarr("path/to/directory.zarr") ds_zarr Cloud Storage Buckets @@ -912,15 +936,16 @@ These options can be passed to the ``to_zarr`` method as variable encoding. For example: .. ipython:: python - :suppress: + :suppress: ! rm -rf foo.zarr .. ipython:: python import zarr - compressor = zarr.Blosc(cname='zstd', clevel=3, shuffle=2) - ds.to_zarr('foo.zarr', encoding={'foo': {'compressor': compressor}}) + + compressor = zarr.Blosc(cname="zstd", clevel=3, shuffle=2) + ds.to_zarr("foo.zarr", encoding={"foo": {"compressor": compressor}}) .. note:: @@ -959,31 +984,30 @@ be done directly from zarr, as described in the .. _io.cfgrib: .. ipython:: python - :suppress: + :suppress: import shutil - shutil.rmtree('foo.zarr') - shutil.rmtree('path/to/directory.zarr') + + shutil.rmtree("foo.zarr") + shutil.rmtree("path/to/directory.zarr") GRIB format via cfgrib ---------------------- -xarray supports reading GRIB files via ECMWF cfgrib_ python driver and ecCodes_ -C-library, if they are installed. To open a GRIB file supply ``engine='cfgrib'`` +xarray supports reading GRIB files via ECMWF cfgrib_ python driver, +if it is installed. To open a GRIB file supply ``engine='cfgrib'`` to :py:func:`open_dataset`: .. ipython:: :verbatim: - In [1]: ds_grib = xr.open_dataset('example.grib', engine='cfgrib') + In [1]: ds_grib = xr.open_dataset("example.grib", engine="cfgrib") -We recommend installing ecCodes via conda:: +We recommend installing cfgrib via conda:: - conda install -c conda-forge eccodes - pip install cfgrib + conda install -c conda-forge cfgrib .. _cfgrib: https://github.com/ecmwf/cfgrib -.. _ecCodes: https://confluence.ecmwf.int/display/ECC/ecCodes+Home .. _io.pynio: diff --git a/doc/pandas.rst b/doc/pandas.rst index b0ec2a117dc..acf1d16b6ee 100644 --- a/doc/pandas.rst +++ b/doc/pandas.rst @@ -20,6 +20,7 @@ __ http://seaborn.pydata.org/ import numpy as np import pandas as pd import xarray as xr + np.random.seed(123456) Hierarchical and tidy data @@ -47,10 +48,15 @@ To convert any dataset to a ``DataFrame`` in tidy form, use the .. ipython:: python - ds = xr.Dataset({'foo': (('x', 'y'), np.random.randn(2, 3))}, - coords={'x': [10, 20], 'y': ['a', 'b', 'c'], - 'along_x': ('x', np.random.randn(2)), - 'scalar': 123}) + ds = xr.Dataset( + {"foo": (("x", "y"), np.random.randn(2, 3))}, + coords={ + "x": [10, 20], + "y": ["a", "b", "c"], + "along_x": ("x", np.random.randn(2)), + "scalar": 123, + }, + ) ds df = ds.to_dataframe() df @@ -91,7 +97,7 @@ DataFrames: .. ipython:: python - s = ds['foo'].to_series() + s = ds["foo"].to_series() s # or equivalently, with Series.to_xarray() xr.DataArray.from_series(s) @@ -117,8 +123,9 @@ available in pandas (i.e., a 1D array is converted to a .. ipython:: python - arr = xr.DataArray(np.random.randn(2, 3), - coords=[('x', [10, 20]), ('y', ['a', 'b', 'c'])]) + arr = xr.DataArray( + np.random.randn(2, 3), coords=[("x", [10, 20]), ("y", ["a", "b", "c"])] + ) df = arr.to_pandas() df @@ -136,9 +143,10 @@ preserve all use of multi-indexes: .. ipython:: python - index = pd.MultiIndex.from_arrays([['a', 'a', 'b'], [0, 1, 2]], - names=['one', 'two']) - df = pd.DataFrame({'x': 1, 'y': 2}, index=index) + index = pd.MultiIndex.from_arrays( + [["a", "a", "b"], [0, 1, 2]], names=["one", "two"] + ) + df = pd.DataFrame({"x": 1, "y": 2}, index=index) ds = xr.Dataset(df) ds @@ -175,9 +183,9 @@ Let's take a look: .. 
ipython:: python data = np.random.RandomState(0).rand(2, 3, 4) - items = list('ab') - major_axis = list('mno') - minor_axis = pd.date_range(start='2000', periods=4, name='date') + items = list("ab") + major_axis = list("mno") + minor_axis = pd.date_range(start="2000", periods=4, name="date") With old versions of pandas (prior to 0.25), this could stored in a ``Panel``: @@ -207,7 +215,7 @@ You can also easily convert this data into ``Dataset``: .. ipython:: python - array.to_dataset(dim='dim_0') + array.to_dataset(dim="dim_0") Here, there are two data variables, each representing a DataFrame on panel's ``items`` axis, and labeled as such. Each variable is a 2D array of the diff --git a/doc/plotting.rst b/doc/plotting.rst index f3d9c0213de..72248e31b1e 100644 --- a/doc/plotting.rst +++ b/doc/plotting.rst @@ -13,7 +13,7 @@ labels can also be used to easily create informative plots. xarray's plotting capabilities are centered around :py:class:`DataArray` objects. To plot :py:class:`Dataset` objects -simply access the relevant DataArrays, ie ``dset['var1']``. +simply access the relevant DataArrays, i.e. ``dset['var1']``. Dataset specific plotting routines are also available (see :ref:`plot-dataset`). Here we focus mostly on arrays 2d or larger. If your data fits nicely into a pandas DataFrame then you're better off using one of the more @@ -56,6 +56,7 @@ Imports # Use defaults so we don't get gridlines in generated docs import matplotlib as mpl + mpl.rcdefaults() The following imports are necessary for all of the examples. @@ -71,7 +72,7 @@ For these examples we'll use the North American air temperature dataset. .. ipython:: python - airtemps = xr.tutorial.open_dataset('air_temperature') + airtemps = xr.tutorial.open_dataset("air_temperature") airtemps # Convert to celsius @@ -79,7 +80,7 @@ For these examples we'll use the North American air temperature dataset. # copy attributes to get nice figure labels and change Kelvin to Celsius air.attrs = airtemps.air.attrs - air.attrs['units'] = 'deg C' + air.attrs["units"] = "deg C" .. note:: Until :issue:`1614` is solved, you might need to copy over the metadata in ``attrs`` to get informative figure labels (as was done above). @@ -126,7 +127,7 @@ can be used: .. ipython:: python @savefig plotting_1d_additional_args.png width=4in - air1d[:200].plot.line('b-^') + air1d[:200].plot.line("b-^") .. note:: Not all xarray plotting methods support passing positional arguments @@ -138,7 +139,7 @@ Keyword arguments work the same way, and are more explicit. .. ipython:: python @savefig plotting_example_sin3.png width=4in - air1d[:200].plot.line(color='purple', marker='o') + air1d[:200].plot.line(color="purple", marker="o") ========================= Adding to Existing Axis @@ -208,6 +209,44 @@ entire figure (as for matplotlib's ``figsize`` argument). .. _plotting.multiplelines: +========================= + Determine x-axis values +========================= + +Per default dimension coordinates are used for the x-axis (here the time coordinates). +However, you can also use non-dimension coordinates, MultiIndex levels, and dimensions +without coordinates along the x-axis. To illustrate this, let's calculate a 'decimal day' (epoch) +from the time and assign it as a non-dimension coordinate: + +.. ipython:: python + + decimal_day = (air1d.time - air1d.time[0]) / pd.Timedelta("1d") + air1d_multi = air1d.assign_coords(decimal_day=("time", decimal_day)) + air1d_multi + +To use ``'decimal_day'`` as x coordinate it must be explicitly specified: + +.. 
ipython:: python + + air1d_multi.plot(x="decimal_day") + +Creating a new MultiIndex named ``'date'`` from ``'time'`` and ``'decimal_day'``, +it is also possible to use a MultiIndex level as x-axis: + +.. ipython:: python + + air1d_multi = air1d_multi.set_index(date=("time", "decimal_day")) + air1d_multi.plot(x="decimal_day") + +Finally, if a dataset does not have any coordinates it enumerates all data points: + +.. ipython:: python + + air1d_multi = air1d_multi.drop("date") + air1d_multi.plot() + +The same applies to 2D plots below. + ==================================================== Multiple lines showing variation along a dimension ==================================================== @@ -219,7 +258,7 @@ plots to check the variation of air temperature at three different latitudes alo .. ipython:: python @savefig plotting_example_multiple_lines_x_kwarg.png - air.isel(lon=10, lat=[19,21,22]).plot.line(x='time') + air.isel(lon=10, lat=[19, 21, 22]).plot.line(x="time") It is required to explicitly specify either @@ -240,7 +279,7 @@ It is also possible to make line plots such that the data are on the x-axis and .. ipython:: python @savefig plotting_example_xy_kwarg.png - air.isel(time=10, lon=[10, 11]).plot(y='lat', hue='lon') + air.isel(time=10, lon=[10, 11]).plot(y="lat", hue="lon") ============ Step plots @@ -253,7 +292,7 @@ made using 1D data. :okwarning: @savefig plotting_example_step.png width=4in - air1d[:20].plot.step(where='mid') + air1d[:20].plot.step(where="mid") The argument ``where`` defines where the steps should be placed, options are ``'pre'`` (default), ``'post'``, and ``'mid'``. This is particularly handy @@ -261,15 +300,15 @@ when plotting data grouped with :py:meth:`Dataset.groupby_bins`. .. ipython:: python - air_grp = air.mean(['time','lon']).groupby_bins('lat',[0,23.5,66.5,90]) + air_grp = air.mean(["time", "lon"]).groupby_bins("lat", [0, 23.5, 66.5, 90]) air_mean = air_grp.mean() air_std = air_grp.std() air_mean.plot.step() - (air_mean + air_std).plot.step(ls=':') - (air_mean - air_std).plot.step(ls=':') - plt.ylim(-20,30) + (air_mean + air_std).plot.step(ls=":") + (air_mean - air_std).plot.step(ls=":") + plt.ylim(-20, 30) @savefig plotting_example_step_groupby.png width=4in - plt.title('Zonal mean temperature') + plt.title("Zonal mean temperature") In this case, the actual boundaries of the bins are used and the ``where`` argument is ignored. @@ -284,7 +323,9 @@ The keyword arguments ``xincrease`` and ``yincrease`` let you control the axes d .. ipython:: python @savefig plotting_example_xincrease_yincrease_kwarg.png - air.isel(time=10, lon=[10, 11]).plot.line(y='lat', hue='lon', xincrease=False, yincrease=False) + air.isel(time=10, lon=[10, 11]).plot.line( + y="lat", hue="lon", xincrease=False, yincrease=False + ) In addition, one can use ``xscale, yscale`` to set axes scaling; ``xticks, yticks`` to set axes ticks and ``xlim, ylim`` to set axes limits. These accept the same values as the matplotlib methods ``Axes.set_(x,y)scale()``, ``Axes.set_(x,y)ticks()``, ``Axes.set_(x,y)lim()`` respectively. @@ -348,7 +389,7 @@ produce plots with nonuniform coordinates. b = air2d.copy() # Apply a nonlinear transformation to one of the coords - b.coords['lat'] = np.log(b.coords['lat']) + b.coords["lat"] = np.log(b.coords["lat"]) @savefig plotting_nonuniform_coords.png width=4in b.plot() @@ -363,9 +404,9 @@ matplotlib is available. .. 
ipython:: python air2d.plot(cmap=plt.cm.Blues) - plt.title('These colors prove North America\nhas fallen in the ocean') - plt.ylabel('latitude') - plt.xlabel('longitude') + plt.title("These colors prove North America\nhas fallen in the ocean") + plt.ylabel("latitude") + plt.xlabel("longitude") plt.tight_layout() @savefig plotting_2d_call_matplotlib.png width=4in @@ -381,7 +422,7 @@ matplotlib is available. .. ipython:: python - plt.xlabel('Never gonna see this.') + plt.xlabel("Never gonna see this.") air2d.plot() @savefig plotting_2d_call_matplotlib2.png width=4in @@ -473,10 +514,10 @@ if using ``imshow`` or ``pcolormesh`` (but not with ``contour`` or ``contourf``, since levels are chosen automatically). .. ipython:: python - :okwarning: + :okwarning: @savefig plotting_seaborn_palette.png width=4in - air2d.plot(levels=10, cmap='husl') + air2d.plot(levels=10, cmap="husl") plt.draw() .. _plotting.faceting: @@ -520,14 +561,16 @@ arguments to the xarray plotting methods/functions. This returns a .. ipython:: python @savefig plot_facet_dataarray.png - g_simple = t.plot(x='lon', y='lat', col='time', col_wrap=3) + g_simple = t.plot(x="lon", y="lat", col="time", col_wrap=3) Faceting also works for line plots. .. ipython:: python @savefig plot_facet_dataarray_line.png - g_simple_line = t.isel(lat=slice(0,None,4)).plot(x='lon', hue='lat', col='time', col_wrap=3) + g_simple_line = t.isel(lat=slice(0, None, 4)).plot( + x="lon", hue="lat", col="time", col_wrap=3 + ) =============== 4 dimensional @@ -541,12 +584,12 @@ one were much hotter. .. ipython:: python t2 = t.isel(time=slice(0, 2)) - t4d = xr.concat([t2, t2 + 40], pd.Index(['normal', 'hot'], name='fourth_dim')) + t4d = xr.concat([t2, t2 + 40], pd.Index(["normal", "hot"], name="fourth_dim")) # This is a 4d array t4d.coords @savefig plot_facet_4d.png - t4d.plot(x='lon', y='lat', col='time', row='fourth_dim') + t4d.plot(x="lon", y="lat", col="time", row="fourth_dim") ================ Other features @@ -555,9 +598,9 @@ one were much hotter. Faceted plotting supports other arguments common to xarray 2d plots. .. ipython:: python - :suppress: + :suppress: - plt.close('all') + plt.close("all") .. ipython:: python @@ -566,9 +609,15 @@ Faceted plotting supports other arguments common to xarray 2d plots. hasoutliers[-1, -1, -1] = 400 @savefig plot_facet_robust.png - g = hasoutliers.plot.pcolormesh('lon', 'lat', col='time', col_wrap=3, - robust=True, cmap='viridis', - cbar_kwargs={'label': 'this has outliers'}) + g = hasoutliers.plot.pcolormesh( + "lon", + "lat", + col="time", + col_wrap=3, + robust=True, + cmap="viridis", + cbar_kwargs={"label": "this has outliers"}, + ) =================== FacetGrid Objects @@ -594,20 +643,20 @@ It's possible to select the :py:class:`xarray.DataArray` or .. ipython:: python - g.data.loc[g.name_dicts[0, 0]] + g.data.loc[g.name_dicts[0, 0]] Here is an example of using the lower level API and then modifying the axes after they have been plotted. .. ipython:: python - g = t.plot.imshow('lon', 'lat', col='time', col_wrap=3, robust=True) + g = t.plot.imshow("lon", "lat", col="time", col_wrap=3, robust=True) for i, ax in enumerate(g.axes.flat): - ax.set_title('Air Temperature %d' % i) + ax.set_title("Air Temperature %d" % i) bottomright = g.axes[-1, -1] - bottomright.annotate('bottom right', (240, 40)) + bottomright.annotate("bottom right", (240, 40)) @savefig plot_facet_iterator.png plt.draw() @@ -632,8 +681,8 @@ Consider this dataset .. 
ipython:: python - ds = xr.tutorial.scatter_example_dataset() - ds + ds = xr.tutorial.scatter_example_dataset() + ds Suppose we want to scatter ``A`` against ``B`` @@ -641,14 +690,14 @@ Suppose we want to scatter ``A`` against ``B`` .. ipython:: python @savefig ds_simple_scatter.png - ds.plot.scatter(x='A', y='B') + ds.plot.scatter(x="A", y="B") The ``hue`` kwarg lets you vary the color by variable value .. ipython:: python @savefig ds_hue_scatter.png - ds.plot.scatter(x='A', y='B', hue='w') + ds.plot.scatter(x="A", y="B", hue="w") When ``hue`` is specified, a colorbar is added for numeric ``hue`` DataArrays by default and a legend is added for non-numeric ``hue`` DataArrays (as above). @@ -659,21 +708,21 @@ Additionally, the boolean kwarg ``add_guide`` can be used to prevent the display ds = ds.assign(w=[1, 2, 3, 5]) @savefig ds_discrete_legend_hue_scatter.png - ds.plot.scatter(x='A', y='B', hue='w', hue_style='discrete') + ds.plot.scatter(x="A", y="B", hue="w", hue_style="discrete") The ``markersize`` kwarg lets you vary the point's size by variable value. You can additionally pass ``size_norm`` to control how the variable's values are mapped to point sizes. .. ipython:: python @savefig ds_hue_size_scatter.png - ds.plot.scatter(x='A', y='B', hue='z', hue_style='discrete', markersize='z') + ds.plot.scatter(x="A", y="B", hue="z", hue_style="discrete", markersize="z") Faceting is also possible .. ipython:: python @savefig ds_facet_scatter.png - ds.plot.scatter(x='A', y='B', col='x', row='z', hue='w', hue_style='discrete') + ds.plot.scatter(x="A", y="B", col="x", row="z", hue="w", hue_style="discrete") For more advanced scatter plots, we recommend converting the relevant data variables to a pandas DataFrame and using the extensive plotting capabilities of ``seaborn``. @@ -691,25 +740,33 @@ This script will plot the air temperature on a map. .. ipython:: python import cartopy.crs as ccrs - air = xr.tutorial.open_dataset('air_temperature').air + + air = xr.tutorial.open_dataset("air_temperature").air + ax = plt.axes(projection=ccrs.Orthographic(-80, 35)) - air.isel(time=0).plot.contourf(ax=ax, transform=ccrs.PlateCarree()); + air.isel(time=0).plot.contourf(ax=ax, transform=ccrs.PlateCarree()) + ax.set_global() + @savefig plotting_maps_cartopy.png width=100% - ax.set_global(); ax.coastlines(); + ax.coastlines() When faceting on maps, the projection can be transferred to the ``plot`` function using the ``subplot_kws`` keyword. The axes for the subplots created by faceting are accessible in the object returned by ``plot``: .. ipython:: python + :okwarning: - p = air.isel(time=[0, 4]).plot(transform=ccrs.PlateCarree(), col='time', - subplot_kws={'projection': ccrs.Orthographic(-80, 35)}) + p = air.isel(time=[0, 4]).plot( + transform=ccrs.PlateCarree(), + col="time", + subplot_kws={"projection": ccrs.Orthographic(-80, 35)}, + ) for ax in p.axes.flat: ax.coastlines() ax.gridlines() @savefig plotting_maps_cartopy_facetting.png width=100% - plt.draw(); + plt.draw() Details @@ -732,6 +789,7 @@ These are provided for user convenience; they all call the same code. .. ipython:: python import xarray.plot as xplt + da = xr.DataArray(range(5)) fig, axes = plt.subplots(ncols=2, nrows=2) da.plot(ax=axes[0, 0]) @@ -766,8 +824,7 @@ read on. .. ipython:: python - a0 = xr.DataArray(np.zeros((4, 3, 2)), dims=('y', 'x', 'z'), - name='temperature') + a0 = xr.DataArray(np.zeros((4, 3, 2)), dims=("y", "x", "z"), name="temperature") a0[0, 0, 0] = 1 a = a0.isel(z=0) a @@ -801,14 +858,16 @@ instead of the default ones: .. 
ipython:: python lon, lat = np.meshgrid(np.linspace(-20, 20, 5), np.linspace(0, 30, 4)) - lon += lat/10 - lat += lon/10 - da = xr.DataArray(np.arange(20).reshape(4, 5), dims=['y', 'x'], - coords = {'lat': (('y', 'x'), lat), - 'lon': (('y', 'x'), lon)}) + lon += lat / 10 + lat += lon / 10 + da = xr.DataArray( + np.arange(20).reshape(4, 5), + dims=["y", "x"], + coords={"lat": (("y", "x"), lat), "lon": (("y", "x"), lon)}, + ) @savefig plotting_example_2d_irreg.png width=4in - da.plot.pcolormesh('lon', 'lat'); + da.plot.pcolormesh("lon", "lat") Note that in this case, xarray still follows the pixel centered convention. This might be undesirable in some cases, for example when your data is defined @@ -818,22 +877,25 @@ this convention when plotting on a map: .. ipython:: python import cartopy.crs as ccrs - ax = plt.subplot(projection=ccrs.PlateCarree()); - da.plot.pcolormesh('lon', 'lat', ax=ax); - ax.scatter(lon, lat, transform=ccrs.PlateCarree()); + + ax = plt.subplot(projection=ccrs.PlateCarree()) + da.plot.pcolormesh("lon", "lat", ax=ax) + ax.scatter(lon, lat, transform=ccrs.PlateCarree()) + ax.coastlines() @savefig plotting_example_2d_irreg_map.png width=4in - ax.coastlines(); ax.gridlines(draw_labels=True); + ax.gridlines(draw_labels=True) You can however decide to infer the cell boundaries and use the ``infer_intervals`` keyword: .. ipython:: python - ax = plt.subplot(projection=ccrs.PlateCarree()); - da.plot.pcolormesh('lon', 'lat', ax=ax, infer_intervals=True); - ax.scatter(lon, lat, transform=ccrs.PlateCarree()); + ax = plt.subplot(projection=ccrs.PlateCarree()) + da.plot.pcolormesh("lon", "lat", ax=ax, infer_intervals=True) + ax.scatter(lon, lat, transform=ccrs.PlateCarree()) + ax.coastlines() @savefig plotting_example_2d_irreg_map_infer.png width=4in - ax.coastlines(); ax.gridlines(draw_labels=True); + ax.gridlines(draw_labels=True) .. note:: The data model of xarray does not support datasets with `cell boundaries`_ @@ -847,6 +909,6 @@ One can also make line plots with multidimensional coordinates. In this case, `` .. ipython:: python f, ax = plt.subplots(2, 1) - da.plot.line(x='lon', hue='y', ax=ax[0]); + da.plot.line(x="lon", hue="y", ax=ax[0]) @savefig plotting_example_2d_hue_xy.png - da.plot.line(x='lon', hue='x', ax=ax[1]); + da.plot.line(x="lon", hue="x", ax=ax[1]) \ No newline at end of file diff --git a/doc/quick-overview.rst b/doc/quick-overview.rst index 741b3d1a5fe..09b0d4c6fbb 100644 --- a/doc/quick-overview.rst +++ b/doc/quick-overview.rst @@ -22,16 +22,14 @@ array or list, with optional *dimensions* and *coordinates*: .. ipython:: python - data = xr.DataArray(np.random.randn(2, 3), - dims=('x', 'y'), - coords={'x': [10, 20]}) + data = xr.DataArray(np.random.randn(2, 3), dims=("x", "y"), coords={"x": [10, 20]}) data In this case, we have generated a 2D array, assigned the names *x* and *y* to the two dimensions respectively and associated two *coordinate labels* '10' and '20' with the two locations along the x dimension. If you supply a pandas :py:class:`~pandas.Series` or :py:class:`~pandas.DataFrame`, metadata is copied directly: .. ipython:: python - xr.DataArray(pd.Series(range(3), index=list('abc'), name='foo')) + xr.DataArray(pd.Series(range(3), index=list("abc"), name="foo")) Here are the key properties for a ``DataArray``: @@ -75,13 +73,13 @@ While you're setting up your DataArray, it's often a good idea to set metadata a .. 
ipython:: python - data.attrs['long_name'] = 'random velocity' - data.attrs['units'] = 'metres/sec' - data.attrs['description'] = 'A random variable created as an example.' - data.attrs['random_attribute'] = 123 + data.attrs["long_name"] = "random velocity" + data.attrs["units"] = "metres/sec" + data.attrs["description"] = "A random variable created as an example." + data.attrs["random_attribute"] = 123 data.attrs # you can add metadata to coordinates too - data.x.attrs['units'] = 'x units' + data.x.attrs["units"] = "x units" Computation @@ -102,15 +100,15 @@ numbers: .. ipython:: python - data.mean(dim='x') + data.mean(dim="x") Arithmetic operations broadcast based on dimension name. This means you don't need to insert dummy dimensions for alignment: .. ipython:: python - a = xr.DataArray(np.random.randn(3), [data.coords['y']]) - b = xr.DataArray(np.random.randn(4), dims='z') + a = xr.DataArray(np.random.randn(3), [data.coords["y"]]) + b = xr.DataArray(np.random.randn(4), dims="z") a b @@ -139,9 +137,9 @@ xarray supports grouped operations using a very similar API to pandas (see :ref: .. ipython:: python - labels = xr.DataArray(['E', 'F', 'E'], [data.coords['y']], name='labels') + labels = xr.DataArray(["E", "F", "E"], [data.coords["y"]], name="labels") labels - data.groupby(labels).mean('y') + data.groupby(labels).mean("y") data.groupby(labels).map(lambda x: x - x.min()) Plotting @@ -178,7 +176,7 @@ objects. You can think of it as a multi-dimensional generalization of the .. ipython:: python - ds = xr.Dataset({'foo': data, 'bar': ('x', [1, 2]), 'baz': np.pi}) + ds = xr.Dataset({"foo": data, "bar": ("x", [1, 2]), "baz": np.pi}) ds @@ -186,7 +184,7 @@ This creates a dataset with three DataArrays named ``foo``, ``bar`` and ``baz``. .. ipython:: python - ds['foo'] + ds["foo"] ds.foo @@ -216,14 +214,15 @@ You can directly read and write xarray objects to disk using :py:meth:`~xarray.D .. ipython:: python - ds.to_netcdf('example.nc') - xr.open_dataset('example.nc') + ds.to_netcdf("example.nc") + xr.open_dataset("example.nc") .. ipython:: python - :suppress: + :suppress: import os - os.remove('example.nc') + + os.remove("example.nc") It is common for datasets to be distributed across multiple files (commonly one file per timestep). xarray supports this use-case by providing the :py:meth:`~xarray.open_mfdataset` and the :py:meth:`~xarray.save_mfdataset` methods. For more, see :ref:`io`. diff --git a/doc/related-projects.rst b/doc/related-projects.rst index 57b8da0c447..9891f1a6bc2 100644 --- a/doc/related-projects.rst +++ b/doc/related-projects.rst @@ -62,6 +62,7 @@ Extend xarray capabilities - `eofs `_: EOF analysis in Python. - `hypothesis-gufunc `_: Extension to hypothesis. Makes it easy to write unit tests with xarray objects as input. - `nxarray `_: NeXus input/output capability for xarray. +- `xarray-custom `_: Data classes for custom xarray creation. - `xarray_extras `_: Advanced algorithms for xarray objects (e.g. integrations/interpolations). - `xpublish `_: Publish Xarray Datasets via a Zarr compatible REST API. - `xrft `_: Fourier transforms for xarray data. @@ -75,6 +76,7 @@ Visualization - `Datashader `_, `geoviews `_, `holoviews `_, : visualization packages for large data. - `hvplot `_ : A high-level plotting API for the PyData ecosystem built on HoloViews. - `psyplot `_: Interactive data visualization with python. +- `xarray-leaflet `_: An xarray extension for tiles map plotting based on ipyleaflet. 
Non-Python projects ~~~~~~~~~~~~~~~~~~~ diff --git a/doc/reshaping.rst b/doc/reshaping.rst index 465ca14dfc2..40de9ea799a 100644 --- a/doc/reshaping.rst +++ b/doc/reshaping.rst @@ -7,11 +7,12 @@ Reshaping and reorganizing data These methods allow you to reorganize .. ipython:: python - :suppress: + :suppress: import numpy as np import pandas as pd import xarray as xr + np.random.seed(123456) Reordering dimensions @@ -23,9 +24,9 @@ ellipsis (`...`) can be use to represent all other dimensions: .. ipython:: python - ds = xr.Dataset({'foo': (('x', 'y', 'z'), [[[42]]]), 'bar': (('y', 'z'), [[24]])}) - ds.transpose('y', 'z', 'x') - ds.transpose(..., 'x') # equivalent + ds = xr.Dataset({"foo": (("x", "y", "z"), [[[42]]]), "bar": (("y", "z"), [[24]])}) + ds.transpose("y", "z", "x") + ds.transpose(..., "x") # equivalent ds.transpose() # reverses all dimensions Expand and squeeze dimensions @@ -37,7 +38,7 @@ use :py:meth:`~xarray.DataArray.expand_dims` .. ipython:: python - expanded = ds.expand_dims('w') + expanded = ds.expand_dims("w") expanded This method attaches a new dimension with size 1 to all data variables. @@ -48,7 +49,7 @@ use :py:meth:`~xarray.DataArray.squeeze` .. ipython:: python - expanded.squeeze('w') + expanded.squeeze("w") Converting between datasets and arrays -------------------------------------- @@ -69,14 +70,14 @@ To convert back from a DataArray to a Dataset, use .. ipython:: python - arr.to_dataset(dim='variable') + arr.to_dataset(dim="variable") The broadcasting behavior of ``to_array`` means that the resulting array includes the union of data variable dimensions: .. ipython:: python - ds2 = xr.Dataset({'a': 0, 'b': ('x', [3, 4, 5])}) + ds2 = xr.Dataset({"a": 0, "b": ("x", [3, 4, 5])}) # the input dataset has 4 elements ds2 @@ -90,7 +91,7 @@ If you use ``to_dataset`` without supplying the ``dim`` argument, the DataArray .. ipython:: python - arr.to_dataset(name='combined') + arr.to_dataset(name="combined") .. _reshape.stack: @@ -103,11 +104,12 @@ implemented :py:meth:`~xarray.DataArray.stack` and .. ipython:: python - array = xr.DataArray(np.random.randn(2, 3), - coords=[('x', ['a', 'b']), ('y', [0, 1, 2])]) - stacked = array.stack(z=('x', 'y')) + array = xr.DataArray( + np.random.randn(2, 3), coords=[("x", ["a", "b"]), ("y", [0, 1, 2])] + ) + stacked = array.stack(z=("x", "y")) stacked - stacked.unstack('z') + stacked.unstack("z") As elsewhere in xarray, an ellipsis (`...`) can be used to represent all unlisted dimensions: @@ -128,15 +130,15 @@ possible levels. Missing levels are filled in with ``NaN`` in the resulting obje stacked2 = stacked[::2] stacked2 - stacked2.unstack('z') + stacked2.unstack("z") However, xarray's ``stack`` has an important difference from pandas: unlike pandas, it does not automatically drop missing values. Compare: .. ipython:: python - array = xr.DataArray([[np.nan, 1], [2, 3]], dims=['x', 'y']) - array.stack(z=('x', 'y')) + array = xr.DataArray([[np.nan, 1], [2, 3]], dims=["x", "y"]) + array.stack(z=("x", "y")) array.to_pandas().stack() We departed from pandas's behavior here because predictable shapes for new @@ -166,16 +168,15 @@ like this: .. 
ipython:: python - data = xr.Dataset( - data_vars={'a': (('x', 'y'), [[0, 1, 2], [3, 4, 5]]), - 'b': ('x', [6, 7])}, - coords={'y': ['u', 'v', 'w']} - ) - data - stacked = data.to_stacked_array("z", sample_dims=['x']) - stacked - unstacked = stacked.to_unstacked_dataset("z") - unstacked + data = xr.Dataset( + data_vars={"a": (("x", "y"), [[0, 1, 2], [3, 4, 5]]), "b": ("x", [6, 7])}, + coords={"y": ["u", "v", "w"]}, + ) + data + stacked = data.to_stacked_array("z", sample_dims=["x"]) + stacked + unstacked = stacked.to_unstacked_dataset("z") + unstacked In this example, ``stacked`` is a two dimensional array that we can easily pass to a scikit-learn or another generic numerical method. @@ -202,19 +203,23 @@ coordinates using :py:meth:`~xarray.DataArray.set_index`: .. ipython:: python - da = xr.DataArray(np.random.rand(4), - coords={'band': ('x', ['a', 'a', 'b', 'b']), - 'wavenumber': ('x', np.linspace(200, 400, 4))}, - dims='x') - da - mda = da.set_index(x=['band', 'wavenumber']) - mda + da = xr.DataArray( + np.random.rand(4), + coords={ + "band": ("x", ["a", "a", "b", "b"]), + "wavenumber": ("x", np.linspace(200, 400, 4)), + }, + dims="x", + ) + da + mda = da.set_index(x=["band", "wavenumber"]) + mda These coordinates can now be used for indexing, e.g., .. ipython:: python - mda.sel(band='a') + mda.sel(band="a") Conversely, you can use :py:meth:`~xarray.DataArray.reset_index` to extract multi-index levels as coordinates (this is mainly useful @@ -222,14 +227,14 @@ for serialization): .. ipython:: python - mda.reset_index('x') + mda.reset_index("x") :py:meth:`~xarray.DataArray.reorder_levels` allows changing the order of multi-index levels: .. ipython:: python - mda.reorder_levels(x=['wavenumber', 'band']) + mda.reorder_levels(x=["wavenumber", "band"]) As of xarray v0.9 coordinate labels for each dimension are optional. You can also use ``.set_index`` / ``.reset_index`` to add / remove @@ -237,12 +242,12 @@ labels for one or several dimensions: .. ipython:: python - array = xr.DataArray([1, 2, 3], dims='x') + array = xr.DataArray([1, 2, 3], dims="x") array - array['c'] = ('x', ['a', 'b', 'c']) - array.set_index(x='c') - array = array.set_index(x='c') - array = array.reset_index('x', drop=True) + array["c"] = ("x", ["a", "b", "c"]) + array.set_index(x="c") + array = array.set_index(x="c") + array = array.reset_index("x", drop=True) .. _reshape.shift_and_roll: @@ -254,9 +259,9 @@ To adjust coordinate labels, you can use the :py:meth:`~xarray.Dataset.shift` an .. ipython:: python - array = xr.DataArray([1, 2, 3, 4], dims='x') - array.shift(x=2) - array.roll(x=2, roll_coords=True) + array = xr.DataArray([1, 2, 3, 4], dims="x") + array.shift(x=2) + array.roll(x=2, roll_coords=True) .. _reshape.sort: @@ -269,17 +274,18 @@ One may sort a DataArray/Dataset via :py:meth:`~xarray.DataArray.sortby` and .. ipython:: python - ds = xr.Dataset({'A': (('x', 'y'), [[1, 2], [3, 4]]), - 'B': (('x', 'y'), [[5, 6], [7, 8]])}, - coords={'x': ['b', 'a'], 'y': [1, 0]}) - dax = xr.DataArray([100, 99], [('x', [0, 1])]) - day = xr.DataArray([90, 80], [('y', [0, 1])]) - ds.sortby([day, dax]) + ds = xr.Dataset( + {"A": (("x", "y"), [[1, 2], [3, 4]]), "B": (("x", "y"), [[5, 6], [7, 8]])}, + coords={"x": ["b", "a"], "y": [1, 0]}, + ) + dax = xr.DataArray([100, 99], [("x", [0, 1])]) + day = xr.DataArray([90, 80], [("y", [0, 1])]) + ds.sortby([day, dax]) As a shortcut, you can refer to existing coordinates by name: .. 
ipython:: python - ds.sortby('x') - ds.sortby(['y', 'x']) - ds.sortby(['y', 'x'], ascending=False) + ds.sortby("x") + ds.sortby(["y", "x"]) + ds.sortby(["y", "x"], ascending=False) \ No newline at end of file diff --git a/doc/time-series.rst b/doc/time-series.rst index d838dbbd4cd..96a2edc0ea5 100644 --- a/doc/time-series.rst +++ b/doc/time-series.rst @@ -10,11 +10,12 @@ data in pandas such a joy to xarray. In most cases, we rely on pandas for the core functionality. .. ipython:: python - :suppress: + :suppress: import numpy as np import pandas as pd import xarray as xr + np.random.seed(123456) Creating datetime64 data @@ -29,8 +30,8 @@ using :py:func:`pandas.to_datetime` and :py:func:`pandas.date_range`: .. ipython:: python - pd.to_datetime(['2000-01-01', '2000-02-02']) - pd.date_range('2000-01-01', periods=365) + pd.to_datetime(["2000-01-01", "2000-02-02"]) + pd.date_range("2000-01-01", periods=365) Alternatively, you can supply arrays of Python ``datetime`` objects. These get converted automatically when used as arguments in xarray objects: @@ -38,7 +39,8 @@ converted automatically when used as arguments in xarray objects: .. ipython:: python import datetime - xr.Dataset({'time': datetime.datetime(2000, 1, 1)}) + + xr.Dataset({"time": datetime.datetime(2000, 1, 1)}) When reading or writing netCDF files, xarray automatically decodes datetime and timedelta arrays using `CF conventions`_ (that is, by using a ``units`` @@ -62,8 +64,8 @@ You can manual decode arrays in this form by passing a dataset to .. ipython:: python - attrs = {'units': 'hours since 2000-01-01'} - ds = xr.Dataset({'time': ('time', [0, 1, 2, 3], attrs)}) + attrs = {"units": "hours since 2000-01-01"} + ds = xr.Dataset({"time": ("time", [0, 1, 2, 3], attrs)}) xr.decode_cf(ds) One unfortunate limitation of using ``datetime64[ns]`` is that it limits the @@ -87,10 +89,10 @@ items and with the `slice` object: .. ipython:: python - time = pd.date_range('2000-01-01', freq='H', periods=365 * 24) - ds = xr.Dataset({'foo': ('time', np.arange(365 * 24)), 'time': time}) - ds.sel(time='2000-01') - ds.sel(time=slice('2000-06-01', '2000-06-10')) + time = pd.date_range("2000-01-01", freq="H", periods=365 * 24) + ds = xr.Dataset({"foo": ("time", np.arange(365 * 24)), "time": time}) + ds.sel(time="2000-01") + ds.sel(time=slice("2000-06-01", "2000-06-10")) You can also select a particular time by indexing with a :py:class:`datetime.time` object: @@ -113,8 +115,8 @@ given ``DataArray`` can be quickly computed using a special ``.dt`` accessor. .. ipython:: python - time = pd.date_range('2000-01-01', freq='6H', periods=365 * 4) - ds = xr.Dataset({'foo': ('time', np.arange(365 * 4)), 'time': time}) + time = pd.date_range("2000-01-01", freq="6H", periods=365 * 4) + ds = xr.Dataset({"foo": ("time", np.arange(365 * 4)), "time": time}) ds.time.dt.hour ds.time.dt.dayofweek @@ -130,16 +132,16 @@ __ http://pandas.pydata.org/pandas-docs/stable/api.html#time-date-components .. ipython:: python - ds['time.month'] - ds['time.dayofyear'] + ds["time.month"] + ds["time.dayofyear"] For use as a derived coordinate, xarray adds ``'season'`` to the list of datetime components supported by pandas: .. ipython:: python - ds['time.season'] - ds['time'].dt.season + ds["time.season"] + ds["time"].dt.season The set of valid seasons consists of 'DJF', 'MAM', 'JJA' and 'SON', labeled by the first letters of the corresponding months. @@ -152,7 +154,7 @@ __ http://pandas.pydata.org/pandas-docs/stable/timeseries.html#offset-aliases .. 
ipython:: python - ds['time'].dt.floor('D') + ds["time"].dt.floor("D") The ``.dt`` accessor can also be used to generate formatted datetime strings for arrays utilising the same formatting as the standard `datetime.strftime`_. @@ -161,7 +163,7 @@ for arrays utilising the same formatting as the standard `datetime.strftime`_. .. ipython:: python - ds['time'].dt.strftime('%a, %b %d %H:%M') + ds["time"].dt.strftime("%a, %b %d %H:%M") .. _resampling: @@ -173,9 +175,9 @@ Datetime components couple particularly well with grouped operations (see calculate the mean by time of day: .. ipython:: python - :okwarning: + :okwarning: - ds.groupby('time.hour').mean() + ds.groupby("time.hour").mean() For upsampling or downsampling temporal resolutions, xarray offers a :py:meth:`~xarray.Dataset.resample` method building on the core functionality @@ -187,25 +189,25 @@ same api as ``resample`` `in pandas`_. For example, we can downsample our dataset from hourly to 6-hourly: .. ipython:: python - :okwarning: + :okwarning: - ds.resample(time='6H') + ds.resample(time="6H") This will create a specialized ``Resample`` object which saves information necessary for resampling. All of the reduction methods which work with ``Resample`` objects can also be used for resampling: .. ipython:: python - :okwarning: + :okwarning: - ds.resample(time='6H').mean() + ds.resample(time="6H").mean() You can also supply an arbitrary reduction function to aggregate over each resampling group: .. ipython:: python - ds.resample(time='6H').reduce(np.mean) + ds.resample(time="6H").reduce(np.mean) For upsampling, xarray provides six methods: ``asfreq``, ``ffill``, ``bfill``, ``pad``, ``nearest`` and ``interpolate``. ``interpolate`` extends ``scipy.interpolate.interp1d`` @@ -218,7 +220,7 @@ Data that has indices outside of the given ``tolerance`` are set to ``NaN``. .. ipython:: python - ds.resample(time='1H').nearest(tolerance='1H') + ds.resample(time="1H").nearest(tolerance="1H") For more examples of using grouped operations on a time dimension, see diff --git a/doc/weather-climate.rst b/doc/weather-climate.rst index 768cf6556f9..f03dfd14c73 100644 --- a/doc/weather-climate.rst +++ b/doc/weather-climate.rst @@ -4,7 +4,7 @@ Weather and climate data ======================== .. ipython:: python - :suppress: + :suppress: import xarray as xr @@ -56,11 +56,14 @@ coordinate with dates from a no-leap calendar and a .. ipython:: python - from itertools import product - from cftime import DatetimeNoLeap - dates = [DatetimeNoLeap(year, month, 1) for year, month in - product(range(1, 3), range(1, 13))] - da = xr.DataArray(np.arange(24), coords=[dates], dims=['time'], name='foo') + from itertools import product + from cftime import DatetimeNoLeap + + dates = [ + DatetimeNoLeap(year, month, 1) + for year, month in product(range(1, 3), range(1, 13)) + ] + da = xr.DataArray(np.arange(24), coords=[dates], dims=["time"], name="foo") xarray also includes a :py:func:`~xarray.cftime_range` function, which enables creating a :py:class:`~xarray.CFTimeIndex` with regularly-spaced dates. For @@ -68,8 +71,17 @@ instance, we can create the same dates and DataArray we created above using: .. 
ipython:: python - dates = xr.cftime_range(start='0001', periods=24, freq='MS', calendar='noleap') - da = xr.DataArray(np.arange(24), coords=[dates], dims=['time'], name='foo') + dates = xr.cftime_range(start="0001", periods=24, freq="MS", calendar="noleap") + da = xr.DataArray(np.arange(24), coords=[dates], dims=["time"], name="foo") + +Mirroring pandas' method with the same name, :py:meth:`~xarray.infer_freq` allows one to +infer the sampling frequency of a :py:class:`~xarray.CFTimeIndex` or a 1-D +:py:class:`~xarray.DataArray` containing cftime objects. It also works transparently with +``np.datetime64[ns]`` and ``np.timedelta64[ns]`` data. + +.. ipython:: python + + xr.infer_freq(dates) With :py:meth:`~xarray.CFTimeIndex.strftime` we can also easily generate formatted strings from the datetime values of a :py:class:`~xarray.CFTimeIndex` directly or through the @@ -80,8 +92,8 @@ using the same formatting as the standard `datetime.strftime`_ convention . .. ipython:: python - dates.strftime('%c') - da['time'].dt.strftime('%Y%m%d') + dates.strftime("%c") + da["time"].dt.strftime("%Y%m%d") For data indexed by a :py:class:`~xarray.CFTimeIndex` xarray currently supports: @@ -90,8 +102,8 @@ For data indexed by a :py:class:`~xarray.CFTimeIndex` xarray currently supports: .. ipython:: python - da.sel(time='0001') - da.sel(time=slice('0001-05', '0002-02')) + da.sel(time="0001") + da.sel(time=slice("0001-05", "0002-02")) - Access of basic datetime components via the ``dt`` accessor (in this case just "year", "month", "day", "hour", "minute", "second", "microsecond", @@ -99,64 +111,65 @@ For data indexed by a :py:class:`~xarray.CFTimeIndex` xarray currently supports: .. ipython:: python - da.time.dt.year - da.time.dt.month - da.time.dt.season - da.time.dt.dayofyear - da.time.dt.dayofweek - da.time.dt.days_in_month + da.time.dt.year + da.time.dt.month + da.time.dt.season + da.time.dt.dayofyear + da.time.dt.dayofweek + da.time.dt.days_in_month - Rounding of datetimes to fixed frequencies via the ``dt`` accessor: .. ipython:: python - da.time.dt.ceil('3D') - da.time.dt.floor('5D') - da.time.dt.round('2D') + da.time.dt.ceil("3D") + da.time.dt.floor("5D") + da.time.dt.round("2D") - Group-by operations based on datetime accessor attributes (e.g. by month of the year): .. ipython:: python - da.groupby('time.month').sum() + da.groupby("time.month").sum() - Interpolation using :py:class:`cftime.datetime` objects: .. ipython:: python - da.interp(time=[DatetimeNoLeap(1, 1, 15), DatetimeNoLeap(1, 2, 15)]) + da.interp(time=[DatetimeNoLeap(1, 1, 15), DatetimeNoLeap(1, 2, 15)]) - Interpolation using datetime strings: .. ipython:: python - da.interp(time=['0001-01-15', '0001-02-15']) + da.interp(time=["0001-01-15", "0001-02-15"]) - Differentiation: .. ipython:: python - da.differentiate('time') + da.differentiate("time") - Serialization: .. ipython:: python - da.to_netcdf('example-no-leap.nc') - xr.open_dataset('example-no-leap.nc') + da.to_netcdf("example-no-leap.nc") + xr.open_dataset("example-no-leap.nc") .. ipython:: python :suppress: import os - os.remove('example-no-leap.nc') + + os.remove("example-no-leap.nc") - And resampling along the time dimension for data indexed by a :py:class:`~xarray.CFTimeIndex`: .. ipython:: python - da.resample(time='81T', closed='right', label='right', base=3).mean() + da.resample(time="81T", closed="right", label="right", base=3).mean() .. note:: @@ -168,13 +181,13 @@ For data indexed by a :py:class:`~xarray.CFTimeIndex` xarray currently supports: method: .. 
ipython:: python - :okwarning: + :okwarning: - modern_times = xr.cftime_range('2000', periods=24, freq='MS', calendar='noleap') - da = xr.DataArray(range(24), [('time', modern_times)]) + modern_times = xr.cftime_range("2000", periods=24, freq="MS", calendar="noleap") + da = xr.DataArray(range(24), [("time", modern_times)]) da - datetimeindex = da.indexes['time'].to_datetimeindex() - da['time'] = datetimeindex + datetimeindex = da.indexes["time"].to_datetimeindex() + da["time"] = datetimeindex However in this case one should use caution to only perform operations which do not depend on differences between dates (e.g. differentiation, diff --git a/doc/whats-new.rst b/doc/whats-new.rst index fc95e26dabd..086cddee0a0 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -4,13 +4,14 @@ What's New ========== .. ipython:: python - :suppress: + :suppress: import numpy as np import pandas as pd import xarray as xray import xarray import xarray as xr + np.random.seed(123456) .. _whats-new.0.16.0: @@ -20,16 +21,54 @@ v0.16.0 (unreleased) Breaking changes ~~~~~~~~~~~~~~~~ + +- ``groupby`` operations will restore coord dimension order. Pass ``restore_coord_dims=False`` + to revert to previous behavior. +- :meth:`DataArray.transpose` will now transpose coordinates by default. + Pass ``transpose_coords=False`` to revert to previous behaviour. + By `Maximilian Roos `_ - Alternate draw styles for :py:meth:`plot.step` must be passed using the ``drawstyle`` (or ``ds``) keyword argument, instead of the ``linestyle`` (or ``ls``) keyword argument, in line with the `upstream change in Matplotlib `_. (:pull:`3274`) By `Elliott Sales de Andrade `_ +- The old :py:func:`auto_combine` function has now been removed in + favour of the :py:func:`combine_by_coords` and + :py:func:`combine_nested` functions. This also means that + the default behaviour of :py:func:`open_mfdataset` has changed to use + ``combine='by_coords'`` as the default argument value. (:issue:`2616`, :pull:`3926`) + By `Tom Nicholas `_. +- The ``DataArray`` and ``Variable`` HTML reprs now expand the data section by + default (:issue:`4176`) + By `Stephan Hoyer `_. + +Enhancements +~~~~~~~~~~~~ +- Performance improvement of :py:meth:`DataArray.interp` and :py:func:`Dataset.interp` + For orthogonal linear- and nearest-neighbor interpolation, we do 1d-interpolation sequentially + rather than interpolating in multidimensional space. (:issue:`2223`) + By `Keisuke Fujii `_. +- :py:meth:`DataArray.reset_index` and :py:meth:`Dataset.reset_index` now keep + coordinate attributes (:pull:`4103`). By `Oriol Abril `_. New Features ~~~~~~~~~~~~ -- Added :py:meth:`DataArray.polyfit` and :py:func:`xarray.polyval` for fitting polynomials. (:issue:`3349`) +- :py:meth:`DataArray.argmin` and :py:meth:`DataArray.argmax` now support + sequences of 'dim' arguments, and if a sequence is passed return a dict + (which can be passed to :py:meth:`isel` to get the value of the minimum) of + the indices for each dimension of the minimum or maximum of a DataArray. + (:pull:`3936`) + By `John Omotani `_, thanks to `Keisuke Fujii + `_ for work in :pull:`1469`. +- Added :py:meth:`xarray.infer_freq` for extending frequency inferring to CFTime indexes and data (:pull:`4033`). + By `Pascal Bourgault `_. +- ``chunks='auto'`` is now supported in the ``chunks`` argument of + :py:meth:`Dataset.chunk`. (:issue:`4055`) + By `Andrew Williams `_ +- Added :py:func:`xarray.cov` and :py:func:`xarray.corr` (:issue:`3784`, :pull:`3550`, :pull:`4089`). + By `Andrew Williams `_ and `Robin Beer `_. 
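+  A minimal sketch of the new functions (the array names and values below are
+  illustrative only, not taken from the changelog):
+
+  .. ipython:: python
+
+      da_a = xr.DataArray(np.random.randn(4, 3), dims=("time", "space"))
+      da_b = xr.DataArray(np.random.randn(4, 3), dims=("time", "space"))
+      xr.cov(da_a, da_b, dim="time")
+      xr.corr(da_a, da_b, dim="time")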
+- Added :py:meth:`DataArray.polyfit` and :py:func:`xarray.polyval` for fitting polynomials. (:issue:`3349`, :pull:`3733`, :pull:`4099`) By `Pascal Bourgault `_. - Control over attributes of result in :py:func:`merge`, :py:func:`concat`, :py:func:`combine_by_coords` and :py:func:`combine_nested` using @@ -43,11 +82,24 @@ New Features - Limited the length of array items with long string reprs to a reasonable width (:pull:`3900`) By `Maximilian Roos `_ +- Limited the number of lines of large arrays when numpy reprs would have greater than 40. + (:pull:`3905`) + By `Maximilian Roos `_ - Implement :py:meth:`DataArray.idxmax`, :py:meth:`DataArray.idxmin`, :py:meth:`Dataset.idxmax`, :py:meth:`Dataset.idxmin`. (:issue:`60`, :pull:`3871`) By `Todd Jennings `_ +- Support dask handling for :py:meth:`DataArray.idxmax`, :py:meth:`DataArray.idxmin`, + :py:meth:`Dataset.idxmax`, :py:meth:`Dataset.idxmin`. (:pull:`3922`, :pull:`4135`) + By `Kai Mühlbauer `_ and `Pascal Bourgault `_. +- More support for unit aware arrays with pint (:pull:`3643`, :pull:`3975`) + By `Justus Magin `_. +- Support overriding existing variables in ``to_zarr()`` with ``mode='a'`` even + without ``append_dim``, as long as dimension sizes do not change. + By `Stephan Hoyer `_. - Allow plotting of boolean arrays. (:pull:`3766`) By `Marek Jacob `_ +- Enable using MultiIndex levels as cordinates in 1D and 2D plots (:issue:`3927`). + By `Mathias Hauser `_. - A ``days_in_month`` accessor for :py:class:`xarray.CFTimeIndex`, analogous to the ``days_in_month`` accessor for a :py:class:`pandas.DatetimeIndex`, which returns the days in the month each datetime in the index. Now days in month @@ -55,9 +107,32 @@ New Features the :py:class:`~core.accessor_dt.DatetimeAccessor` (:pull:`3935`). This feature requires cftime version 1.1.0 or greater. By `Spencer Clark `_. +- For the netCDF3 backend, added dtype coercions for unsigned integer types. + (:issue:`4014`, :pull:`4018`) + By `Yunus Sevinchan `_ +- :py:meth:`map_blocks` now accepts a ``template`` kwarg. This allows use cases + where the result of a computation could not be inferred automatically. + By `Deepak Cherian `_ +- :py:meth:`map_blocks` can now handle dask-backed xarray objects in ``args``. (:pull:`3818`) + By `Deepak Cherian `_ +- Add keyword ``decode_timedelta`` to :py:func:`xarray.open_dataset`, + (:py:func:`xarray.open_dataarray`, :py:func:`xarray.open_dataarray`, + :py:func:`xarray.decode_cf`) that allows to disable/enable the decoding of timedeltas + independently of time decoding (:issue:`1621`) + `Aureliana Barghini ` Bug fixes ~~~~~~~~~ +- Fix errors combining attrs in :py:func:`open_mfdataset` (:issue:`4009`, :pull:`4173`) + By `John Omotani `_ +- If groupby receives a ``DataArray`` with name=None, assign a default name (:issue:`158`) + By `Phil Butcher `_. +- Support dark mode in VS code (:issue:`4024`) + By `Keisuke Fujii `_. +- Fix bug when converting multiindexed Pandas objects to sparse xarray objects. (:issue:`4019`) + By `Deepak Cherian `_. +- ``ValueError`` is raised when ``fill_value`` is not a scalar in :py:meth:`full_like`. (:issue:`3977`) + By `Huite Bootsma `_. - Fix wrong order in converting a ``pd.Series`` with a MultiIndex to ``DataArray``. (:issue:`3951`) By `Keisuke Fujii `_. - Fix renaming of coords when one or more stacked coords is not in @@ -72,15 +147,28 @@ Bug fixes By `Deepak Cherian `_ - Fix :py:class:`~xarray.plot.FacetGrid` when ``vmin == vmax``. 
(:issue:`3734`) By `Deepak Cherian `_ +- Fix plotting when ``levels`` is a scalar and ``norm`` is provided. (:issue:`3735`) + By `Deepak Cherian `_ - Fix bug where plotting line plots with 2D coordinates depended on dimension order. (:issue:`3933`) By `Tom Nicholas `_. - Fix ``RasterioDeprecationWarning`` when using a ``vrt`` in ``open_rasterio``. (:issue:`3964`) By `Taher Chegini `_. +- Fix ``AttributeError`` on displaying a :py:class:`Variable` + in a notebook context. (:issue:`3972`, :pull:`3973`) + By `Ian Castleden `_. - Fix bug causing :py:meth:`DataArray.interpolate_na` to always drop attributes, and added `keep_attrs` argument. (:issue:`3968`) By `Tom Nicholas `_. - +- Fix bug in time parsing failing to fall back to cftime. This was causing time + variables with a time unit of `'msecs'` to fail to parse. (:pull:`3998`) + By `Ryan May `_. +- Fix weighted mean when passing boolean weights (:issue:`4074`). + By `Mathias Hauser `_. +- Fix html repr in untrusted notebooks: fallback to plain text repr. (:pull:`4053`) + By `Benoit Bovy `_. +- Fix :py:func:`open_rasterio` for ``WarpedVRT`` with specified ``src_crs``. (:pull:`4104`) + By `Dave Cole `_. Documentation ~~~~~~~~~~~~~ @@ -102,6 +190,14 @@ Documentation of ``kwargs`` in :py:meth:`Dataset.interp` and :py:meth:`DataArray.interp` for 1-d and n-d interpolation (:pull:`3956`). By `Matthias Riße `_. +- Apply ``black`` to all the code in the documentation (:pull:`4012`) + By `Justus Magin `_. +- Narrative documentation now describes :py:meth:`map_blocks`: :ref:`dask.automatic-parallelization`. + By `Deepak Cherian `_. +- Document ``.plot``, ``.dt``, ``.str`` accessors the way they are called. (:issue:`3625`, :pull:`3988`) + By `Justus Magin `_. +- Add documentation for the parameters and return values of :py:meth:`DataArray.sel`. + By `Justus Magin `_. Internal Changes ~~~~~~~~~~~~~~~~ @@ -110,12 +206,17 @@ Internal Changes - Run the ``isort`` pre-commit hook only on python source files and update the ``flake8`` version. (:issue:`3750`, :pull:`3711`) By `Justus Magin `_. +- Add `blackdoc `_ to the list of + checkers for development. (:pull:`4177`) + By `Justus Magin `_. - Add a CI job that runs the tests with every optional dependency except ``dask``. (:issue:`3794`, :pull:`3919`) By `Justus Magin `_. - Use ``async`` / ``await`` for the asynchronous distributed tests. (:issue:`3987`, :pull:`3989`) By `Justus Magin `_. +- Various internal code clean-ups (:pull:`4026`, :pull:`4038`). + By `Prajjwal Nijhara `_. .. _whats-new.0.15.1: @@ -140,7 +241,7 @@ New Features - Weighted array reductions are now supported via the new :py:meth:`DataArray.weighted` and :py:meth:`Dataset.weighted` methods. See :ref:`comput.weighted`. (:issue:`422`, :pull:`2922`). - By `Mathias Hauser `_ + By `Mathias Hauser `_. - The new jupyter notebook repr (``Dataset._repr_html_`` and ``DataArray._repr_html_``) (introduced in 0.14.1) is now on by default. To disable, use ``xarray.set_options(display_style="text")``. @@ -174,6 +275,8 @@ New Features :py:meth:`core.groupby.DatasetGroupBy.quantile`, :py:meth:`core.groupby.DataArrayGroupBy.quantile` (:issue:`3843`, :pull:`3844`) By `Aaron Spring `_. +- Add a diff summary for `testing.assert_allclose`. (:issue:`3617`, :pull:`3847`) + By `Justus Magin `_. Bug fixes ~~~~~~~~~ @@ -1958,8 +2061,8 @@ Enhancements .. 
ipython:: python - ds = xr.Dataset({'a': 1}) - np.sin(ds) + ds = xr.Dataset({"a": 1}) + np.sin(ds) This obliviates the need for the ``xarray.ufuncs`` module, which will be deprecated in the future when xarray drops support for older versions of @@ -2050,8 +2153,8 @@ Enhancements .. ipython:: python - da = xr.DataArray(np.array([True, False, np.nan], dtype=object), dims='x') - da.sum() + da = xr.DataArray(np.array([True, False, np.nan], dtype=object), dims="x") + da.sum() (:issue:`1866`) By `Keisuke Fujii `_. @@ -2205,7 +2308,7 @@ Breaking changes .. ipython:: :verbatim: - In [1]: ds.resample('24H', dim='time', how='max') + In [1]: ds.resample("24H", dim="time", how="max") Out[1]: [...] @@ -2215,7 +2318,7 @@ Breaking changes .. ipython:: :verbatim: - In [1]: ds.resample(time='24H').max() + In [1]: ds.resample(time="24H").max() Out[1]: [...] @@ -2285,9 +2388,9 @@ Enhancements In [1]: import xarray as xr - In [2]: arr = xr.DataArray([[1, 2, 3], [4, 5, 6]], dims=('x', 'y')) + In [2]: arr = xr.DataArray([[1, 2, 3], [4, 5, 6]], dims=("x", "y")) - In [3]: xr.where(arr % 2, 'even', 'odd') + In [3]: xr.where(arr % 2, "even", "odd") Out[3]: array([['even', 'odd', 'even'], @@ -2808,7 +2911,7 @@ Breaking changes .. ipython:: :verbatim: - In [1]: xr.Dataset({'foo': (('x', 'y'), [[1, 2]])}) + In [1]: xr.Dataset({"foo": (("x", "y"), [[1, 2]])}) Out[1]: Dimensions: (x: 1, y: 2) @@ -3265,10 +3368,10 @@ Enhancements .. ipython:: :verbatim: - In [1]: import xarray as xr; import numpy as np + In [1]: import xarray as xr + ...: import numpy as np - In [2]: arr = xr.DataArray(np.arange(0, 7.5, 0.5).reshape(3, 5), - dims=('x', 'y')) + In [2]: arr = xr.DataArray(np.arange(0, 7.5, 0.5).reshape(3, 5), dims=("x", "y")) In [3]: arr Out[3]: @@ -3407,7 +3510,7 @@ Breaking changes .. ipython:: :verbatim: - In [2]: xray.DataArray([4, 5, 6], dims='x', name='x') + In [2]: xray.DataArray([4, 5, 6], dims="x", name="x") Out[2]: array([4, 5, 6]) @@ -3419,7 +3522,7 @@ Breaking changes .. ipython:: :verbatim: - In [2]: xray.DataArray([4, 5, 6], dims='x', name='x') + In [2]: xray.DataArray([4, 5, 6], dims="x", name="x") Out[2]: array([4, 5, 6]) @@ -3442,13 +3545,11 @@ Enhancements .. ipython:: :verbatim: - In [7]: df = pd.DataFrame({'foo': range(3), - ...: 'x': ['a', 'b', 'b'], - ...: 'y': [0, 0, 1]}) + In [7]: df = pd.DataFrame({"foo": range(3), "x": ["a", "b", "b"], "y": [0, 0, 1]}) - In [8]: s = df.set_index(['x', 'y'])['foo'] + In [8]: s = df.set_index(["x", "y"])["foo"] - In [12]: arr = xray.DataArray(s, dims='z') + In [12]: arr = xray.DataArray(s, dims="z") In [13]: arr Out[13]: @@ -3457,13 +3558,13 @@ Enhancements Coordinates: * z (z) object ('a', 0) ('b', 0) ('b', 1) - In [19]: arr.indexes['z'] + In [19]: arr.indexes["z"] Out[19]: MultiIndex(levels=[[u'a', u'b'], [0, 1]], labels=[[0, 1, 1], [0, 0, 1]], names=[u'x', u'y']) - In [14]: arr.unstack('z') + In [14]: arr.unstack("z") Out[14]: array([[ 0., nan], @@ -3472,7 +3573,7 @@ Enhancements * x (x) object 'a' 'b' * y (y) int64 0 1 - In [26]: arr.unstack('z').stack(z=('x', 'y')) + In [26]: arr.unstack("z").stack(z=("x", "y")) Out[26]: array([ 0., nan, 1., 2.]) @@ -3500,9 +3601,9 @@ Enhancements for shifting/rotating datasets or arrays along a dimension: .. ipython:: python - :okwarning: + :okwarning: - array = xray.DataArray([5, 6, 7, 8], dims='x') + array = xray.DataArray([5, 6, 7, 8], dims="x") array.shift(x=2) array.roll(x=2) @@ -3517,8 +3618,8 @@ Enhancements .. 
ipython:: python - a = xray.DataArray([1, 2, 3], dims='x') - b = xray.DataArray([5, 6], dims='y') + a = xray.DataArray([1, 2, 3], dims="x") + b = xray.DataArray([5, 6], dims="y") a b a2, b2 = xray.broadcast(a, b) @@ -3588,9 +3689,9 @@ Enhancements .. ipython:: :verbatim: - In [5]: array = xray.DataArray([1, 2, 3], dims='x') + In [5]: array = xray.DataArray([1, 2, 3], dims="x") - In [6]: array.reindex(x=[0.9, 1.5], method='nearest', tolerance=0.2) + In [6]: array.reindex(x=[0.9, 1.5], method="nearest", tolerance=0.2) Out[6]: array([ 2., nan]) @@ -3670,10 +3771,11 @@ Enhancements .. ipython:: :verbatim: - In [1]: da = xray.DataArray(np.arange(56).reshape((7, 8)), - ...: coords={'x': list('abcdefg'), - ...: 'y': 10 * np.arange(8)}, - ...: dims=['x', 'y']) + In [1]: da = xray.DataArray( + ...: np.arange(56).reshape((7, 8)), + ...: coords={"x": list("abcdefg"), "y": 10 * np.arange(8)}, + ...: dims=["x", "y"], + ...: ) In [2]: da Out[2]: @@ -3690,7 +3792,7 @@ Enhancements * x (x) |S1 'a' 'b' 'c' 'd' 'e' 'f' 'g' # we can index by position along each dimension - In [3]: da.isel_points(x=[0, 1, 6], y=[0, 1, 0], dim='points') + In [3]: da.isel_points(x=[0, 1, 6], y=[0, 1, 0], dim="points") Out[3]: array([ 0, 9, 48]) @@ -3700,7 +3802,7 @@ Enhancements * points (points) int64 0 1 2 # or equivalently by label - In [9]: da.sel_points(x=['a', 'b', 'g'], y=[0, 10, 0], dim='points') + In [9]: da.sel_points(x=["a", "b", "g"], y=[0, 10, 0], dim="points") Out[9]: array([ 0, 9, 48]) @@ -3714,11 +3816,11 @@ Enhancements .. ipython:: python - ds = xray.Dataset(coords={'x': range(100), 'y': range(100)}) - ds['distance'] = np.sqrt(ds.x ** 2 + ds.y ** 2) + ds = xray.Dataset(coords={"x": range(100), "y": range(100)}) + ds["distance"] = np.sqrt(ds.x ** 2 + ds.y ** 2) - @savefig where_example.png width=4in height=4in - ds.distance.where(ds.distance < 100).plot() + @savefig where_example.png width=4in height=4in + ds.distance.where(ds.distance < 100).plot() - Added new methods ``xray.DataArray.diff`` and ``xray.Dataset.diff`` for finite difference calculations along a given axis. @@ -3728,9 +3830,9 @@ Enhancements .. ipython:: python - da = xray.DataArray(np.random.random_sample(size=(5, 4))) - da.where(da < 0.5) - da.where(da < 0.5).to_masked_array(copy=True) + da = xray.DataArray(np.random.random_sample(size=(5, 4))) + da.where(da < 0.5) + da.where(da < 0.5).to_masked_array(copy=True) - Added new flag "drop_variables" to ``xray.open_dataset`` for excluding variables from being parsed. This may be useful to drop @@ -3788,9 +3890,9 @@ Enhancements .. ipython:: :verbatim: - In [1]: years, datasets = zip(*ds.groupby('time.year')) + In [1]: years, datasets = zip(*ds.groupby("time.year")) - In [2]: paths = ['%s.nc' % y for y in years] + In [2]: paths = ["%s.nc" % y for y in years] In [3]: xray.save_mfdataset(datasets, paths) @@ -3863,9 +3965,9 @@ Backwards incompatible changes .. ipython:: :verbatim: - In [1]: ds = xray.Dataset({'x': 0}) + In [1]: ds = xray.Dataset({"x": 0}) - In [2]: xray.concat([ds, ds], dim='y') + In [2]: xray.concat([ds, ds], dim="y") Out[2]: Dimensions: () @@ -3877,13 +3979,13 @@ Backwards incompatible changes Now, the default always concatenates data variables: .. ipython:: python - :suppress: + :suppress: - ds = xray.Dataset({'x': 0}) + ds = xray.Dataset({"x": 0}) .. ipython:: python - xray.concat([ds, ds], dim='y') + xray.concat([ds, ds], dim="y") To obtain the old behavior, supply the argument ``concat_over=[]``. @@ -3896,17 +3998,20 @@ Enhancements .. 
ipython:: python - ds = xray.Dataset({'a': 1, 'b': ('x', [1, 2, 3])}, - coords={'c': 42}, attrs={'Conventions': 'None'}) + ds = xray.Dataset( + {"a": 1, "b": ("x", [1, 2, 3])}, + coords={"c": 42}, + attrs={"Conventions": "None"}, + ) ds.to_array() - ds.to_array().to_dataset(dim='variable') + ds.to_array().to_dataset(dim="variable") - New ``xray.Dataset.fillna`` method to fill missing values, modeled off the pandas method of the same name: .. ipython:: python - array = xray.DataArray([np.nan, 1, np.nan, 3], dims='x') + array = xray.DataArray([np.nan, 1, np.nan, 3], dims="x") array.fillna(0) ``fillna`` works on both ``Dataset`` and ``DataArray`` objects, and uses @@ -3919,9 +4024,9 @@ Enhancements .. ipython:: python - ds = xray.Dataset({'y': ('x', [1, 2, 3])}) - ds.assign(z = lambda ds: ds.y ** 2) - ds.assign_coords(z = ('x', ['a', 'b', 'c'])) + ds = xray.Dataset({"y": ("x", [1, 2, 3])}) + ds.assign(z=lambda ds: ds.y ** 2) + ds.assign_coords(z=("x", ["a", "b", "c"])) These methods return a new Dataset (or DataArray) with updated data or coordinate variables. @@ -3934,7 +4039,7 @@ Enhancements .. ipython:: :verbatim: - In [12]: ds.sel(x=1.1, method='nearest') + In [12]: ds.sel(x=1.1, method="nearest") Out[12]: Dimensions: () @@ -3943,7 +4048,7 @@ Enhancements Data variables: y int64 2 - In [13]: ds.sel(x=[1.1, 2.1], method='pad') + In [13]: ds.sel(x=[1.1, 2.1], method="pad") Out[13]: Dimensions: (x: 2) @@ -3969,7 +4074,7 @@ Enhancements .. ipython:: python - ds = xray.Dataset({'x': np.arange(1000)}) + ds = xray.Dataset({"x": np.arange(1000)}) with xray.set_options(display_width=40): print(ds) @@ -4007,42 +4112,42 @@ Enhancements need to supply the time dimension explicitly: .. ipython:: python - :verbatim: + :verbatim: - time = pd.date_range('2000-01-01', freq='6H', periods=10) - array = xray.DataArray(np.arange(10), [('time', time)]) - array.resample('1D', dim='time') + time = pd.date_range("2000-01-01", freq="6H", periods=10) + array = xray.DataArray(np.arange(10), [("time", time)]) + array.resample("1D", dim="time") You can specify how to do the resampling with the ``how`` argument and other options such as ``closed`` and ``label`` let you control labeling: .. ipython:: python - :verbatim: + :verbatim: - array.resample('1D', dim='time', how='sum', label='right') + array.resample("1D", dim="time", how="sum", label="right") If the desired temporal resolution is higher than the original data (upsampling), xray will insert missing values: .. ipython:: python - :verbatim: + :verbatim: - array.resample('3H', 'time') + array.resample("3H", "time") - ``first`` and ``last`` methods on groupby objects let you take the first or last examples from each group along the grouped axis: .. ipython:: python - :verbatim: + :verbatim: - array.groupby('time.day').first() + array.groupby("time.day").first() These methods combine well with ``resample``: .. ipython:: python - :verbatim: + :verbatim: - array.resample('1D', dim='time', how='first') + array.resample("1D", dim="time", how="first") - ``xray.Dataset.swap_dims`` allows for easily swapping one dimension @@ -4050,9 +4155,9 @@ Enhancements .. ipython:: python - ds = xray.Dataset({'x': range(3), 'y': ('x', list('abc'))}) - ds - ds.swap_dims({'x': 'y'}) + ds = xray.Dataset({"x": range(3), "y": ("x", list("abc"))}) + ds + ds.swap_dims({"x": "y"}) This was possible in earlier versions of xray, but required some contortions. - ``xray.open_dataset`` and ``xray.Dataset.to_netcdf`` now @@ -4098,8 +4203,8 @@ Breaking changes .. 
ipython:: python - lhs = xray.DataArray([1, 2, 3], [('x', [0, 1, 2])]) - rhs = xray.DataArray([2, 3, 4], [('x', [1, 2, 3])]) + lhs = xray.DataArray([1, 2, 3], [("x", [0, 1, 2])]) + rhs = xray.DataArray([2, 3, 4], [("x", [1, 2, 3])]) lhs + rhs :ref:`For dataset construction and merging`, we align based on the @@ -4107,14 +4212,14 @@ Breaking changes .. ipython:: python - xray.Dataset({'foo': lhs, 'bar': rhs}) + xray.Dataset({"foo": lhs, "bar": rhs}) :ref:`For update and __setitem__`, we align based on the **original** object: .. ipython:: python - lhs.coords['rhs'] = rhs + lhs.coords["rhs"] = rhs lhs - Aggregations like ``mean`` or ``median`` now skip missing values by default: @@ -4137,8 +4242,8 @@ Breaking changes .. ipython:: python - a = xray.DataArray([1, 2], coords={'c': 0}, dims='x') - b = xray.DataArray([1, 2], coords={'c': ('x', [0, 0])}, dims='x') + a = xray.DataArray([1, 2], coords={"c": 0}, dims="x") + b = xray.DataArray([1, 2], coords={"c": ("x", [0, 0])}, dims="x") (a + b).coords This functionality can be controlled through the ``compat`` option, which @@ -4149,9 +4254,10 @@ Breaking changes .. ipython:: python - time = xray.DataArray(pd.date_range('2000-01-01', periods=365), - dims='time', name='time') - counts = time.groupby('time.month').count() + time = xray.DataArray( + pd.date_range("2000-01-01", periods=365), dims="time", name="time" + ) + counts = time.groupby("time.month").count() counts.sel(month=2) Previously, you would need to use something like @@ -4161,8 +4267,8 @@ Breaking changes .. ipython:: python - ds = xray.Dataset({'t': pd.date_range('2000-01-01', periods=12, freq='M')}) - ds['t.season'] + ds = xray.Dataset({"t": pd.date_range("2000-01-01", periods=12, freq="M")}) + ds["t.season"] Previously, it returned numbered seasons 1 through 4. - We have updated our use of the terms of "coordinates" and "variables". What @@ -4185,8 +4291,8 @@ Enhancements .. ipython:: python - data = xray.DataArray([1, 2, 3], [('x', range(3))]) - data.reindex(x=[0.5, 1, 1.5, 2, 2.5], method='pad') + data = xray.DataArray([1, 2, 3], [("x", range(3))]) + data.reindex(x=[0.5, 1, 1.5, 2, 2.5], method="pad") This will be especially useful once pandas 0.16 is released, at which point xray will immediately support reindexing with @@ -4205,15 +4311,15 @@ Enhancements makes it easy to drop explicitly listed variables or index labels: .. ipython:: python - :okwarning: + :okwarning: # drop variables - ds = xray.Dataset({'x': 0, 'y': 1}) - ds.drop('x') + ds = xray.Dataset({"x": 0, "y": 1}) + ds.drop("x") # drop index labels - arr = xray.DataArray([1, 2, 3], coords=[('x', list('abc'))]) - arr.drop(['a', 'c'], dim='x') + arr = xray.DataArray([1, 2, 3], coords=[("x", list("abc"))]) + arr.drop(["a", "c"], dim="x") - ``xray.Dataset.broadcast_equals`` has been added to correspond to the new ``compat`` option. @@ -4281,7 +4387,8 @@ Backwards incompatible changes .. ipython:: python from datetime import datetime - xray.Dataset({'t': [datetime(2000, 1, 1)]}) + + xray.Dataset({"t": [datetime(2000, 1, 1)]}) - xray now has support (including serialization to netCDF) for :py:class:`~pandas.TimedeltaIndex`. :py:class:`datetime.timedelta` objects @@ -4297,8 +4404,8 @@ Enhancements .. ipython:: python - ds = xray.Dataset({'tmin': ([], 25, {'units': 'celsius'})}) - ds.tmin.units + ds = xray.Dataset({"tmin": ([], 25, {"units": "celsius"})}) + ds.tmin.units Tab-completion for these variables should work in editors such as IPython. 
However, setting variables or attributes in this fashion is not yet @@ -4308,7 +4415,7 @@ Enhancements .. ipython:: python - array = xray.DataArray(np.zeros(5), dims=['x']) + array = xray.DataArray(np.zeros(5), dims=["x"]) array[dict(x=slice(3))] = 1 array diff --git a/xarray/__init__.py b/xarray/__init__.py index 0fead57e5fb..3886edc60e6 100644 --- a/xarray/__init__.py +++ b/xarray/__init__.py @@ -13,11 +13,12 @@ from .backends.zarr import open_zarr from .coding.cftime_offsets import cftime_range from .coding.cftimeindex import CFTimeIndex +from .coding.frequencies import infer_freq from .conventions import SerializationWarning, decode_cf from .core.alignment import align, broadcast -from .core.combine import auto_combine, combine_by_coords, combine_nested +from .core.combine import combine_by_coords, combine_nested from .core.common import ALL_DIMS, full_like, ones_like, zeros_like -from .core.computation import apply_ufunc, dot, polyval, where +from .core.computation import apply_ufunc, corr, cov, dot, polyval, where from .core.concat import concat from .core.dataarray import DataArray from .core.dataset import Dataset @@ -46,7 +47,6 @@ "align", "apply_ufunc", "as_variable", - "auto_combine", "broadcast", "cftime_range", "combine_by_coords", @@ -54,7 +54,10 @@ "concat", "decode_cf", "dot", + "cov", + "corr", "full_like", + "infer_freq", "load_dataarray", "load_dataset", "map_blocks", diff --git a/xarray/backends/api.py b/xarray/backends/api.py index 7e81870d653..610731568df 100644 --- a/xarray/backends/api.py +++ b/xarray/backends/api.py @@ -5,7 +5,6 @@ from io import BytesIO from numbers import Number from pathlib import Path -from textwrap import dedent from typing import ( TYPE_CHECKING, Callable, @@ -24,7 +23,6 @@ from ..core.combine import ( _infer_concat_order_from_positions, _nested_combine, - auto_combine, combine_by_coords, ) from ..core.dataarray import DataArray @@ -304,6 +302,7 @@ def open_dataset( drop_variables=None, backend_kwargs=None, use_cftime=None, + decode_timedelta=None, ): """Open and decode a dataset from a file or file-like object. @@ -384,9 +383,11 @@ def open_dataset( represented using ``np.datetime64[ns]`` objects. If False, always decode times to ``np.datetime64[ns]`` objects; if this is not possible raise an error. - overwrite_encoded_chunks: bool, optional - Whether to drop the zarr chunks encoded for each variable when a - dataset is loaded with specified chunk sizes (default: False) + decode_timedelta : bool, optional + If True, decode variables and coordinates with time units in + {'days', 'hours', 'minutes', 'seconds', 'milliseconds', 'microseconds'} + into timedelta objects. If False, leave them encoded as numbers. + If None (default), assume the same value of decode_time. Returns @@ -442,6 +443,7 @@ def open_dataset( decode_times = False concat_characters = False decode_coords = False + decode_timedelta = False if cache is None: cache = chunks is None @@ -458,6 +460,7 @@ def maybe_decode_store(store, lock=False): decode_coords=decode_coords, drop_variables=drop_variables, use_cftime=use_cftime, + decode_timedelta=decode_timedelta, ) _protect_dataset_variables_inplace(ds, cache) @@ -470,16 +473,14 @@ def maybe_decode_store(store, lock=False): if isinstance(filename_or_obj, AbstractDataStore): store = filename_or_obj - if isinstance(filename_or_obj, MutableMapping): - if engine == 'zarr': - # on ZarrStore, mode='r', synchronizer=None, group=None, - # consolidated=False. 
- overwrite_encoded_chunks = backend_kwargs.pop("overwrite_encoded_chunks", None) - store = backends.ZarrStore.open_group( - filename_or_obj, - group=group, - **backend_kwargs - ) + if isinstance(filename_or_obj, MutableMapping) and engine == "zarr": + # on ZarrStore, mode='r', synchronizer=None, group=None, + # consolidated=False. + _backend_kwargs = backend_kwargs.copy() + overwrite_encoded_chunks = _backend_kwargs.pop("overwrite_encoded_chunks", None) + store = backends.ZarrStore.open_group( + filename_or_obj, group=group, **_backend_kwargs + ) elif isinstance(filename_or_obj, str): filename_or_obj = _normalize_path(filename_or_obj) @@ -508,14 +509,15 @@ def maybe_decode_store(store, lock=False): store = backends.CfGribDataStore( filename_or_obj, lock=lock, **backend_kwargs ) - elif engine == 'zarr': + elif engine == "zarr": # on ZarrStore, mode='r', synchronizer=None, group=None, # consolidated=False. - overwrite_encoded_chunks = backend_kwargs.pop("overwrite_encoded_chunks", None) + _backend_kwargs = backend_kwargs.copy() + overwrite_encoded_chunks = _backend_kwargs.pop( + "overwrite_encoded_chunks", None + ) store = backends.ZarrStore.open_group( - filename_or_obj, - group=group, - **backend_kwargs + filename_or_obj, group=group, **_backend_kwargs ) else: if engine not in [None, "scipy", "h5netcdf"]: @@ -541,7 +543,8 @@ def maybe_decode_store(store, lock=False): if chunks is not None: from dask.base import tokenize - if engine != 'zarr': + + if engine != "zarr": # if passed an actual file path, augment the token with # the file modification time @@ -579,7 +582,10 @@ def maybe_decode_store(store, lock=False): if isinstance(chunks, int): chunks = dict.fromkeys(ds.dims, chunks) - variables = {k: backends.ZarrStore.open_group.maybe_chunk(k, v, chunks) for k, v in ds.variables.items()} + variables = { + k: store.maybe_chunk(k, v, chunks, overwrite_encoded_chunks) + for k, v in ds.variables.items() + } ds2 = ds._replace_vars_and_dims(variables) return ds2 else: @@ -603,6 +609,7 @@ def open_dataarray( drop_variables=None, backend_kwargs=None, use_cftime=None, + decode_timedelta=None, ): """Open an DataArray from a file or file-like object containing a single data variable. @@ -682,6 +689,11 @@ def open_dataarray( represented using ``np.datetime64[ns]`` objects. If False, always decode times to ``np.datetime64[ns]`` objects; if this is not possible raise an error. + decode_timedelta : bool, optional + If True, decode variables and coordinates with time units in + {'days', 'hours', 'minutes', 'seconds', 'milliseconds', 'microseconds'} + into timedelta objects. If False, leave them encoded as numbers. + If None (default), assume the same value of decode_time. Notes ----- @@ -713,6 +725,7 @@ def open_dataarray( drop_variables=drop_variables, backend_kwargs=backend_kwargs, use_cftime=use_cftime, + decode_timedelta=decode_timedelta, ) if len(dataset.data_vars) != 1: @@ -752,14 +765,14 @@ def close(self): def open_mfdataset( paths, chunks=None, - concat_dim="_not_supplied", + concat_dim=None, compat="no_conflicts", preprocess=None, engine=None, lock=None, data_vars="all", coords="different", - combine="_old_auto", + combine="by_coords", autoclose=None, parallel=False, join="outer", @@ -772,9 +785,8 @@ def open_mfdataset( the datasets into one before returning the result, and if combine='nested' then ``combine_nested`` is used. 
The filepaths must be structured according to which combining function is used, the details of which are given in the documentation for - ``combine_by_coords`` and ``combine_nested``. By default the old (now deprecated) - ``auto_combine`` will be used, please specify either ``combine='by_coords'`` or - ``combine='nested'`` in future. Requires dask to be installed. See documentation for + ``combine_by_coords`` and ``combine_nested``. By default ``combine='by_coords'`` + will be used. Requires dask to be installed. See documentation for details on dask [1]_. Global attributes from the ``attrs_file`` are used for the combined dataset. @@ -784,7 +796,7 @@ def open_mfdataset( Either a string glob in the form ``"path/to/my/files/*.nc"`` or an explicit list of files to open. Paths can be given as strings or as pathlib Paths. If concatenation along more than one dimension is desired, then ``paths`` must be a - nested list-of-lists (see ``manual_combine`` for details). (A string glob will + nested list-of-lists (see ``combine_nested`` for details). (A string glob will be expanded to a 1-dimensional list.) chunks : int or dict, optional Dictionary with keys given by dimension names and values given by chunk sizes. @@ -794,15 +806,16 @@ def open_mfdataset( see the full documentation for more details [2]_. concat_dim : str, or list of str, DataArray, Index or None, optional Dimensions to concatenate files along. You only need to provide this argument - if any of the dimensions along which you want to concatenate is not a dimension - in the original datasets, e.g., if you want to stack a collection of 2D arrays - along a third dimension. Set ``concat_dim=[..., None, ...]`` explicitly to - disable concatenation along a particular dimension. + if ``combine='by_coords'``, and if any of the dimensions along which you want to + concatenate is not a dimension in the original datasets, e.g., if you want to + stack a collection of 2D arrays along a third dimension. Set + ``concat_dim=[..., None, ...]`` explicitly to disable concatenation along a + particular dimension. Default is None, which for a 1D list of filepaths is + equivalent to opening the files separately and then merging them with + ``xarray.merge``. combine : {'by_coords', 'nested'}, optional Whether ``xarray.combine_by_coords`` or ``xarray.combine_nested`` is used to - combine all the data. If this argument is not provided, `xarray.auto_combine` is - used, but in the future this behavior will switch to use - `xarray.combine_by_coords` by default. + combine all the data. Default is to use ``xarray.combine_by_coords``. 
compat : {'identical', 'equals', 'broadcast_equals', 'no_conflicts', 'override'}, optional String indicating how to compare variables of the same name for @@ -895,7 +908,6 @@ def open_mfdataset( -------- combine_by_coords combine_nested - auto_combine open_dataset References @@ -923,11 +935,8 @@ def open_mfdataset( # If combine='nested' then this creates a flat list which is easier to # iterate over, while saving the originally-supplied structure as "ids" if combine == "nested": - if str(concat_dim) == "_not_supplied": - raise ValueError("Must supply concat_dim when using " "combine='nested'") - else: - if isinstance(concat_dim, (str, DataArray)) or concat_dim is None: - concat_dim = [concat_dim] + if isinstance(concat_dim, (str, DataArray)) or concat_dim is None: + concat_dim = [concat_dim] combined_ids_paths = _infer_concat_order_from_positions(paths) ids, paths = (list(combined_ids_paths.keys()), list(combined_ids_paths.values())) @@ -959,30 +968,7 @@ def open_mfdataset( # Combine all datasets, closing them in case of a ValueError try: - if combine == "_old_auto": - # Use the old auto_combine for now - # Remove this after deprecation cycle from #2616 is complete - basic_msg = dedent( - """\ - In xarray version 0.15 the default behaviour of `open_mfdataset` - will change. To retain the existing behavior, pass - combine='nested'. To use future default behavior, pass - combine='by_coords'. See - http://xarray.pydata.org/en/stable/combining.html#combining-multi - """ - ) - warnings.warn(basic_msg, FutureWarning, stacklevel=2) - - combined = auto_combine( - datasets, - concat_dim=concat_dim, - compat=compat, - data_vars=data_vars, - coords=coords, - join=join, - from_openmfds=True, - ) - elif combine == "nested": + if combine == "nested": # Combined nested list by successive concat and merge operations # along each dimension, using structure given by "ids" combined = _nested_combine( @@ -993,12 +979,18 @@ def open_mfdataset( coords=coords, ids=ids, join=join, + combine_attrs="drop", ) elif combine == "by_coords": # Redo ordering from coordinates, ignoring how they were ordered # previously combined = combine_by_coords( - datasets, compat=compat, data_vars=data_vars, coords=coords, join=join + datasets, + compat=compat, + data_vars=data_vars, + coords=coords, + join=join, + combine_attrs="drop", ) else: raise ValueError( @@ -1321,18 +1313,35 @@ def _validate_append_dim_and_encoding( return if append_dim: if append_dim not in ds.dims: - raise ValueError(f"{append_dim} not a valid dimension in the Dataset") - for data_var in ds_to_append: - if data_var in ds: - if append_dim is None: + raise ValueError( + f"append_dim={append_dim!r} does not match any existing " + f"dataset dimensions {ds.dims}" + ) + for var_name in ds_to_append: + if var_name in ds: + if ds_to_append[var_name].dims != ds[var_name].dims: + raise ValueError( + f"variable {var_name!r} already exists with different " + f"dimension names {ds[var_name].dims} != " + f"{ds_to_append[var_name].dims}, but changing variable " + "dimensions is not supported by to_zarr()." + ) + existing_sizes = { + k: v for k, v in ds[var_name].sizes.items() if k != append_dim + } + new_sizes = { + k: v for k, v in ds_to_append[var_name].sizes.items() if k != append_dim + } + if existing_sizes != new_sizes: raise ValueError( - "variable '{}' already exists, but append_dim " - "was not set".format(data_var) + f"variable {var_name!r} already exists with different " + "dimension sizes: {existing_sizes} != {new_sizes}. 
" + "to_zarr() only supports changing dimension sizes when " + f"explicitly appending, but append_dim={append_dim!r}." ) - if data_var in encoding.keys(): + if var_name in encoding.keys(): raise ValueError( - "variable '{}' already exists, but encoding was" - "provided".format(data_var) + f"variable {var_name!r} already exists, but encoding was provided" ) diff --git a/xarray/backends/common.py b/xarray/backends/common.py index fa3ee19f542..63c4c956f86 100644 --- a/xarray/backends/common.py +++ b/xarray/backends/common.py @@ -241,7 +241,7 @@ def encode_attribute(self, a): """encode one attribute""" return a - def set_dimension(self, d, l): # pragma: no cover + def set_dimension(self, dim, length): # pragma: no cover raise NotImplementedError() def set_attribute(self, k, v): # pragma: no cover diff --git a/xarray/backends/memory.py b/xarray/backends/memory.py index bee6521bce2..17095d09651 100644 --- a/xarray/backends/memory.py +++ b/xarray/backends/memory.py @@ -40,6 +40,6 @@ def set_attribute(self, k, v): # copy to imitate writing to disk. self._attributes[k] = copy.deepcopy(v) - def set_dimension(self, d, l, unlimited_dims=None): + def set_dimension(self, dim, length, unlimited_dims=None): # in this model, dimensions are accounted for in the variables pass diff --git a/xarray/backends/netcdf3.py b/xarray/backends/netcdf3.py index c9c4baf9b01..51d7fce22a0 100644 --- a/xarray/backends/netcdf3.py +++ b/xarray/backends/netcdf3.py @@ -28,7 +28,14 @@ # These data-types aren't supported by netCDF3, so they are automatically # coerced instead as indicated by the "coerce_nc3_dtype" function -_nc3_dtype_coercions = {"int64": "int32", "bool": "int8"} +_nc3_dtype_coercions = { + "int64": "int32", + "uint64": "int32", + "uint32": "int32", + "uint16": "int16", + "uint8": "int8", + "bool": "int8", +} # encode all strings as UTF-8 STRING_ENCODING = "utf-8" @@ -37,12 +44,17 @@ def coerce_nc3_dtype(arr): """Coerce an array to a data type that can be stored in a netCDF-3 file - This function performs the following dtype conversions: - int64 -> int32 - bool -> int8 - - Data is checked for equality, or equivalence (non-NaN values) with - `np.allclose` with the default keyword arguments. + This function performs the dtype conversions as specified by the + ``_nc3_dtype_coercions`` mapping: + int64 -> int32 + uint64 -> int32 + uint32 -> int32 + uint16 -> int16 + uint8 -> int8 + bool -> int8 + + Data is checked for equality, or equivalence (non-NaN values) using the + ``(cast_array == original_array).all()``. 
""" dtype = str(arr.dtype) if dtype in _nc3_dtype_coercions: diff --git a/xarray/backends/rasterio_.py b/xarray/backends/rasterio_.py index 77beffd09b1..661d5b5c6fc 100644 --- a/xarray/backends/rasterio_.py +++ b/xarray/backends/rasterio_.py @@ -221,14 +221,17 @@ def open_rasterio(filename, parse_coordinates=None, chunks=None, cache=None, loc vrt = filename filename = vrt.src_dataset.name vrt_params = dict( + src_crs=vrt.src_crs.to_string(), crs=vrt.crs.to_string(), resampling=vrt.resampling, + tolerance=vrt.tolerance, src_nodata=vrt.src_nodata, nodata=vrt.nodata, - tolerance=vrt.tolerance, - transform=vrt.transform, width=vrt.width, height=vrt.height, + src_transform=vrt.src_transform, + transform=vrt.transform, + dtype=vrt.working_dtype, warp_extras=vrt.warp_extras, ) diff --git a/xarray/backends/zarr.py b/xarray/backends/zarr.py index c6e4d1b362a..92c5893fb3f 100644 --- a/xarray/backends/zarr.py +++ b/xarray/backends/zarr.py @@ -358,8 +358,7 @@ def encode_variable(self, variable): def encode_attribute(self, a): return encode_zarr_attr_value(a) - - def get_chunk(name, var, chunks): + def get_chunk(self, name, var, chunks): chunk_spec = dict(zip(var.dims, var.encoding.get("chunks"))) # Coordinate labels aren't chunked @@ -388,15 +387,17 @@ def get_chunk(name, var, chunks): chunk_spec[dim] = chunks[dim] return chunk_spec - def maybe_chunk(name, var, chunks): - chunk_spec = get_chunk(name, var, chunks) + def maybe_chunk(self, name, var, chunks, overwrite_encoded_chunks): + chunk_spec = self.get_chunk(name, var, chunks) if (var.ndim > 0) and (chunk_spec is not None): + from dask.base import tokenize + # does this cause any data to be read? token2 = tokenize(name, var._data) name2 = "zarr-%s" % token2 var = var.chunk(chunk_spec, name=name2, lock=None) - if overwrite_encoded_chunks and var.chunks is not None: + if open_kwargs["overwrite_encoded_chunks"] and var.chunks is not None: var.encoding["chunks"] = tuple(x[0] for x in var.chunk) return var @@ -489,18 +490,23 @@ def set_variables(self, variables, check_encoding_set, writer, unlimited_dims=No fill_value = attrs.pop("_FillValue", None) if v.encoding == {"_FillValue": None} and fill_value is None: v.encoding = {} - if name in self.ds: + + if self.append_dim is not None and self.append_dim in dims: + # resize existing variable zarr_array = self.ds[name] - if self.append_dim in dims: - # this is the DataArray that has append_dim as a - # dimension - append_axis = dims.index(self.append_dim) - new_shape = list(zarr_array.shape) - new_shape[append_axis] += v.shape[append_axis] - new_region = [slice(None)] * len(new_shape) - new_region[append_axis] = slice(zarr_array.shape[append_axis], None) - zarr_array.resize(new_shape) - writer.add(v.data, zarr_array, region=tuple(new_region)) + append_axis = dims.index(self.append_dim) + + new_region = [slice(None)] * len(dims) + new_region[append_axis] = slice(zarr_array.shape[append_axis], None) + region = tuple(new_region) + + new_shape = list(zarr_array.shape) + new_shape[append_axis] += v.shape[append_axis] + zarr_array.resize(new_shape) + elif name in self.ds: + # override existing variable + zarr_array = self.ds[name] + region = None else: # new variable encoding = extract_zarr_variable_encoding( @@ -518,7 +524,9 @@ def set_variables(self, variables, check_encoding_set, writer, unlimited_dims=No name, shape=shape, dtype=dtype, fill_value=fill_value, **encoding ) zarr_array.attrs.put(encoded_attrs) - writer.add(v.data, zarr_array) + region = None + + writer.add(v.data, zarr_array, region=region) def 
close(self): if self._consolidate_on_close: @@ -540,6 +548,7 @@ def open_zarr( drop_variables=None, consolidated=False, overwrite_encoded_chunks=False, + decode_timedelta=None, **kwargs, ): """Load and decode a dataset from a Zarr store. @@ -599,6 +608,11 @@ def open_zarr( consolidated : bool, optional Whether to open the store using zarr's consolidated metadata capability. Only works for stores that have already been consolidated. + decode_timedelta : bool, optional + If True, decode variables and coordinates with time units in + {'days', 'hours', 'minutes', 'seconds', 'milliseconds', 'microseconds'} + into timedelta objects. If False, leave them encoded as numbers. + If None (default), assume the same value of decode_time. Returns ------- diff --git a/xarray/coding/cftimeindex.py b/xarray/coding/cftimeindex.py index 6fc28d213dd..2a7eaa99edb 100644 --- a/xarray/coding/cftimeindex.py +++ b/xarray/coding/cftimeindex.py @@ -578,7 +578,8 @@ def asi8(self): [ _total_microseconds(exact_cftime_datetime_difference(epoch, date)) for date in self.values - ] + ], + dtype=np.int64, ) def _round_via_method(self, freq, method): diff --git a/xarray/coding/frequencies.py b/xarray/coding/frequencies.py new file mode 100644 index 00000000000..86f84ba5fbd --- /dev/null +++ b/xarray/coding/frequencies.py @@ -0,0 +1,272 @@ +"""FrequencyInferer analog for cftime.datetime objects""" +# The infer_freq method and the _CFTimeFrequencyInferer +# subclass defined here were copied and adapted for +# use with cftime.datetime objects based on the source code in +# pandas.tseries.Frequencies._FrequencyInferer + +# For reference, here is a copy of the pandas copyright notice: + +# (c) 2011-2012, Lambda Foundry, Inc. and PyData Development Team +# All rights reserved. + +# Copyright (c) 2008-2011 AQR Capital Management, LLC +# All rights reserved. + +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: + +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. + +# * Redistributions in binary form must reproduce the above +# copyright notice, this list of conditions and the following +# disclaimer in the documentation and/or other materials provided +# with the distribution. + +# * Neither the name of the copyright holder nor the names of any +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. + +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +import numpy as np +import pandas as pd + +from ..core.common import _contains_datetime_like_objects +from .cftime_offsets import _MONTH_ABBREVIATIONS +from .cftimeindex import CFTimeIndex + +_ONE_MICRO = 1 +_ONE_MILLI = _ONE_MICRO * 1000 +_ONE_SECOND = _ONE_MILLI * 1000 +_ONE_MINUTE = 60 * _ONE_SECOND +_ONE_HOUR = 60 * _ONE_MINUTE +_ONE_DAY = 24 * _ONE_HOUR + + +def infer_freq(index): + """ + Infer the most likely frequency given the input index. + + Parameters + ---------- + index : CFTimeIndex, DataArray, pd.DatetimeIndex, pd.TimedeltaIndex, pd.Series + If not passed a CFTimeIndex, this simply calls `pandas.infer_freq`. + If passed a Series or a DataArray will use the values of the series (NOT THE INDEX). + + Returns + ------- + str or None + None if no discernible frequency. + + Raises + ------ + TypeError + If the index is not datetime-like. + ValueError + If there are fewer than three values or the index is not 1D. + """ + from xarray.core.dataarray import DataArray + + if isinstance(index, (DataArray, pd.Series)): + if index.ndim != 1: + raise ValueError("'index' must be 1D") + elif not _contains_datetime_like_objects(DataArray(index)): + raise ValueError("'index' must contain datetime-like objects") + dtype = np.asarray(index).dtype + if dtype == "datetime64[ns]": + index = pd.DatetimeIndex(index.values) + elif dtype == "timedelta64[ns]": + index = pd.TimedeltaIndex(index.values) + else: + index = CFTimeIndex(index.values) + + if isinstance(index, CFTimeIndex): + inferer = _CFTimeFrequencyInferer(index) + return inferer.get_freq() + + return pd.infer_freq(index) + + +class _CFTimeFrequencyInferer: # (pd.tseries.frequencies._FrequencyInferer): + def __init__(self, index): + self.index = index + self.values = index.asi8 + + if len(index) < 3: + raise ValueError("Need at least 3 dates to infer frequency") + + self.is_monotonic = ( + self.index.is_monotonic_decreasing or self.index.is_monotonic_increasing + ) + + self._deltas = None + self._year_deltas = None + self._month_deltas = None + + def get_freq(self): + """Find the appropriate frequency string to describe the inferred frequency of self.index + + Adapted from `pandas.tsseries.frequencies._FrequencyInferer.get_freq` for CFTimeIndexes. 
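A small usage sketch of the new `infer_freq` entry point defined above: a `CFTimeIndex` is handled by the `_CFTimeFrequencyInferer` below, while datetime64/timedelta64 inputs are delegated to `pandas.infer_freq` (the first example assumes `cftime` is installed):

```python
import pandas as pd
import xarray as xr
from xarray.coding.frequencies import infer_freq

# CFTimeIndex -> handled by the cftime-aware inferer
idx = xr.cftime_range("2000-01-01", periods=4, freq="D", calendar="noleap")
print(infer_freq(idx))  # "D"

# datetime64 values -> delegated to pandas.infer_freq
print(infer_freq(pd.date_range("2000-01-01", periods=4, freq="6H")))  # "6H"
```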
+ + Returns + ------- + str or None + """ + if not self.is_monotonic or not self.index.is_unique: + return None + + delta = self.deltas[0] # Smallest delta + if _is_multiple(delta, _ONE_DAY): + return self._infer_daily_rule() + # There is no possible intraday frequency with a non-unique delta + # Different from pandas: we don't need to manage DST and business offsets in cftime + elif not len(self.deltas) == 1: + return None + + if _is_multiple(delta, _ONE_HOUR): + return _maybe_add_count("H", delta / _ONE_HOUR) + elif _is_multiple(delta, _ONE_MINUTE): + return _maybe_add_count("T", delta / _ONE_MINUTE) + elif _is_multiple(delta, _ONE_SECOND): + return _maybe_add_count("S", delta / _ONE_SECOND) + elif _is_multiple(delta, _ONE_MILLI): + return _maybe_add_count("L", delta / _ONE_MILLI) + else: + return _maybe_add_count("U", delta / _ONE_MICRO) + + def _infer_daily_rule(self): + annual_rule = self._get_annual_rule() + if annual_rule: + nyears = self.year_deltas[0] + month = _MONTH_ABBREVIATIONS[self.index[0].month] + alias = f"{annual_rule}-{month}" + return _maybe_add_count(alias, nyears) + + quartely_rule = self._get_quartely_rule() + if quartely_rule: + nquarters = self.month_deltas[0] / 3 + mod_dict = {0: 12, 2: 11, 1: 10} + month = _MONTH_ABBREVIATIONS[mod_dict[self.index[0].month % 3]] + alias = f"{quartely_rule}-{month}" + return _maybe_add_count(alias, nquarters) + + monthly_rule = self._get_monthly_rule() + if monthly_rule: + return _maybe_add_count(monthly_rule, self.month_deltas[0]) + + if len(self.deltas) == 1: + # Daily as there is no "Weekly" offsets with CFTime + days = self.deltas[0] / _ONE_DAY + return _maybe_add_count("D", days) + + # CFTime has no business freq and no "week of month" (WOM) + return None + + def _get_annual_rule(self): + if len(self.year_deltas) > 1: + return None + + if len(np.unique(self.index.month)) > 1: + return None + + return {"cs": "AS", "ce": "A"}.get(month_anchor_check(self.index)) + + def _get_quartely_rule(self): + if len(self.month_deltas) > 1: + return None + + if not self.month_deltas[0] % 3 == 0: + return None + + return {"cs": "QS", "ce": "Q"}.get(month_anchor_check(self.index)) + + def _get_monthly_rule(self): + if len(self.month_deltas) > 1: + return None + + return {"cs": "MS", "ce": "M"}.get(month_anchor_check(self.index)) + + @property + def deltas(self): + """Sorted unique timedeltas as microseconds.""" + if self._deltas is None: + self._deltas = _unique_deltas(self.values) + return self._deltas + + @property + def year_deltas(self): + """Sorted unique year deltas.""" + if self._year_deltas is None: + self._year_deltas = _unique_deltas(self.index.year) + return self._year_deltas + + @property + def month_deltas(self): + """Sorted unique month deltas.""" + if self._month_deltas is None: + self._month_deltas = _unique_deltas(self.index.year * 12 + self.index.month) + return self._month_deltas + + +def _unique_deltas(arr): + """Sorted unique deltas of numpy array""" + return np.sort(np.unique(np.diff(arr))) + + +def _is_multiple(us, mult: int): + """Whether us is a multiple of mult""" + return us % mult == 0 + + +def _maybe_add_count(base: str, count: float): + """If count is greater than 1, add it to the base offset string""" + if count != 1: + assert count == int(count) + count = int(count) + return f"{count}{base}" + else: + return base + + +def month_anchor_check(dates): + """Return the monthly offset string. 
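The rule methods above translate the unique deltas into pandas-style offset aliases, including multiples and month anchors. A few illustrative outputs (a hedged sketch; assumes `cftime` is installed):

```python
import xarray as xr
from xarray.coding.frequencies import infer_freq

print(infer_freq(xr.cftime_range("2000-01-01", periods=3, freq="2D")))      # "2D"
print(infer_freq(xr.cftime_range("2000-01-01", periods=4, freq="MS")))      # "MS" (month starts)
print(infer_freq(xr.cftime_range("2000-06-01", periods=3, freq="AS-JUN")))  # "AS-JUN"
```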
+ + Return "cs" if all dates are the first days of the month, + "ce" if all dates are the last day of the month, + None otherwise. + + Replicated pandas._libs.tslibs.resolution.month_position_check + but without business offset handling. + """ + calendar_end = True + calendar_start = True + + for date in dates: + if calendar_start: + calendar_start &= date.day == 1 + + if calendar_end: + cal = date.day == date.daysinmonth + if calendar_end: + calendar_end &= cal + elif not calendar_start: + break + + if calendar_end: + return "ce" + elif calendar_start: + return "cs" + else: + return None diff --git a/xarray/coding/times.py b/xarray/coding/times.py index 965ddd8f043..77b2d2c7937 100644 --- a/xarray/coding/times.py +++ b/xarray/coding/times.py @@ -80,8 +80,9 @@ def _decode_cf_datetime_dtype(data, units, calendar, use_cftime): "the default calendar" if calendar is None else "calendar %r" % calendar ) msg = ( - "unable to decode time units %r with %s. Try " - "opening your dataset with decode_times=False." % (units, calendar_msg) + f"unable to decode time units {units!r} with {calendar_msg!r}. Try " + "opening your dataset with decode_times=False or installing cftime " + "if it is not installed." ) raise ValueError(msg) else: @@ -155,9 +156,9 @@ def decode_cf_datetime(num_dates, units, calendar=None, use_cftime=None): if use_cftime is None: try: dates = _decode_datetime_with_pandas(flat_num_dates, units, calendar) - except (OutOfBoundsDatetime, OverflowError): + except (KeyError, OutOfBoundsDatetime, OverflowError): dates = _decode_datetime_with_cftime( - flat_num_dates.astype(np.float), units, calendar + flat_num_dates.astype(float), units, calendar ) if ( @@ -178,7 +179,7 @@ def decode_cf_datetime(num_dates, units, calendar=None, use_cftime=None): dates = cftime_to_nptime(dates) elif use_cftime: dates = _decode_datetime_with_cftime( - flat_num_dates.astype(np.float), units, calendar + flat_num_dates.astype(float), units, calendar ) else: dates = _decode_datetime_with_pandas(flat_num_dates, units, calendar) diff --git a/xarray/conventions.py b/xarray/conventions.py index df24d0d3d8d..fc0572944f3 100644 --- a/xarray/conventions.py +++ b/xarray/conventions.py @@ -116,7 +116,7 @@ def maybe_default_fill_value(var): def maybe_encode_bools(var): if ( - (var.dtype == np.bool) + (var.dtype == bool) and ("dtype" not in var.encoding) and ("dtype" not in var.attrs) ): @@ -266,6 +266,7 @@ def decode_cf_variable( decode_endianness=True, stack_char_dim=True, use_cftime=None, + decode_timedelta=None, ): """ Decodes a variable which may hold CF encoded information. @@ -315,6 +316,9 @@ def decode_cf_variable( var = as_variable(var) original_dtype = var.dtype + if decode_timedelta is None: + decode_timedelta = decode_times + if concat_characters: if stack_char_dim: var = strings.CharacterArrayCoder().decode(var, name=name) @@ -328,12 +332,10 @@ def decode_cf_variable( ]: var = coder.decode(var, name=name) + if decode_timedelta: + var = times.CFTimedeltaCoder().decode(var, name=name) if decode_times: - for coder in [ - times.CFTimedeltaCoder(), - times.CFDatetimeCoder(use_cftime=use_cftime), - ]: - var = coder.decode(var, name=name) + var = times.CFDatetimeCoder(use_cftime=use_cftime).decode(var, name=name) dimensions, data, attributes, encoding = variables.unpack_for_decoding(var) # TODO(shoyer): convert everything below to use coders @@ -442,6 +444,7 @@ def decode_cf_variables( decode_coords=True, drop_variables=None, use_cftime=None, + decode_timedelta=None, ): """ Decode several CF encoded variables. 
@@ -492,6 +495,7 @@ def stackable(dim): decode_times=decode_times, stack_char_dim=stack_char_dim, use_cftime=use_cftime, + decode_timedelta=decode_timedelta, ) if decode_coords: var_attrs = new_vars[k].attrs @@ -518,6 +522,7 @@ def decode_cf( decode_coords=True, drop_variables=None, use_cftime=None, + decode_timedelta=None, ): """Decode the given Dataset or Datastore according to CF conventions into a new Dataset. @@ -552,6 +557,11 @@ def decode_cf( represented using ``np.datetime64[ns]`` objects. If False, always decode times to ``np.datetime64[ns]`` objects; if this is not possible raise an error. + decode_timedelta : bool, optional + If True, decode variables and coordinates with time units in + {'days', 'hours', 'minutes', 'seconds', 'milliseconds', 'microseconds'} + into timedelta objects. If False, leave them encoded as numbers. + If None (default), assume the same value of decode_time. Returns ------- @@ -583,6 +593,7 @@ def decode_cf( decode_coords, drop_variables=drop_variables, use_cftime=use_cftime, + decode_timedelta=decode_timedelta, ) ds = Dataset(vars, attrs=attrs) ds = ds.set_coords(coord_names.union(extra_coords).intersection(vars)) diff --git a/xarray/convert.py b/xarray/convert.py index 4974a55d8e2..0c86b090f34 100644 --- a/xarray/convert.py +++ b/xarray/convert.py @@ -229,11 +229,11 @@ def _iris_cell_methods_to_str(cell_methods_obj): """ cell_methods = [] for cell_method in cell_methods_obj: - names = "".join([f"{n}: " for n in cell_method.coord_names]) + names = "".join(f"{n}: " for n in cell_method.coord_names) intervals = " ".join( - [f"interval: {interval}" for interval in cell_method.intervals] + f"interval: {interval}" for interval in cell_method.intervals ) - comments = " ".join([f"comment: {comment}" for comment in cell_method.comments]) + comments = " ".join(f"comment: {comment}" for comment in cell_method.comments) extra = " ".join([intervals, comments]).strip() if extra: extra = f" ({extra})" diff --git a/xarray/core/accessor_dt.py b/xarray/core/accessor_dt.py index 2977596036c..630aaee142f 100644 --- a/xarray/core/accessor_dt.py +++ b/xarray/core/accessor_dt.py @@ -240,12 +240,6 @@ class DatetimeAccessor(Properties): Fields can be accessed through the `.dt` attribute for applicable DataArrays. - Notes - ------ - Note that these fields are not calendar-aware; if your datetimes are encoded - with a non-Gregorian calendar (e.g. a 360-day calendar) using cftime, - then some fields like `dayofyear` may not be accurate. - Examples --------- >>> import xarray as xr diff --git a/xarray/core/combine.py b/xarray/core/combine.py index 1f990457798..58bd7178fa2 100644 --- a/xarray/core/combine.py +++ b/xarray/core/combine.py @@ -1,7 +1,5 @@ import itertools -import warnings from collections import Counter -from textwrap import dedent import pandas as pd @@ -762,272 +760,3 @@ def combine_by_coords( join=join, combine_attrs=combine_attrs, ) - - -# Everything beyond here is only needed until the deprecation cycle in #2616 -# is completed - - -_CONCAT_DIM_DEFAULT = "__infer_concat_dim__" - - -def auto_combine( - datasets, - concat_dim="_not_supplied", - compat="no_conflicts", - data_vars="all", - coords="different", - fill_value=dtypes.NA, - join="outer", - from_openmfds=False, -): - """ - Attempt to auto-magically combine the given datasets into one. - - This entire function is deprecated in favour of ``combine_nested`` and - ``combine_by_coords``. 
- - This method attempts to combine a list of datasets into a single entity by - inspecting metadata and using a combination of concat and merge. - It does not concatenate along more than one dimension or sort data under - any circumstances. It does align coordinates, but different variables on - datasets can cause it to fail under some scenarios. In complex cases, you - may need to clean up your data and use ``concat``/``merge`` explicitly. - ``auto_combine`` works well if you have N years of data and M data - variables, and each combination of a distinct time period and set of data - variables is saved its own dataset. - - Parameters - ---------- - datasets : sequence of xarray.Dataset - Dataset objects to merge. - concat_dim : str or DataArray or Index, optional - Dimension along which to concatenate variables, as used by - :py:func:`xarray.concat`. You only need to provide this argument if - the dimension along which you want to concatenate is not a dimension - in the original datasets, e.g., if you want to stack a collection of - 2D arrays along a third dimension. - By default, xarray attempts to infer this argument by examining - component files. Set ``concat_dim=None`` explicitly to disable - concatenation. - compat : {'identical', 'equals', 'broadcast_equals', - 'no_conflicts', 'override'}, optional - String indicating how to compare variables of the same name for - potential conflicts: - - - 'broadcast_equals': all values must be equal when variables are - broadcast against each other to ensure common dimensions. - - 'equals': all values and dimensions must be the same. - - 'identical': all values, dimensions and attributes must be the - same. - - 'no_conflicts': only values which are not null in both datasets - must be equal. The returned dataset then contains the combination - of all non-null values. - - 'override': skip comparing and pick variable from first dataset - data_vars : {'minimal', 'different', 'all' or list of str}, optional - Details are in the documentation of concat - coords : {'minimal', 'different', 'all' o list of str}, optional - Details are in the documentation of concat - fill_value : scalar, optional - Value to use for newly missing values - join : {'outer', 'inner', 'left', 'right', 'exact'}, optional - String indicating how to combine differing indexes - (excluding concat_dim) in objects - - - 'outer': use the union of object indexes - - 'inner': use the intersection of object indexes - - 'left': use indexes from the first object with each dimension - - 'right': use indexes from the last object with each dimension - - 'exact': instead of aligning, raise `ValueError` when indexes to be - aligned are not equal - - 'override': if indexes are of same size, rewrite indexes to be - those of the first object with that dimension. Indexes for the same - dimension must have the same size in all objects. - - Returns - ------- - combined : xarray.Dataset - - See also - -------- - concat - Dataset.merge - """ - - if not from_openmfds: - basic_msg = dedent( - """\ - In xarray version 0.15 `auto_combine` will be deprecated. See - http://xarray.pydata.org/en/stable/combining.html#combining-multi""" - ) - warnings.warn(basic_msg, FutureWarning, stacklevel=2) - - if concat_dim == "_not_supplied": - concat_dim = _CONCAT_DIM_DEFAULT - message = "" - else: - message = dedent( - """\ - Also `open_mfdataset` will no longer accept a `concat_dim` argument. 
- To get equivalent behaviour from now on please use the new - `combine_nested` function instead (or the `combine='nested'` option to - `open_mfdataset`).""" - ) - - if _dimension_coords_exist(datasets): - message += dedent( - """\ - The datasets supplied have global dimension coordinates. You may want - to use the new `combine_by_coords` function (or the - `combine='by_coords'` option to `open_mfdataset`) to order the datasets - before concatenation. Alternatively, to continue concatenating based - on the order the datasets are supplied in future, please use the new - `combine_nested` function (or the `combine='nested'` option to - open_mfdataset).""" - ) - else: - message += dedent( - """\ - The datasets supplied do not have global dimension coordinates. In - future, to continue concatenating without supplying dimension - coordinates, please use the new `combine_nested` function (or the - `combine='nested'` option to open_mfdataset.""" - ) - - if _requires_concat_and_merge(datasets): - manual_dims = [concat_dim].append(None) - message += dedent( - """\ - The datasets supplied require both concatenation and merging. From - xarray version 0.15 this will operation will require either using the - new `combine_nested` function (or the `combine='nested'` option to - open_mfdataset), with a nested list structure such that you can combine - along the dimensions {}. Alternatively if your datasets have global - dimension coordinates then you can use the new `combine_by_coords` - function.""".format( - manual_dims - ) - ) - - warnings.warn(message, FutureWarning, stacklevel=2) - - return _old_auto_combine( - datasets, - concat_dim=concat_dim, - compat=compat, - data_vars=data_vars, - coords=coords, - fill_value=fill_value, - join=join, - ) - - -def _dimension_coords_exist(datasets): - """ - Check if the datasets have consistent global dimension coordinates - which would in future be used by `auto_combine` for concatenation ordering. - """ - - # Group by data vars - sorted_datasets = sorted(datasets, key=vars_as_keys) - grouped_by_vars = itertools.groupby(sorted_datasets, key=vars_as_keys) - - # Simulates performing the multidimensional combine on each group of data - # variables before merging back together - try: - for vars, datasets_with_same_vars in grouped_by_vars: - _infer_concat_order_from_coords(list(datasets_with_same_vars)) - return True - except ValueError: - # ValueError means datasets don't have global dimension coordinates - # Or something else went wrong in trying to determine them - return False - - -def _requires_concat_and_merge(datasets): - """ - Check if the datasets require the use of both xarray.concat and - xarray.merge, which in future might require the user to use - `manual_combine` instead. 
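Since `auto_combine` and its helpers are removed here, the messages above point at the two replacements. A minimal sketch of both (dataset contents are illustrative):

```python
import xarray as xr

ds0 = xr.Dataset({"a": ("x", [1, 2])}, coords={"x": [0, 1]})
ds1 = xr.Dataset({"a": ("x", [3, 4])}, coords={"x": [2, 3]})

# global dimension coordinates -> order is inferred from the coordinate values
combined = xr.combine_by_coords([ds0, ds1])

# no usable coordinates -> state the order explicitly with a (nested) list
combined_nested = xr.combine_nested([ds0, ds1], concat_dim="x")

assert combined.sizes["x"] == combined_nested.sizes["x"] == 4
```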
- """ - # Group by data vars - sorted_datasets = sorted(datasets, key=vars_as_keys) - grouped_by_vars = itertools.groupby(sorted_datasets, key=vars_as_keys) - - return len(list(grouped_by_vars)) > 1 - - -def _old_auto_combine( - datasets, - concat_dim=_CONCAT_DIM_DEFAULT, - compat="no_conflicts", - data_vars="all", - coords="different", - fill_value=dtypes.NA, - join="outer", -): - if concat_dim is not None: - dim = None if concat_dim is _CONCAT_DIM_DEFAULT else concat_dim - - sorted_datasets = sorted(datasets, key=vars_as_keys) - grouped = itertools.groupby(sorted_datasets, key=vars_as_keys) - - concatenated = [ - _auto_concat( - list(datasets), - dim=dim, - data_vars=data_vars, - coords=coords, - compat=compat, - fill_value=fill_value, - join=join, - ) - for vars, datasets in grouped - ] - else: - concatenated = datasets - merged = merge(concatenated, compat=compat, fill_value=fill_value, join=join) - return merged - - -def _auto_concat( - datasets, - dim=None, - data_vars="all", - coords="different", - fill_value=dtypes.NA, - join="outer", - compat="no_conflicts", -): - if len(datasets) == 1 and dim is None: - # There is nothing more to combine, so kick out early. - return datasets[0] - else: - if dim is None: - ds0 = datasets[0] - ds1 = datasets[1] - concat_dims = set(ds0.dims) - if ds0.dims != ds1.dims: - dim_tuples = set(ds0.dims.items()) - set(ds1.dims.items()) - concat_dims = {i for i, _ in dim_tuples} - if len(concat_dims) > 1: - concat_dims = {d for d in concat_dims if not ds0[d].equals(ds1[d])} - if len(concat_dims) > 1: - raise ValueError( - "too many different dimensions to " "concatenate: %s" % concat_dims - ) - elif len(concat_dims) == 0: - raise ValueError( - "cannot infer dimension to concatenate: " - "supply the ``concat_dim`` argument " - "explicitly" - ) - (dim,) = concat_dims - return concat( - datasets, - dim=dim, - data_vars=data_vars, - coords=coords, - fill_value=fill_value, - compat=compat, - ) diff --git a/xarray/core/common.py b/xarray/core/common.py index 8f6d57e9f12..f759f4c32dd 100644 --- a/xarray/core/common.py +++ b/xarray/core/common.py @@ -25,7 +25,7 @@ from .options import OPTIONS, _get_keep_attrs from .pycompat import dask_array_type from .rolling_exp import RollingExp -from .utils import Frozen, either_dict_or_kwargs +from .utils import Frozen, either_dict_or_kwargs, is_scalar # Used as a sentinel value to indicate a all dimensions ALL_DIMS = ... @@ -447,7 +447,7 @@ def assign_coords(self, coords=None, **coords_kwargs): New coordinate can also be attached to an existing dimension: >>> lon_2 = np.array([300, 289, 0, 1]) - >>> da.assign_coords(lon_2=('lon', lon_2)) + >>> da.assign_coords(lon_2=("lon", lon_2)) array([0.28298 , 0.667347, 0.657938, 0.177683]) Coordinates: @@ -456,7 +456,7 @@ def assign_coords(self, coords=None, **coords_kwargs): Note that the same result can also be obtained with a dict e.g. - >>> _ = da.assign_coords({"lon_2": ('lon', lon_2)}) + >>> _ = da.assign_coords({"lon_2": ("lon", lon_2)}) Notes ----- @@ -1397,6 +1397,9 @@ def full_like(other, fill_value, dtype: DTypeLike = None): from .dataset import Dataset from .variable import Variable + if not is_scalar(fill_value): + raise ValueError(f"fill_value must be scalar. 
Received {fill_value} instead.") + if isinstance(other, Dataset): data_vars = { k: _full_like_variable(v, fill_value, dtype) @@ -1478,7 +1481,7 @@ def zeros_like(other, dtype: DTypeLike = None): * lat (lat) int64 1 2 * lon (lon) int64 0 1 2 - >>> xr.zeros_like(x, dtype=np.float) + >>> xr.zeros_like(x, dtype=float) array([[0., 0., 0.], [0., 0., 0.]]) diff --git a/xarray/core/computation.py b/xarray/core/computation.py index 6cf4178b5bf..d8a0c53e817 100644 --- a/xarray/core/computation.py +++ b/xarray/core/computation.py @@ -24,9 +24,8 @@ import numpy as np from . import dtypes, duck_array_ops, utils -from .alignment import deep_align +from .alignment import align, deep_align from .merge import merge_coordinates_without_align -from .nanops import dask_array from .options import OPTIONS from .pycompat import dask_array_type from .utils import is_dict_like @@ -1070,6 +1069,200 @@ def earth_mover_distance(first_samples, return apply_array_ufunc(func, *args, dask=dask) +def cov(da_a, da_b, dim=None, ddof=1): + """ + Compute covariance between two DataArray objects along a shared dimension. + + Parameters + ---------- + da_a: DataArray object + Array to compute. + da_b: DataArray object + Array to compute. + dim : str, optional + The dimension along which the covariance will be computed + ddof: int, optional + If ddof=1, covariance is normalized by N-1, giving an unbiased estimate, + else normalization is by N. + + Returns + ------- + covariance: DataArray + + See also + -------- + pandas.Series.cov: corresponding pandas function + xr.corr: respective function to calculate correlation + + Examples + -------- + >>> da_a = DataArray( + ... np.array([[1, 2, 3], [0.1, 0.2, 0.3], [3.2, 0.6, 1.8]]), + ... dims=("space", "time"), + ... coords=[ + ... ("space", ["IA", "IL", "IN"]), + ... ("time", pd.date_range("2000-01-01", freq="1D", periods=3)), + ... ], + ... ) + >>> da_a + + array([[1. , 2. , 3. ], + [0.1, 0.2, 0.3], + [3.2, 0.6, 1.8]]) + Coordinates: + * space (space) >> da_b = DataArray( + ... np.array([[0.2, 0.4, 0.6], [15, 10, 5], [3.2, 0.6, 1.8]]), + ... dims=("space", "time"), + ... coords=[ + ... ("space", ["IA", "IL", "IN"]), + ... ("time", pd.date_range("2000-01-01", freq="1D", periods=3)), + ... ], + ... ) + >>> da_b + + array([[ 0.2, 0.4, 0.6], + [15. , 10. , 5. ], + [ 3.2, 0.6, 1.8]]) + Coordinates: + * space (space) >> xr.cov(da_a, da_b) + + array(-3.53055556) + >>> xr.cov(da_a, da_b, dim="time") + + array([ 0.2, -0.5, 1.69333333]) + Coordinates: + * space (space) >> da_a = DataArray( + ... np.array([[1, 2, 3], [0.1, 0.2, 0.3], [3.2, 0.6, 1.8]]), + ... dims=("space", "time"), + ... coords=[ + ... ("space", ["IA", "IL", "IN"]), + ... ("time", pd.date_range("2000-01-01", freq="1D", periods=3)), + ... ], + ... ) + >>> da_a + + array([[1. , 2. , 3. ], + [0.1, 0.2, 0.3], + [3.2, 0.6, 1.8]]) + Coordinates: + * space (space) >> da_b = DataArray( + ... np.array([[0.2, 0.4, 0.6], [15, 10, 5], [3.2, 0.6, 1.8]]), + ... dims=("space", "time"), + ... coords=[ + ... ("space", ["IA", "IL", "IN"]), + ... ("time", pd.date_range("2000-01-01", freq="1D", periods=3)), + ... ], + ... ) + >>> da_b + + array([[ 0.2, 0.4, 0.6], + [15. , 10. , 5. ], + [ 3.2, 0.6, 1.8]]) + Coordinates: + * space (space) >> xr.corr(da_a, da_b) + + array(-0.57087777) + >>> xr.corr(da_a, da_b, dim="time") + + array([ 1., -1., 1.]) + Coordinates: + * space (space) ...c' # Note: input_core_dims are always moved to the last position subscripts_list = [ - "..." + "".join([dim_map[d] for d in ds]) for ds in input_core_dims + "..." 
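For orientation, the two new reductions above are related in the usual way: the Pearson correlation is the covariance normalised by both standard deviations, provided the same (population, `ddof=0`) normalisation is used throughout. A hedged check for fully observed, already-aligned inputs:

```python
import numpy as np
import xarray as xr

rng = np.random.default_rng(0)
a = xr.DataArray(rng.normal(size=100), dims="time")
b = xr.DataArray(rng.normal(size=100), dims="time")

manual = xr.cov(a, b, ddof=0) / (a.std() * b.std())  # DataArray.std defaults to ddof=0
np.testing.assert_allclose(xr.corr(a, b), manual)
```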
+ "".join(dim_map[d] for d in ds) for ds in input_core_dims ] subscripts = ",".join(subscripts_list) - subscripts += "->..." + "".join([dim_map[d] for d in output_core_dims[0]]) + subscripts += "->..." + "".join(dim_map[d] for d in output_core_dims[0]) join = OPTIONS["arithmetic_join"] # using "inner" emulates `(a * b).sum()` for all joins (except "exact") @@ -1329,7 +1522,7 @@ def polyval(coord, coeffs, degree_dim="degree"): from .dataarray import DataArray from .missing import get_clean_interp_index - x = get_clean_interp_index(coord, coord.name) + x = get_clean_interp_index(coord, coord.name, strict=False) deg_coord = coeffs[degree_dim] @@ -1380,24 +1573,24 @@ def _calc_idxminmax( # This will run argmin or argmax. indx = func(array, dim=dim, axis=None, keep_attrs=keep_attrs, skipna=skipna) - # Get the coordinate we want. - coordarray = array[dim] - # Handle dask arrays. - if isinstance(array, dask_array_type): - res = dask_array.map_blocks(coordarray, indx, dtype=indx.dtype) + if isinstance(array.data, dask_array_type): + import dask.array + + chunks = dict(zip(array.dims, array.chunks)) + dask_coord = dask.array.from_array(array[dim].data, chunks=chunks[dim]) + res = indx.copy(data=dask_coord[indx.data.ravel()].reshape(indx.shape)) + # we need to attach back the dim name + res.name = dim else: - res = coordarray[ - indx, - ] + res = array[dim][(indx,)] + # The dim is gone but we need to remove the corresponding coordinate. + del res.coords[dim] if skipna or (skipna is None and array.dtype.kind in na_dtypes): # Put the NaN values back in after removing them res = res.where(~allna, fill_value) - # The dim is gone but we need to remove the corresponding coordinate. - del res.coords[dim] - # Copy attributes from argmin/argmax, if any res.attrs = indx.attrs diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py index ffa05ca64f0..0ce76a5e23a 100644 --- a/xarray/core/dataarray.py +++ b/xarray/core/dataarray.py @@ -1,6 +1,5 @@ import datetime import functools -import warnings from numbers import Number from typing import ( TYPE_CHECKING, @@ -54,7 +53,7 @@ from .formatting import format_item from .indexes import Indexes, default_indexes, propagate_indexes from .indexing import is_fancy_indexer -from .merge import PANDAS_TYPES, _extract_indexes_from_coords +from .merge import PANDAS_TYPES, MergeError, _extract_indexes_from_coords from .options import OPTIONS from .utils import Default, ReprObject, _check_inplace, _default, either_dict_or_kwargs from .variable import ( @@ -261,7 +260,7 @@ class DataArray(AbstractArray, DataWithCoords): _resample_cls = resample.DataArrayResample _weighted_cls = weighted.DataArrayWeighted - dt = property(CombinedDatetimelikeAccessor) + dt = utils.UncachedAccessor(CombinedDatetimelikeAccessor) def __init__( self, @@ -1077,6 +1076,19 @@ def sel( """Return a new DataArray whose data is given by selecting index labels along the specified dimension(s). + In contrast to `DataArray.isel`, indexers for this method should use + labels instead of integers. + + Under the hood, this method is powered by using pandas's powerful Index + objects. This makes label based indexing essentially just as fast as + using integer indexing. + + It also means this method uses pandas's (well documented) logic for + indexing. This means you can use string shortcuts for datetime indexes + (e.g., '2000-01' to select all values in January 2000). It also means + that slices are treated as inclusive of both the start and stop values, + unlike normal Python indexing. + .. 
warning:: Do not try to assign values when using any of the indexing methods @@ -1089,6 +1101,45 @@ def sel( Assigning values with the chained indexing using ``.sel`` or ``.isel`` fails silently. + Parameters + ---------- + indexers : dict, optional + A dict with keys matching dimensions and values given + by scalars, slices or arrays of tick labels. For dimensions with + multi-index, the indexer may also be a dict-like object with keys + matching index level names. + If DataArrays are passed as indexers, xarray-style indexing will be + carried out. See :ref:`indexing` for the details. + One of indexers or indexers_kwargs must be provided. + method : {None, 'nearest', 'pad'/'ffill', 'backfill'/'bfill'}, optional + Method to use for inexact matches: + + * None (default): only exact matches + * pad / ffill: propagate last valid index value forward + * backfill / bfill: propagate next valid index value backward + * nearest: use nearest valid index value + tolerance : optional + Maximum distance between original and new labels for inexact + matches. The values of the index at the matching locations must + satisfy the equation ``abs(index[indexer] - target) <= tolerance``. + drop : bool, optional + If ``drop=True``, drop coordinates variables in `indexers` instead + of making them scalar. + **indexers_kwargs : {dim: indexer, ...}, optional + The keyword arguments form of ``indexers``. + One of indexers or indexers_kwargs must be provided. + + Returns + ------- + obj : DataArray + A new DataArray with the same contents as this DataArray, except the + data and each dimension is indexed by the appropriate indexers. + If indexer DataArrays have coordinates that do not conflict with + this object, then these coordinates will be attached. + In general, each array's data will be a view of the array's data + in this DataArray, unless vectorized indexing was triggered by using + an array indexer, in which case the data will be a copy. + See Also -------- Dataset.sel @@ -1915,7 +1966,7 @@ def to_unstacked_dataset(self, dim, level=0): # unstacked dataset return Dataset(data_dict) - def transpose(self, *dims: Hashable, transpose_coords: bool = None) -> "DataArray": + def transpose(self, *dims: Hashable, transpose_coords: bool = True) -> "DataArray": """Return a new DataArray object with transposed dimensions. Parameters @@ -1923,7 +1974,7 @@ def transpose(self, *dims: Hashable, transpose_coords: bool = None) -> "DataArra *dims : hashable, optional By default, reverse the dimensions. Otherwise, reorder the dimensions to this order. - transpose_coords : boolean, optional + transpose_coords : boolean, default True If True, also transpose the coordinates of this DataArray. Returns @@ -1952,15 +2003,6 @@ def transpose(self, *dims: Hashable, transpose_coords: bool = None) -> "DataArra coords[name] = coord.variable.transpose(*coord_dims) return self._replace(variable, coords) else: - if transpose_coords is None and any(self[c].ndim > 1 for c in self.coords): - warnings.warn( - "This DataArray contains multi-dimensional " - "coordinates. In the future, these coordinates " - "will be transposed as well unless you specify " - "transpose_coords=False.", - FutureWarning, - stacklevel=2, - ) return self._replace(variable) @property @@ -2671,8 +2713,15 @@ def func(self, other): # don't support automatic alignment with in-place arithmetic. 
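The clearer `MergeError` raised above appears whenever an in-place operation would need automatic alignment. A sketch of the failure and the two suggested workarounds:

```python
import xarray as xr

a = xr.DataArray([1, 2, 3], dims="x", coords={"x": [0, 1, 2]})
b = xr.DataArray([10, 20, 30], dims="x", coords={"x": [1, 2, 3]})

try:
    a += b                      # conflicting "x" indexes cannot be aligned in place
except xr.MergeError as err:
    print(err)

a_aligned, b_aligned = xr.align(a, b, join="inner")
a_aligned += b_aligned          # works: indexes already match

c = a + b                       # or use the out-of-place operation, which aligns
```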
other_coords = getattr(other, "coords", None) other_variable = getattr(other, "variable", other) - with self.coords._merge_inplace(other_coords): - f(self.variable, other_variable) + try: + with self.coords._merge_inplace(other_coords): + f(self.variable, other_variable) + except MergeError as exc: + raise MergeError( + "Automatic alignment is not supported for in-place operations.\n" + "Consider aligning the indices manually or using a not-in-place operation.\n" + "See https://github.com/pydata/xarray/issues/3910 for more explanations." + ) from exc return self return func @@ -2680,24 +2729,7 @@ def func(self, other): def _copy_attrs_from(self, other: Union["DataArray", Dataset, Variable]) -> None: self.attrs = other.attrs - @property - def plot(self) -> _PlotMethods: - """ - Access plotting functions for DataArray's - - >>> d = xr.DataArray([[1, 2], [3, 4]]) - - For convenience just call this directly - - >>> d.plot() - - Or use it as a namespace to use xarray.plot functions as - DataArray methods - - >>> d.plot.imshow() # equivalent to xarray.plot.imshow(d) - - """ - return _PlotMethods(self) + plot = utils.UncachedAccessor(_PlotMethods) def _title_for_slice(self, truncate: int = 50) -> str: """ @@ -3260,57 +3292,107 @@ def map_blocks( func: "Callable[..., T_DSorDA]", args: Sequence[Any] = (), kwargs: Mapping[str, Any] = None, + template: Union["DataArray", "Dataset"] = None, ) -> "T_DSorDA": """ - Apply a function to each chunk of this DataArray. This method is experimental - and its signature may change. + Apply a function to each block of this DataArray. + + .. warning:: + This method is experimental and its signature may change. Parameters ---------- func: callable - User-provided function that accepts a DataArray as its first parameter. The - function will receive a subset of this DataArray, corresponding to one chunk - along each chunked dimension. ``func`` will be executed as - ``func(obj_subset, *args, **kwargs)``. - - The function will be first run on mocked-up data, that looks like this array - but has sizes 0, to determine properties of the returned object such as - dtype, variable names, new dimensions and new indexes (if any). + User-provided function that accepts a DataArray as its first + parameter. The function will receive a subset or 'block' of this DataArray (see below), + corresponding to one chunk along each chunked dimension. ``func`` will be + executed as ``func(subset_dataarray, *subset_args, **kwargs)``. This function must return either a single DataArray or a single Dataset. - This function cannot change size of existing dimensions, or add new chunked - dimensions. + This function cannot add a new chunked dimension. + + obj: DataArray, Dataset + Passed to the function as its first argument, one block at a time. args: Sequence - Passed verbatim to func after unpacking, after the sliced DataArray. xarray - objects, if any, will not be split by chunks. Passing dask collections is - not allowed. + Passed to func after unpacking and subsetting any xarray objects by blocks. + xarray objects in args must be aligned with obj, otherwise an error is raised. kwargs: Mapping Passed verbatim to func after unpacking. xarray objects, if any, will not be - split by chunks. Passing dask collections is not allowed. + subset to blocks. Passing dask collections in kwargs is not allowed. + template: (optional) DataArray, Dataset + xarray object representing the final result after compute is called. 
If not provided, + the function will be first run on mocked-up data, that looks like ``obj`` but + has sizes 0, to determine properties of the returned object such as dtype, + variable names, attributes, new dimensions and new indexes (if any). + ``template`` must be provided if the function changes the size of existing dimensions. + When provided, ``attrs`` on variables in `template` are copied over to the result. Any + ``attrs`` set by ``func`` will be ignored. + Returns ------- - A single DataArray or Dataset with dask backend, reassembled from the outputs of - the function. + A single DataArray or Dataset with dask backend, reassembled from the outputs of the + function. Notes ----- - This method is designed for when one needs to manipulate a whole xarray object - within each chunk. In the more common case where one can work on numpy arrays, - it is recommended to use apply_ufunc. + This function is designed for when ``func`` needs to manipulate a whole xarray object + subset to each block. In the more common case where ``func`` can work on numpy arrays, it is + recommended to use ``apply_ufunc``. - If none of the variables in this DataArray is backed by dask, calling this - method is equivalent to calling ``func(self, *args, **kwargs)``. + If none of the variables in ``obj`` is backed by dask arrays, calling this function is + equivalent to calling ``func(obj, *args, **kwargs)``. See Also -------- - dask.array.map_blocks, xarray.apply_ufunc, xarray.map_blocks, - xarray.Dataset.map_blocks + dask.array.map_blocks, xarray.apply_ufunc, xarray.Dataset.map_blocks, + xarray.DataArray.map_blocks + + Examples + -------- + + Calculate an anomaly from climatology using ``.groupby()``. Using + ``xr.map_blocks()`` allows for parallel operations with knowledge of ``xarray``, + its indices, and its methods like ``.groupby()``. + + >>> def calculate_anomaly(da, groupby_type="time.month"): + ... gb = da.groupby(groupby_type) + ... clim = gb.mean(dim="time") + ... return gb - clim + >>> time = xr.cftime_range("1990-01", "1992-01", freq="M") + >>> np.random.seed(123) + >>> array = xr.DataArray( + ... np.random.rand(len(time)), dims="time", coords=[time] + ... ).chunk() + >>> array.map_blocks(calculate_anomaly, template=array).compute() + + array([ 0.12894847, 0.11323072, -0.0855964 , -0.09334032, 0.26848862, + 0.12382735, 0.22460641, 0.07650108, -0.07673453, -0.22865714, + -0.19063865, 0.0590131 , -0.12894847, -0.11323072, 0.0855964 , + 0.09334032, -0.26848862, -0.12382735, -0.22460641, -0.07650108, + 0.07673453, 0.22865714, 0.19063865, -0.0590131 ]) + Coordinates: + * time (time) object 1990-01-31 00:00:00 ... 1991-12-31 00:00:00 + + Note that one must explicitly use ``args=[]`` and ``kwargs={}`` to pass arguments + to the function being applied in ``xr.map_blocks()``: + + >>> array.map_blocks( + ... calculate_anomaly, kwargs={"groupby_type": "time.year"}, template=array, + ... ) + + array([ 0.15361741, -0.25671244, -0.31600032, 0.008463 , 0.1766172 , + -0.11974531, 0.43791243, 0.14197797, -0.06191987, -0.15073425, + -0.19967375, 0.18619794, -0.05100474, -0.42989909, -0.09153273, + 0.24841842, -0.30708526, -0.31412523, 0.04197439, 0.0422506 , + 0.14482397, 0.35985481, 0.23487834, 0.12144652]) + Coordinates: + * time (time) object 1990-01-31 00:00:00 ... 
1991-12-31 00:00:00 """ from .parallel import map_blocks - return map_blocks(func, self, args, kwargs) + return map_blocks(func, self, args, kwargs, template) def polyfit( self, @@ -3495,17 +3577,18 @@ def pad( Examples -------- - >>> arr = xr.DataArray([5, 6, 7], coords=[("x", [0,1,2])]) - >>> arr.pad(x=(1,2), constant_values=0) + >>> arr = xr.DataArray([5, 6, 7], coords=[("x", [0, 1, 2])]) + >>> arr.pad(x=(1, 2), constant_values=0) array([0, 5, 6, 7, 0, 0]) Coordinates: * x (x) float64 nan 0.0 1.0 2.0 nan nan - >>> da = xr.DataArray([[0,1,2,3], [10,11,12,13]], - dims=["x", "y"], - coords={"x": [0,1], "y": [10, 20 ,30, 40], "z": ("x", [100, 200])} - ) + >>> da = xr.DataArray( + ... [[0, 1, 2, 3], [10, 11, 12, 13]], + ... dims=["x", "y"], + ... coords={"x": [0, 1], "y": [10, 20, 30, 40], "z": ("x", [100, 200])}, + ... ) >>> da.pad(x=1) array([[nan, nan, nan, nan], @@ -3592,8 +3675,9 @@ def idxmin( Examples -------- - >>> array = xr.DataArray([0, 2, 1, 0, -2], dims="x", - ... coords={"x": ['a', 'b', 'c', 'd', 'e']}) + >>> array = xr.DataArray( + ... [0, 2, 1, 0, -2], dims="x", coords={"x": ["a", "b", "c", "d", "e"]} + ... ) >>> array.min() array(-2) @@ -3604,13 +3688,15 @@ def idxmin( array('e', dtype='>> array = xr.DataArray([[2.0, 1.0, 2.0, 0.0, -2.0], - ... [-4.0, np.NaN, 2.0, np.NaN, -2.0], - ... [np.NaN, np.NaN, 1., np.NaN, np.NaN]], - ... dims=["y", "x"], - ... coords={"y": [-1, 0, 1], - ... "x": np.arange(5.)**2} - ... ) + >>> array = xr.DataArray( + ... [ + ... [2.0, 1.0, 2.0, 0.0, -2.0], + ... [-4.0, np.NaN, 2.0, np.NaN, -2.0], + ... [np.NaN, np.NaN, 1.0, np.NaN, np.NaN], + ... ], + ... dims=["y", "x"], + ... coords={"y": [-1, 0, 1], "x": np.arange(5.0) ** 2}, + ... ) >>> array.min(dim="x") array([-2., -4., 1.]) @@ -3686,8 +3772,9 @@ def idxmax( Examples -------- - >>> array = xr.DataArray([0, 2, 1, 0, -2], dims="x", - ... coords={"x": ['a', 'b', 'c', 'd', 'e']}) + >>> array = xr.DataArray( + ... [0, 2, 1, 0, -2], dims="x", coords={"x": ["a", "b", "c", "d", "e"]} + ... ) >>> array.max() array(2) @@ -3698,13 +3785,15 @@ def idxmax( array('b', dtype='>> array = xr.DataArray([[2.0, 1.0, 2.0, 0.0, -2.0], - ... [-4.0, np.NaN, 2.0, np.NaN, -2.0], - ... [np.NaN, np.NaN, 1., np.NaN, np.NaN]], - ... dims=["y", "x"], - ... coords={"y": [-1, 0, 1], - ... "x": np.arange(5.)**2} - ... ) + >>> array = xr.DataArray( + ... [ + ... [2.0, 1.0, 2.0, 0.0, -2.0], + ... [-4.0, np.NaN, 2.0, np.NaN, -2.0], + ... [np.NaN, np.NaN, 1.0, np.NaN, np.NaN], + ... ], + ... dims=["y", "x"], + ... coords={"y": [-1, 0, 1], "x": np.arange(5.0) ** 2}, + ... ) >>> array.max(dim="x") array([2., 2., 1.]) @@ -3730,9 +3819,212 @@ def idxmax( keep_attrs=keep_attrs, ) + def argmin( + self, + dim: Union[Hashable, Sequence[Hashable]] = None, + axis: int = None, + keep_attrs: bool = None, + skipna: bool = None, + ) -> Union["DataArray", Dict[Hashable, "DataArray"]]: + """Index or indices of the minimum of the DataArray over one or more dimensions. + + If a sequence is passed to 'dim', then result returned as dict of DataArrays, + which can be passed directly to isel(). If a single str is passed to 'dim' then + returns a DataArray with dtype int. + + If there are multiple minima, the indices of the first one found will be + returned. + + Parameters + ---------- + dim : hashable, sequence of hashable or ..., optional + The dimensions over which to find the minimum. 
By default, finds minimum over + all dimensions - for now returning an int for backward compatibility, but + this is deprecated, in future will return a dict with indices for all + dimensions; to return a dict with all dimensions now, pass '...'. + axis : int, optional + Axis over which to apply `argmin`. Only one of the 'dim' and 'axis' arguments + can be supplied. + keep_attrs : bool, optional + If True, the attributes (`attrs`) will be copied from the original + object to the new one. If False (default), the new object will be + returned without attributes. + skipna : bool, optional + If True, skip missing values (as marked by NaN). By default, only + skips missing values for float dtypes; other dtypes either do not + have a sentinel missing value (int) or skipna=True has not been + implemented (object, datetime64 or timedelta64). + + Returns + ------- + result : DataArray or dict of DataArray + + See also + -------- + Variable.argmin, DataArray.idxmin + + Examples + -------- + >>> array = xr.DataArray([0, 2, -1, 3], dims="x") + >>> array.min() + + array(-1) + >>> array.argmin() + + array(2) + >>> array.argmin(...) + {'x': + array(2)} + >>> array.isel(array.argmin(...)) + array(-1) + + >>> array = xr.DataArray([[[3, 2, 1], [3, 1, 2], [2, 1, 3]], + ... [[1, 3, 2], [2, -5, 1], [2, 3, 1]]], + ... dims=("x", "y", "z")) + >>> array.min(dim="x") + + array([[ 1, 2, 1], + [ 2, -5, 1], + [ 2, 1, 1]]) + Dimensions without coordinates: y, z + >>> array.argmin(dim="x") + + array([[1, 0, 0], + [1, 1, 1], + [0, 0, 1]]) + Dimensions without coordinates: y, z + >>> array.argmin(dim=["x"]) + {'x': + array([[1, 0, 0], + [1, 1, 1], + [0, 0, 1]]) + Dimensions without coordinates: y, z} + >>> array.min(dim=("x", "z")) + + array([ 1, -5, 1]) + Dimensions without coordinates: y + >>> array.argmin(dim=["x", "z"]) + {'x': + array([0, 1, 0]) + Dimensions without coordinates: y, 'z': + array([2, 1, 1]) + Dimensions without coordinates: y} + >>> array.isel(array.argmin(dim=["x", "z"])) + + array([ 1, -5, 1]) + Dimensions without coordinates: y + """ + result = self.variable.argmin(dim, axis, keep_attrs, skipna) + if isinstance(result, dict): + return {k: self._replace_maybe_drop_dims(v) for k, v in result.items()} + else: + return self._replace_maybe_drop_dims(result) + + def argmax( + self, + dim: Union[Hashable, Sequence[Hashable]] = None, + axis: int = None, + keep_attrs: bool = None, + skipna: bool = None, + ) -> Union["DataArray", Dict[Hashable, "DataArray"]]: + """Index or indices of the maximum of the DataArray over one or more dimensions. + + If a sequence is passed to 'dim', then result returned as dict of DataArrays, + which can be passed directly to isel(). If a single str is passed to 'dim' then + returns a DataArray with dtype int. + + If there are multiple maxima, the indices of the first one found will be + returned. + + Parameters + ---------- + dim : hashable, sequence of hashable or ..., optional + The dimensions over which to find the maximum. By default, finds maximum over + all dimensions - for now returning an int for backward compatibility, but + this is deprecated, in future will return a dict with indices for all + dimensions; to return a dict with all dimensions now, pass '...'. + axis : int, optional + Axis over which to apply `argmin`. Only one of the 'dim' and 'axis' arguments + can be supplied. + keep_attrs : bool, optional + If True, the attributes (`attrs`) will be copied from the original + object to the new one. 
If False (default), the new object will be + returned without attributes. + skipna : bool, optional + If True, skip missing values (as marked by NaN). By default, only + skips missing values for float dtypes; other dtypes either do not + have a sentinel missing value (int) or skipna=True has not been + implemented (object, datetime64 or timedelta64). + + Returns + ------- + result : DataArray or dict of DataArray + + See also + -------- + Variable.argmax, DataArray.idxmax + + Examples + -------- + >>> array = xr.DataArray([0, 2, -1, 3], dims="x") + >>> array.max() + + array(3) + >>> array.argmax() + + array(3) + >>> array.argmax(...) + {'x': + array(3)} + >>> array.isel(array.argmax(...)) + + array(3) + + >>> array = xr.DataArray([[[3, 2, 1], [3, 1, 2], [2, 1, 3]], + ... [[1, 3, 2], [2, 5, 1], [2, 3, 1]]], + ... dims=("x", "y", "z")) + >>> array.max(dim="x") + + array([[3, 3, 2], + [3, 5, 2], + [2, 3, 3]]) + Dimensions without coordinates: y, z + >>> array.argmax(dim="x") + + array([[0, 1, 1], + [0, 1, 0], + [0, 1, 0]]) + Dimensions without coordinates: y, z + >>> array.argmax(dim=["x"]) + {'x': + array([[0, 1, 1], + [0, 1, 0], + [0, 1, 0]]) + Dimensions without coordinates: y, z} + >>> array.max(dim=("x", "z")) + + array([3, 5, 3]) + Dimensions without coordinates: y + >>> array.argmax(dim=["x", "z"]) + {'x': + array([0, 1, 0]) + Dimensions without coordinates: y, 'z': + array([0, 1, 2]) + Dimensions without coordinates: y} + >>> array.isel(array.argmax(dim=["x", "z"])) + + array([3, 5, 3]) + Dimensions without coordinates: y + """ + result = self.variable.argmax(dim, axis, keep_attrs, skipna) + if isinstance(result, dict): + return {k: self._replace_maybe_drop_dims(v) for k, v in result.items()} + else: + return self._replace_maybe_drop_dims(result) + # this needs to be at the end, or mypy will confuse with `str` # https://mypy.readthedocs.io/en/latest/common_issues.html#dealing-with-conflicting-names - str = property(StringAccessor) + str = utils.UncachedAccessor(StringAccessor) # priority most be higher than Variable to properly work with binary ufuncs diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index d811d54847f..b46b1d6dce0 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -27,6 +27,7 @@ TypeVar, Union, cast, + overload, ) import numpy as np @@ -329,7 +330,7 @@ def split_indexes( else: vars_to_remove.append(d) if not drop: - vars_to_create[str(d) + "_"] = Variable(d, index) + vars_to_create[str(d) + "_"] = Variable(d, index, variables[d].attrs) for d, levs in dim_levels.items(): index = variables[d].to_index() @@ -341,7 +342,7 @@ def split_indexes( if not drop: for lev in levs: idx = index.get_level_values(lev) - vars_to_create[idx.name] = Variable(d, idx) + vars_to_create[idx.name] = Variable(d, idx, variables[d].attrs) new_variables = dict(variables) for v in set(vars_to_remove): @@ -1055,9 +1056,7 @@ def copy(self, deep: bool = False, data: Mapping = None) -> "Dataset": structure of the original object, but with the new data. Original object is unaffected. - >>> ds.copy( - ... data={"foo": np.arange(6).reshape(2, 3), "bar": ["a", "b"]} - ... 
) + >>> ds.copy(data={"foo": np.arange(6).reshape(2, 3), "bar": ["a", "b"]}) Dimensions: (dim_0: 2, dim_1: 3, x: 2) Coordinates: @@ -1243,13 +1242,25 @@ def loc(self) -> _LocIndexer: """ return _LocIndexer(self) - def __getitem__(self, key: Any) -> "Union[DataArray, Dataset]": + # FIXME https://github.com/python/mypy/issues/7328 + @overload + def __getitem__(self, key: Mapping) -> "Dataset": # type: ignore + ... + + @overload + def __getitem__(self, key: Hashable) -> "DataArray": # type: ignore + ... + + @overload + def __getitem__(self, key: Any) -> "Dataset": + ... + + def __getitem__(self, key): """Access variables or coordinates this dataset as a :py:class:`~xarray.DataArray`. Indexing with a list of names will return a new ``Dataset`` object. """ - # TODO(shoyer): type this properly: https://github.com/python/mypy/issues/7328 if utils.is_dict_like(key): return self.isel(**cast(Mapping, key)) @@ -1537,7 +1548,7 @@ def to_netcdf( ``dask.delayed.Delayed`` object that can be computed later. invalid_netcdf: boolean Only valid along with engine='h5netcdf'. If True, allow writing - hdf5 files which are valid netcdf as described in + hdf5 files which are invalid netcdf as described in https://github.com/shoyer/h5netcdf. Default: False. """ if encoding is None: @@ -1581,7 +1592,7 @@ def to_zarr( mode : {'w', 'w-', 'a', None} Persistence mode: 'w' means create (overwrite if exists); 'w-' means create (fail if exists); - 'a' means append (create if does not exist). + 'a' means override existing variables (create if does not exist). If ``append_dim`` is set, ``mode`` can be omitted as it is internally set to ``'a'``. Otherwise, ``mode`` will default to `w-` if not set. @@ -1600,11 +1611,21 @@ def to_zarr( If True, apply zarr's `consolidate_metadata` function to the store after writing. append_dim: hashable, optional - If set, the dimension on which the data will be appended. + If set, the dimension along which the data will be appended. All + other dimensions on overriden variables must remain the same size. References ---------- https://zarr.readthedocs.io/ + + Notes + ----- + Zarr chunking behavior: + If chunks are found in the encoding argument or attribute + corresponding to any DataArray, those chunks are used. + If a DataArray is a dask array, it is written with those chunks. + If not other chunks are found, Zarr uses its own heuristics to + choose automatic chunk sizes. """ if encoding is None: encoding = {} @@ -1699,7 +1720,10 @@ def chunks(self) -> Mapping[Hashable, Tuple[int, ...]]: def chunk( self, chunks: Union[ - None, Number, Mapping[Hashable, Union[None, Number, Tuple[Number, ...]]] + None, + Number, + str, + Mapping[Hashable, Union[None, Number, str, Tuple[Number, ...]]], ] = None, name_prefix: str = "xarray-", token: str = None, @@ -1717,7 +1741,7 @@ def chunk( Parameters ---------- - chunks : int or mapping, optional + chunks : int, 'auto' or mapping, optional Chunk sizes along each dimension, e.g., ``5`` or ``{'x': 5, 'y': 5}``. 
name_prefix : str, optional @@ -1734,7 +1758,7 @@ def chunk( """ from dask.base import tokenize - if isinstance(chunks, Number): + if isinstance(chunks, (Number, str)): chunks = dict.fromkeys(self.dims, chunks) if chunks is not None: @@ -1768,7 +1792,7 @@ def maybe_chunk(name, var, chunks): return self._replace(variables) def _validate_indexers( - self, indexers: Mapping[Hashable, Any], missing_dims: str = "raise", + self, indexers: Mapping[Hashable, Any], missing_dims: str = "raise" ) -> Iterator[Tuple[Hashable, Union[int, slice, np.ndarray, Variable]]]: """ Here we make sure + indexer has a valid keys @@ -4526,7 +4550,7 @@ def _set_sparse_data_from_dataframe( idx = dataframe.index if isinstance(idx, pd.MultiIndex): coords = np.stack([np.asarray(code) for code in idx.codes], axis=0) - is_sorted = idx.is_lexsorted + is_sorted = idx.is_lexsorted() shape = tuple(lev.size for lev in idx.levels) else: coords = np.arange(idx.size).reshape(1, -1) @@ -4598,6 +4622,7 @@ def from_dataframe(cls, dataframe: pd.DataFrame, sparse: bool = False) -> "Datas See also -------- xarray.DataArray.from_series + pandas.DataFrame.to_xarray """ # TODO: Add an option to remove dimensions along which the variables # are constant, to enable consistent serialization to/from a dataframe, @@ -5551,16 +5576,7 @@ def real(self): def imag(self): return self._unary_op(lambda x: x.imag, keep_attrs=True)(self) - @property - def plot(self): - """ - Access plotting functions for Datasets. - Use it as a namespace to use xarray.plot functions as Dataset methods - - >>> ds.plot.scatter(...) # equivalent to xarray.plot.scatter(ds,...) - - """ - return _Dataset_PlotMethods(self) + plot = utils.UncachedAccessor(_Dataset_PlotMethods) def filter_by_attrs(self, **kwargs): """Returns a ``Dataset`` with variables that match specific conditions. @@ -5709,57 +5725,108 @@ def map_blocks( func: "Callable[..., T_DSorDA]", args: Sequence[Any] = (), kwargs: Mapping[str, Any] = None, + template: Union["DataArray", "Dataset"] = None, ) -> "T_DSorDA": """ - Apply a function to each chunk of this Dataset. This method is experimental and - its signature may change. + Apply a function to each block of this Dataset. + + .. warning:: + This method is experimental and its signature may change. Parameters ---------- func: callable - User-provided function that accepts a Dataset as its first parameter. The - function will receive a subset of this Dataset, corresponding to one chunk - along each chunked dimension. ``func`` will be executed as - ``func(obj_subset, *args, **kwargs)``. - - The function will be first run on mocked-up data, that looks like this - Dataset but has sizes 0, to determine properties of the returned object such - as dtype, variable names, new dimensions and new indexes (if any). + User-provided function that accepts a Dataset as its first + parameter. The function will receive a subset or 'block' of this Dataset (see below), + corresponding to one chunk along each chunked dimension. ``func`` will be + executed as ``func(subset_dataset, *subset_args, **kwargs)``. This function must return either a single DataArray or a single Dataset. - This function cannot change size of existing dimensions, or add new chunked - dimensions. + This function cannot add a new chunked dimension. + + obj: DataArray, Dataset + Passed to the function as its first argument, one block at a time. args: Sequence - Passed verbatim to func after unpacking, after the sliced DataArray. xarray - objects, if any, will not be split by chunks. 
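The `is_lexsorted()` fix above sits in the sparse `from_dataframe` path. For reference, a small round trip from a MultiIndexed frame (the `sparse=True` variant additionally needs the `sparse` package):

```python
import pandas as pd
import xarray as xr

df = pd.DataFrame(
    {"value": [1.0, 2.0, 3.0, 4.0]},
    index=pd.MultiIndex.from_product([["a", "b"], [0, 1]], names=["letter", "num"]),
)

ds = xr.Dataset.from_dataframe(df)            # dense by default
# ds = xr.Dataset.from_dataframe(df, sparse=True)  # sparse-backed variables

print(ds["value"].dims)   # ('letter', 'num')
print(ds["value"].shape)  # (2, 2)
```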
Passing dask collections is - not allowed. + Passed to func after unpacking and subsetting any xarray objects by blocks. + xarray objects in args must be aligned with obj, otherwise an error is raised. kwargs: Mapping Passed verbatim to func after unpacking. xarray objects, if any, will not be - split by chunks. Passing dask collections is not allowed. + subset to blocks. Passing dask collections in kwargs is not allowed. + template: (optional) DataArray, Dataset + xarray object representing the final result after compute is called. If not provided, + the function will be first run on mocked-up data, that looks like ``obj`` but + has sizes 0, to determine properties of the returned object such as dtype, + variable names, attributes, new dimensions and new indexes (if any). + ``template`` must be provided if the function changes the size of existing dimensions. + When provided, ``attrs`` on variables in `template` are copied over to the result. Any + ``attrs`` set by ``func`` will be ignored. + Returns ------- - A single DataArray or Dataset with dask backend, reassembled from the outputs of - the function. + A single DataArray or Dataset with dask backend, reassembled from the outputs of the + function. Notes ----- - This method is designed for when one needs to manipulate a whole xarray object - within each chunk. In the more common case where one can work on numpy arrays, - it is recommended to use apply_ufunc. + This function is designed for when ``func`` needs to manipulate a whole xarray object + subset to each block. In the more common case where ``func`` can work on numpy arrays, it is + recommended to use ``apply_ufunc``. - If none of the variables in this Dataset is backed by dask, calling this method - is equivalent to calling ``func(self, *args, **kwargs)``. + If none of the variables in ``obj`` is backed by dask arrays, calling this function is + equivalent to calling ``func(obj, *args, **kwargs)``. See Also -------- - dask.array.map_blocks, xarray.apply_ufunc, xarray.map_blocks, + dask.array.map_blocks, xarray.apply_ufunc, xarray.Dataset.map_blocks, xarray.DataArray.map_blocks + + Examples + -------- + + Calculate an anomaly from climatology using ``.groupby()``. Using + ``xr.map_blocks()`` allows for parallel operations with knowledge of ``xarray``, + its indices, and its methods like ``.groupby()``. + + >>> def calculate_anomaly(da, groupby_type="time.month"): + ... gb = da.groupby(groupby_type) + ... clim = gb.mean(dim="time") + ... return gb - clim + >>> time = xr.cftime_range("1990-01", "1992-01", freq="M") + >>> np.random.seed(123) + >>> array = xr.DataArray( + ... np.random.rand(len(time)), dims="time", coords=[time] + ... ).chunk() + >>> ds = xr.Dataset({"a": array}) + >>> ds.map_blocks(calculate_anomaly, template=ds).compute() + + array([ 0.12894847, 0.11323072, -0.0855964 , -0.09334032, 0.26848862, + 0.12382735, 0.22460641, 0.07650108, -0.07673453, -0.22865714, + -0.19063865, 0.0590131 , -0.12894847, -0.11323072, 0.0855964 , + 0.09334032, -0.26848862, -0.12382735, -0.22460641, -0.07650108, + 0.07673453, 0.22865714, 0.19063865, -0.0590131 ]) + Coordinates: + * time (time) object 1990-01-31 00:00:00 ... 1991-12-31 00:00:00 + + Note that one must explicitly use ``args=[]`` and ``kwargs={}`` to pass arguments + to the function being applied in ``xr.map_blocks()``: + + >>> ds.map_blocks( + ... calculate_anomaly, kwargs={"groupby_type": "time.year"}, template=ds, + ... 
) + + array([ 0.15361741, -0.25671244, -0.31600032, 0.008463 , 0.1766172 , + -0.11974531, 0.43791243, 0.14197797, -0.06191987, -0.15073425, + -0.19967375, 0.18619794, -0.05100474, -0.42989909, -0.09153273, + 0.24841842, -0.30708526, -0.31412523, 0.04197439, 0.0422506 , + 0.14482397, 0.35985481, 0.23487834, 0.12144652]) + Coordinates: + * time (time) object 1990-01-31 00:00:00 ... 1991-12-31 00:00:00 """ from .parallel import map_blocks - return map_blocks(func, self, args, kwargs) + return map_blocks(func, self, args, kwargs, template) def polyfit( self, @@ -5823,7 +5890,7 @@ def polyfit( variables = {} skipna_da = skipna - x = get_clean_interp_index(self, dim) + x = get_clean_interp_index(self, dim, strict=False) xname = "{}_".format(self[dim].name) order = int(deg) + 1 lhs = np.vander(x, order) @@ -5934,7 +6001,7 @@ def polyfit( "The number of data points must exceed order to scale the covariance matrix." ) fac = residuals / (x.shape[0] - order) - covariance = xr.DataArray(Vbase, dims=("cov_i", "cov_j"),) * fac + covariance = xr.DataArray(Vbase, dims=("cov_i", "cov_j")) * fac variables[name + "polyfit_covariance"] = covariance return Dataset(data_vars=variables, attrs=self.attrs.copy()) @@ -6060,8 +6127,8 @@ def pad( Examples -------- - >>> ds = xr.Dataset({'foo': ('x', range(5))}) - >>> ds.pad(x=(1,2)) + >>> ds = xr.Dataset({"foo": ("x", range(5))}) + >>> ds.pad(x=(1, 2)) Dimensions: (x: 8) Dimensions without coordinates: x @@ -6155,17 +6222,20 @@ def idxmin( Examples -------- - >>> array1 = xr.DataArray([0, 2, 1, 0, -2], dims="x", - ... coords={"x": ['a', 'b', 'c', 'd', 'e']}) - >>> array2 = xr.DataArray([[2.0, 1.0, 2.0, 0.0, -2.0], - ... [-4.0, np.NaN, 2.0, np.NaN, -2.0], - ... [np.NaN, np.NaN, 1., np.NaN, np.NaN]], - ... dims=["y", "x"], - ... coords={"y": [-1, 0, 1], - ... "x": ['a', 'b', 'c', 'd', 'e']} - ... ) - >>> ds = xr.Dataset({'int': array1, 'float': array2}) - >>> ds.min(dim='x') + >>> array1 = xr.DataArray( + ... [0, 2, 1, 0, -2], dims="x", coords={"x": ["a", "b", "c", "d", "e"]} + ... ) + >>> array2 = xr.DataArray( + ... [ + ... [2.0, 1.0, 2.0, 0.0, -2.0], + ... [-4.0, np.NaN, 2.0, np.NaN, -2.0], + ... [np.NaN, np.NaN, 1.0, np.NaN, np.NaN], + ... ], + ... dims=["y", "x"], + ... coords={"y": [-1, 0, 1], "x": ["a", "b", "c", "d", "e"]}, + ... ) + >>> ds = xr.Dataset({"int": array1, "float": array2}) + >>> ds.min(dim="x") Dimensions: (y: 3) Coordinates: @@ -6173,7 +6243,7 @@ def idxmin( Data variables: int int64 -2 float (y) float64 -2.0 -4.0 1.0 - >>> ds.argmin(dim='x') + >>> ds.argmin(dim="x") Dimensions: (y: 3) Coordinates: @@ -6181,7 +6251,7 @@ def idxmin( Data variables: int int64 4 float (y) int64 4 0 2 - >>> ds.idxmin(dim='x') + >>> ds.idxmin(dim="x") Dimensions: (y: 3) Coordinates: @@ -6197,7 +6267,7 @@ def idxmin( skipna=skipna, fill_value=fill_value, keep_attrs=keep_attrs, - ), + ) ) def idxmax( @@ -6250,17 +6320,20 @@ def idxmax( Examples -------- - >>> array1 = xr.DataArray([0, 2, 1, 0, -2], dims="x", - ... coords={"x": ['a', 'b', 'c', 'd', 'e']}) - >>> array2 = xr.DataArray([[2.0, 1.0, 2.0, 0.0, -2.0], - ... [-4.0, np.NaN, 2.0, np.NaN, -2.0], - ... [np.NaN, np.NaN, 1., np.NaN, np.NaN]], - ... dims=["y", "x"], - ... coords={"y": [-1, 0, 1], - ... "x": ['a', 'b', 'c', 'd', 'e']} - ... ) - >>> ds = xr.Dataset({'int': array1, 'float': array2}) - >>> ds.max(dim='x') + >>> array1 = xr.DataArray( + ... [0, 2, 1, 0, -2], dims="x", coords={"x": ["a", "b", "c", "d", "e"]} + ... ) + >>> array2 = xr.DataArray( + ... [ + ... [2.0, 1.0, 2.0, 0.0, -2.0], + ... 
[-4.0, np.NaN, 2.0, np.NaN, -2.0], + ... [np.NaN, np.NaN, 1.0, np.NaN, np.NaN], + ... ], + ... dims=["y", "x"], + ... coords={"y": [-1, 0, 1], "x": ["a", "b", "c", "d", "e"]}, + ... ) + >>> ds = xr.Dataset({"int": array1, "float": array2}) + >>> ds.max(dim="x") Dimensions: (y: 3) Coordinates: @@ -6268,7 +6341,7 @@ def idxmax( Data variables: int int64 2 float (y) float64 2.0 2.0 1.0 - >>> ds.argmax(dim='x') + >>> ds.argmax(dim="x") Dimensions: (y: 3) Coordinates: @@ -6276,7 +6349,7 @@ def idxmax( Data variables: int int64 1 float (y) int64 0 2 2 - >>> ds.idxmax(dim='x') + >>> ds.idxmax(dim="x") Dimensions: (y: 3) Coordinates: @@ -6292,8 +6365,134 @@ def idxmax( skipna=skipna, fill_value=fill_value, keep_attrs=keep_attrs, - ), + ) ) + def argmin(self, dim=None, axis=None, **kwargs): + """Indices of the minima of the member variables. + + If there are multiple minima, the indices of the first one found will be + returned. + + Parameters + ---------- + dim : str, optional + The dimension over which to find the minimum. By default, finds minimum over + all dimensions - for now returning an int for backward compatibility, but + this is deprecated, in future will be an error, since DataArray.argmin will + return a dict with indices for all dimensions, which does not make sense for + a Dataset. + axis : int, optional + Axis over which to apply `argmin`. Only one of the 'dim' and 'axis' arguments + can be supplied. + keep_attrs : bool, optional + If True, the attributes (`attrs`) will be copied from the original + object to the new one. If False (default), the new object will be + returned without attributes. + skipna : bool, optional + If True, skip missing values (as marked by NaN). By default, only + skips missing values for float dtypes; other dtypes either do not + have a sentinel missing value (int) or skipna=True has not been + implemented (object, datetime64 or timedelta64). + + Returns + ------- + result : Dataset + + See also + -------- + DataArray.argmin + + """ + if dim is None and axis is None: + warnings.warn( + "Once the behaviour of DataArray.argmin() and Variable.argmin() with " + "neither dim nor axis argument changes to return a dict of indices of " + "each dimension, for consistency it will be an error to call " + "Dataset.argmin() with no argument, since we don't return a dict of " + "Datasets.", + DeprecationWarning, + stacklevel=2, + ) + if ( + dim is None + or axis is not None + or (not isinstance(dim, Sequence) and dim is not ...) + or isinstance(dim, str) + ): + # Return int index if single dimension is passed, and is not part of a + # sequence + argmin_func = getattr(duck_array_ops, "argmin") + return self.reduce(argmin_func, dim=dim, axis=axis, **kwargs) + else: + raise ValueError( + "When dim is a sequence or ..., DataArray.argmin() returns a dict. " + "dicts cannot be contained in a Dataset, so cannot call " + "Dataset.argmin() with a sequence or ... for dim" + ) + + def argmax(self, dim=None, axis=None, **kwargs): + """Indices of the maxima of the member variables. + + If there are multiple maxima, the indices of the first one found will be + returned. + + Parameters + ---------- + dim : str, optional + The dimension over which to find the maximum. By default, finds maximum over + all dimensions - for now returning an int for backward compatibility, but + this is deprecated, in future will be an error, since DataArray.argmax will + return a dict with indices for all dimensions, which does not make sense for + a Dataset. 
+ axis : int, optional + Axis over which to apply `argmax`. Only one of the 'dim' and 'axis' arguments + can be supplied. + keep_attrs : bool, optional + If True, the attributes (`attrs`) will be copied from the original + object to the new one. If False (default), the new object will be + returned without attributes. + skipna : bool, optional + If True, skip missing values (as marked by NaN). By default, only + skips missing values for float dtypes; other dtypes either do not + have a sentinel missing value (int) or skipna=True has not been + implemented (object, datetime64 or timedelta64). + + Returns + ------- + result : Dataset + + See also + -------- + DataArray.argmax + + """ + if dim is None and axis is None: + warnings.warn( + "Once the behaviour of DataArray.argmax() and Variable.argmax() with " + "neither dim nor axis argument changes to return a dict of indices of " + "each dimension, for consistency it will be an error to call " + "Dataset.argmax() with no argument, since we don't return a dict of " + "Datasets.", + DeprecationWarning, + stacklevel=2, + ) + if ( + dim is None + or axis is not None + or (not isinstance(dim, Sequence) and dim is not ...) + or isinstance(dim, str) + ): + # Return int index if single dimension is passed, and is not part of a + # sequence + argmax_func = getattr(duck_array_ops, "argmax") + return self.reduce(argmax_func, dim=dim, axis=axis, **kwargs) + else: + raise ValueError( + "When dim is a sequence or ..., DataArray.argmax() returns a dict. " + "dicts cannot be contained in a Dataset, so cannot call " + "Dataset.argmax() with a sequence or ... for dim" + ) + ops.inject_all_ops_and_reduce_methods(Dataset, array_only=False) diff --git a/xarray/core/duck_array_ops.py b/xarray/core/duck_array_ops.py index 1340b456cf2..df579d23544 100644 --- a/xarray/core/duck_array_ops.py +++ b/xarray/core/duck_array_ops.py @@ -6,6 +6,7 @@ import contextlib import inspect import warnings +from distutils.version import LooseVersion from functools import partial import numpy as np @@ -20,6 +21,14 @@ except ImportError: dask_array = None # type: ignore +# TODO: remove after we stop supporting dask < 2.9.1 +try: + import dask + + dask_version = dask.__version__ +except ImportError: + dask_version = None + def _dask_or_eager_func( name, @@ -199,8 +208,19 @@ def allclose_or_equiv(arr1, arr2, rtol=1e-5, atol=1e-8): """ arr1 = asarray(arr1) arr2 = asarray(arr2) + lazy_equiv = lazy_array_equiv(arr1, arr2) if lazy_equiv is None: + # TODO: remove after we require dask >= 2.9.1 + sufficient_dask_version = ( + dask_version is not None and LooseVersion(dask_version) >= "2.9.1" + ) + if not sufficient_dask_version and any( + isinstance(arr, dask_array_type) for arr in [arr1, arr2] + ): + arr1 = np.array(arr1) + arr2 = np.array(arr2) + return bool(isclose(arr1, arr2, rtol=rtol, atol=atol, equal_nan=True).all()) else: return lazy_equiv @@ -339,6 +359,7 @@ def f(values, axis=None, skipna=None, **kwargs): cumprod_1d.numeric_only = True cumsum_1d = _create_nan_agg_method("cumsum") cumsum_1d.numeric_only = True +unravel_index = _dask_or_eager_func("unravel_index") _mean = _create_nan_agg_method("mean") diff --git a/xarray/core/formatting.py b/xarray/core/formatting.py index 534d253ecc8..28eaae5f05b 100644 --- a/xarray/core/formatting.py +++ b/xarray/core/formatting.py @@ -3,7 +3,7 @@ import contextlib import functools from datetime import datetime, timedelta -from itertools import zip_longest +from itertools import chain, zip_longest from typing import Hashable import numpy as np @@
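To make the new ``Dataset.argmin``/``Dataset.argmax`` behaviour concrete, a short sketch with made-up data, relying only on the methods defined above:

```python
import numpy as np
import xarray as xr

ds = xr.Dataset({"a": (("y", "x"), [[3.0, 1.0, 2.0], [0.0, 5.0, 4.0]])})

ds.argmin(dim="x")  # integer index of the minimum along "x", per variable
ds.argmax(dim="y")  # same idea for the maximum along "y"

# Passing a sequence (or ...) raises, because the per-dimension dict that
# DataArray.argmin would return cannot be stored in a Dataset:
# ds.argmin(dim=["x", "y"])  -> ValueError
```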
-140,7 +140,7 @@ def format_item(x, timedelta_format=None, quote_strings=True): return format_timedelta(x, timedelta_format=timedelta_format) elif isinstance(x, (str, bytes)): return repr(x) if quote_strings else x - elif isinstance(x, (float, np.float)): + elif isinstance(x, (float, np.float_)): return f"{x:.4}" else: return str(x) @@ -298,12 +298,10 @@ def _summarize_coord_multiindex(coord, col_width, marker): def _summarize_coord_levels(coord, col_width, marker="-"): return "\n".join( - [ - summarize_variable( - lname, coord.get_level_variable(lname), col_width, marker=marker - ) - for lname in coord.level_names - ] + summarize_variable( + lname, coord.get_level_variable(lname), col_width, marker=marker + ) + for lname in coord.level_names ) @@ -424,6 +422,17 @@ def set_numpy_options(*args, **kwargs): np.set_printoptions(**original) +def limit_lines(string: str, *, limit: int): + """ + If the string is more lines than the limit, + this returns the middle lines replaced by an ellipsis + """ + lines = string.splitlines() + if len(lines) > limit: + string = "\n".join(chain(lines[: limit // 2], ["..."], lines[-limit // 2 :])) + return string + + def short_numpy_repr(array): array = np.asarray(array) @@ -449,7 +458,7 @@ def short_data_repr(array): elif hasattr(internal_data, "__array_function__") or isinstance( internal_data, dask_array_type ): - return repr(array.data) + return limit_lines(repr(array.data), limit=40) elif array._in_memory or array.size < 1e5: return short_numpy_repr(array) else: @@ -541,7 +550,10 @@ def extra_items_repr(extra_keys, mapping, ab_side): for k in a_keys & b_keys: try: # compare xarray variable - compatible = getattr(a_mapping[k], compat)(b_mapping[k]) + if not callable(compat): + compatible = getattr(a_mapping[k], compat)(b_mapping[k]) + else: + compatible = compat(a_mapping[k], b_mapping[k]) is_variable = True except AttributeError: # compare attribute value @@ -562,7 +574,7 @@ def extra_items_repr(extra_keys, mapping, ab_side): for m in (a_mapping, b_mapping): attr_s = "\n".join( - [summarize_attr(ak, av) for ak, av in m[k].attrs.items()] + summarize_attr(ak, av) for ak, av in m[k].attrs.items() ) attrs_summary.append(attr_s) @@ -598,8 +610,13 @@ def extra_items_repr(extra_keys, mapping, ab_side): def _compat_to_str(compat): + if callable(compat): + compat = compat.__name__ + if compat == "equals": return "equal" + elif compat == "allclose": + return "close" else: return compat @@ -613,8 +630,12 @@ def diff_array_repr(a, b, compat): ] summary.append(diff_dim_summary(a, b)) + if callable(compat): + equiv = compat + else: + equiv = array_equiv - if not array_equiv(a.data, b.data): + if not equiv(a.data, b.data): temp = [wrap_indent(short_numpy_repr(obj), start=" ") for obj in (a, b)] diff_data_repr = [ ab_side + "\n" + ab_data_repr diff --git a/xarray/core/formatting_html.py b/xarray/core/formatting_html.py index 8678a58b381..400ef61502e 100644 --- a/xarray/core/formatting_html.py +++ b/xarray/core/formatting_html.py @@ -20,7 +20,9 @@ def short_data_repr_html(array): internal_data = getattr(array, "variable", array)._data if hasattr(internal_data, "_repr_html_"): return internal_data._repr_html_() - return escape(short_data_repr(array)) + else: + text = escape(short_data_repr(array)) + return f"
{text}
" def format_dims(dims, coord_names): @@ -123,7 +125,7 @@ def summarize_variable(name, var, is_index=False, dtype=None, preview=None): f"" f"
{attrs_ul}
" - f"
{data_repr}
" + f"
{data_repr}
" ) @@ -182,8 +184,9 @@ def dim_section(obj): def array_section(obj): # "unique" id to expand/collapse the section data_id = "section-" + str(uuid.uuid4()) - collapsed = "" - preview = escape(inline_variable_array_repr(obj.variable, max_width=70)) + collapsed = "checked" + variable = getattr(obj, "variable", obj) + preview = escape(inline_variable_array_repr(variable, max_width=70)) data_repr = short_data_repr_html(obj) data_icon = _icon("icon-database") @@ -192,7 +195,7 @@ def array_section(obj): f"" f"" f"
{preview}
" - f"
{data_repr}
" + f"
{data_repr}
" "" ) @@ -221,14 +224,20 @@ def array_section(obj): ) -def _obj_repr(header_components, sections): +def _obj_repr(obj, header_components, sections): + """Return HTML repr of an xarray object. + + If CSS is not injected (untrusted notebook), fallback to the plain text repr. + + """ header = f"
{''.join(h for h in header_components)}
" sections = "".join(f"
  • {s}
  • " for s in sections) return ( "
    " f"{ICONS_SVG}" - "
    " + f"
    {escape(repr(obj))}
    " + "" @@ -256,7 +265,7 @@ def array_repr(arr): sections.append(attr_section(arr.attrs)) - return _obj_repr(header_components, sections) + return _obj_repr(arr, header_components, sections) def dataset_repr(ds): @@ -271,4 +280,4 @@ def dataset_repr(ds): attr_section(ds.attrs), ] - return _obj_repr(header_components, sections) + return _obj_repr(ds, header_components, sections) diff --git a/xarray/core/groupby.py b/xarray/core/groupby.py index 5a5f4c0d296..04c0fabae6a 100644 --- a/xarray/core/groupby.py +++ b/xarray/core/groupby.py @@ -29,7 +29,7 @@ def check_reduce_dims(reduce_dims, dimensions): if reduce_dims is not ...: if is_scalar(reduce_dims): reduce_dims = [reduce_dims] - if any([dim not in dimensions for dim in reduce_dims]): + if any(dim not in dimensions for dim in reduce_dims): raise ValueError( "cannot reduce over dimensions %r. expected either '...' to reduce over all dimensions or one or more of %r." % (reduce_dims, dimensions) @@ -272,8 +272,8 @@ def __init__( squeeze=False, grouper=None, bins=None, - restore_coord_dims=None, - cut_kwargs={}, + restore_coord_dims=True, + cut_kwargs=None, ): """Create a GroupBy object @@ -292,13 +292,15 @@ def __init__( bins : array-like, optional If `bins` is specified, the groups will be discretized into the specified bins by `pandas.cut`. - restore_coord_dims : bool, optional + restore_coord_dims : bool, default True If True, also restore the dimension order of multi-dimensional coordinates. cut_kwargs : dict, optional Extra keyword arguments to pass to `pandas.cut` """ + if cut_kwargs is None: + cut_kwargs = {} from .dataarray import DataArray if grouper is not None and bins is not None: @@ -319,7 +321,7 @@ def __init__( group = _DummyGroup(obj, group.name, group.coords) if getattr(group, "name", None) is None: - raise ValueError("`group` must have a name") + group.name = "group" group, obj, stacked_dim, inserted_dims = _ensure_1d(group, obj) (group_dim,) = group.dims @@ -387,21 +389,6 @@ def __init__( "Failed to group data. Are you grouping by a variable that is all NaN?" ) - if ( - isinstance(obj, DataArray) - and restore_coord_dims is None - and any(obj[c].ndim > 1 for c in obj.coords) - ): - warnings.warn( - "This DataArray contains multi-dimensional " - "coordinates. In the future, the dimension order " - "of these coordinates will be restored as well " - "unless you specify restore_coord_dims=False.", - FutureWarning, - stacklevel=2, - ) - restore_coord_dims = False - # specification for the groupby operation self._obj = obj self._group = group diff --git a/xarray/core/merge.py b/xarray/core/merge.py index fea94246471..35b77d700a0 100644 --- a/xarray/core/merge.py +++ b/xarray/core/merge.py @@ -841,7 +841,7 @@ def merge( from .dataarray import DataArray from .dataset import Dataset - dict_like_objects = list() + dict_like_objects = [] for obj in objects: if not isinstance(obj, (DataArray, Dataset, dict)): raise TypeError( diff --git a/xarray/core/missing.py b/xarray/core/missing.py index f973b4a5468..59d4f777c73 100644 --- a/xarray/core/missing.py +++ b/xarray/core/missing.py @@ -208,7 +208,9 @@ def _apply_over_vars_with_dim(func, self, dim=None, **kwargs): return ds -def get_clean_interp_index(arr, dim: Hashable, use_coordinate: Union[str, bool] = True): +def get_clean_interp_index( + arr, dim: Hashable, use_coordinate: Union[str, bool] = True, strict: bool = True +): """Return index to use for x values in interpolation or curve fitting. 
Parameters @@ -221,6 +223,8 @@ def get_clean_interp_index(arr, dim: Hashable, use_coordinate: Union[str, bool] If use_coordinate is True, the coordinate that shares the name of the dimension along which interpolation is being performed will be used as the x values. If False, the x values are set as an equally spaced sequence. + strict : bool + Whether to raise errors if the index is either non-unique or non-monotonic (default). Returns ------- @@ -257,11 +261,12 @@ def get_clean_interp_index(arr, dim: Hashable, use_coordinate: Union[str, bool] if isinstance(index, pd.MultiIndex): index.name = dim - if not index.is_monotonic: - raise ValueError(f"Index {index.name!r} must be monotonically increasing") + if strict: + if not index.is_monotonic: + raise ValueError(f"Index {index.name!r} must be monotonically increasing") - if not index.is_unique: - raise ValueError(f"Index {index.name!r} has duplicate values") + if not index.is_unique: + raise ValueError(f"Index {index.name!r} has duplicate values") # Special case for non-standard calendar indexes # Numerical datetime values are defined with respect to 1970-01-01T00:00:00 in units of nanoseconds @@ -282,7 +287,7 @@ def get_clean_interp_index(arr, dim: Hashable, use_coordinate: Union[str, bool] # xarray/numpy raise a ValueError raise TypeError( f"Index {index.name!r} must be castable to float64 to support " - f"interpolation, got {type(index).__name__}." + f"interpolation or curve fitting, got {type(index).__name__}." ) return index @@ -619,6 +624,19 @@ def interp(var, indexes_coords, method, **kwargs): # default behavior kwargs["bounds_error"] = kwargs.get("bounds_error", False) + # check if the interpolation can be done in orthogonal manner + if ( + len(indexes_coords) > 1 + and method in ["linear", "nearest"] + and all(dest[1].ndim == 1 for dest in indexes_coords.values()) + and len(set([d[1].dims[0] for d in indexes_coords.values()])) + == len(indexes_coords) + ): + # interpolate sequentially + for dim, dest in indexes_coords.items(): + var = interp(var, {dim: dest}, method, **kwargs) + return var + # target dimensions dims = list(indexes_coords) x, new_x = zip(*[indexes_coords[d] for d in dims]) @@ -659,7 +677,7 @@ def interp_func(var, x, new_x, method, kwargs): New coordinates. Should not contain NaN. method: string {'linear', 'nearest', 'zero', 'slinear', 'quadratic', 'cubic'} for - 1-dimensional itnterpolation. + 1-dimensional interpolation. 
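The fast path added to ``interp`` above decomposes a multi-dimensional interpolation with independent 1-D destinations into a sequence of 1-D interpolations. A hedged usage sketch (assumes scipy is installed; the data and coordinates are illustrative):

```python
import numpy as np
import xarray as xr

da = xr.DataArray(
    np.arange(12.0).reshape(3, 4),
    dims=("x", "y"),
    coords={"x": [0, 1, 2], "y": [0, 1, 2, 3]},
)

# With 1-D destinations along distinct dimensions and method="linear" or
# "nearest", the interpolation is now performed one dimension at a time.
da.interp(x=[0.5, 1.5], y=[0.5, 2.5], method="linear")
```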
{'linear', 'nearest'} for multidimensional interpolation **kwargs: Optional keyword arguments to be passed to scipy.interpolator diff --git a/xarray/core/ops.py b/xarray/core/ops.py index b789f93b4f1..d4aeea37aad 100644 --- a/xarray/core/ops.py +++ b/xarray/core/ops.py @@ -47,8 +47,6 @@ # methods which remove an axis REDUCE_METHODS = ["all", "any"] NAN_REDUCE_METHODS = [ - "argmax", - "argmin", "max", "min", "mean", diff --git a/xarray/core/parallel.py b/xarray/core/parallel.py index 6f1668f698f..86044e72dd2 100644 --- a/xarray/core/parallel.py +++ b/xarray/core/parallel.py @@ -16,6 +16,8 @@ DefaultDict, Dict, Hashable, + Iterable, + List, Mapping, Sequence, Tuple, @@ -25,12 +27,50 @@ import numpy as np +from .alignment import align from .dataarray import DataArray from .dataset import Dataset T_DSorDA = TypeVar("T_DSorDA", DataArray, Dataset) +def unzip(iterable): + return zip(*iterable) + + +def assert_chunks_compatible(a: Dataset, b: Dataset): + a = a.unify_chunks() + b = b.unify_chunks() + + for dim in set(a.chunks).intersection(set(b.chunks)): + if a.chunks[dim] != b.chunks[dim]: + raise ValueError(f"Chunk sizes along dimension {dim!r} are not equal.") + + +def check_result_variables( + result: Union[DataArray, Dataset], expected: Mapping[str, Any], kind: str +): + + if kind == "coords": + nice_str = "coordinate" + elif kind == "data_vars": + nice_str = "data" + + # check that coords and data variables are as expected + missing = expected[kind] - set(getattr(result, kind)) + if missing: + raise ValueError( + "Result from applying user function does not contain " + f"{nice_str} variables {missing}." + ) + extra = set(getattr(result, kind)) - expected[kind] + if extra: + raise ValueError( + "Result from applying user function has unexpected " + f"{nice_str} variables {extra}." + ) + + def dataset_to_dataarray(obj: Dataset) -> DataArray: if not isinstance(obj, Dataset): raise TypeError("Expected Dataset, got %s" % type(obj)) @@ -43,6 +83,17 @@ def dataset_to_dataarray(obj: Dataset) -> DataArray: return next(iter(obj.data_vars.values())) +def dataarray_to_dataset(obj: DataArray) -> Dataset: + # only using _to_temp_dataset would break + # func = lambda x: x.to_dataset() + # since that relies on preserving name. + if obj.name is None: + dataset = obj._to_temp_dataset() + else: + dataset = obj.to_dataset() + return dataset + + def make_meta(obj): """If obj is a DataArray or Dataset, return a new object of the same type and with the same variables and dtypes, but where all variables have size 0 and numpy @@ -80,7 +131,8 @@ def infer_template( template = func(*meta_args, **kwargs) except Exception as e: raise Exception( - "Cannot infer object returned from running user provided function." + "Cannot infer object returned from running user provided function. " + "Please supply the 'template' kwarg to map_blocks." 
) from e if not isinstance(template, (Dataset, DataArray)): @@ -102,39 +154,54 @@ def make_dict(x: Union[DataArray, Dataset]) -> Dict[Hashable, Any]: return {k: v.data for k, v in x.variables.items()} +def _get_chunk_slicer(dim: Hashable, chunk_index: Mapping, chunk_bounds: Mapping): + if dim in chunk_index: + which_chunk = chunk_index[dim] + return slice(chunk_bounds[dim][which_chunk], chunk_bounds[dim][which_chunk + 1]) + return slice(None) + + def map_blocks( func: Callable[..., T_DSorDA], obj: Union[DataArray, Dataset], args: Sequence[Any] = (), kwargs: Mapping[str, Any] = None, + template: Union[DataArray, Dataset] = None, ) -> T_DSorDA: - """Apply a function to each chunk of a DataArray or Dataset. This function is - experimental and its signature may change. + """Apply a function to each block of a DataArray or Dataset. + + .. warning:: + This function is experimental and its signature may change. Parameters ---------- func: callable User-provided function that accepts a DataArray or Dataset as its first - parameter. The function will receive a subset of 'obj' (see below), + parameter ``obj``. The function will receive a subset or 'block' of ``obj`` (see below), corresponding to one chunk along each chunked dimension. ``func`` will be - executed as ``func(obj_subset, *args, **kwargs)``. - - The function will be first run on mocked-up data, that looks like 'obj' but - has sizes 0, to determine properties of the returned object such as dtype, - variable names, new dimensions and new indexes (if any). + executed as ``func(subset_obj, *subset_args, **kwargs)``. This function must return either a single DataArray or a single Dataset. - This function cannot change size of existing dimensions, or add new chunked - dimensions. + This function cannot add a new chunked dimension. + obj: DataArray, Dataset - Passed to the function as its first argument, one dask chunk at a time. + Passed to the function as its first argument, one block at a time. args: Sequence - Passed verbatim to func after unpacking, after the sliced obj. xarray objects, - if any, will not be split by chunks. Passing dask collections is not allowed. + Passed to func after unpacking and subsetting any xarray objects by blocks. + xarray objects in args must be aligned with obj, otherwise an error is raised. kwargs: Mapping Passed verbatim to func after unpacking. xarray objects, if any, will not be - split by chunks. Passing dask collections is not allowed. + subset to blocks. Passing dask collections in kwargs is not allowed. + template: (optional) DataArray, Dataset + xarray object representing the final result after compute is called. If not provided, + the function will be first run on mocked-up data, that looks like ``obj`` but + has sizes 0, to determine properties of the returned object such as dtype, + variable names, attributes, new dimensions and new indexes (if any). + ``template`` must be provided if the function changes the size of existing dimensions. + When provided, ``attrs`` on variables in `template` are copied over to the result. Any + ``attrs`` set by ``func`` will be ignored. + Returns ------- @@ -143,11 +210,11 @@ def map_blocks( Notes ----- - This function is designed for when one needs to manipulate a whole xarray object - within each chunk. In the more common case where one can work on numpy arrays, it is - recommended to use apply_ufunc. + This function is designed for when ``func`` needs to manipulate a whole xarray object + subset to each block. 
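A minimal sketch of the two behaviours described in this docstring, block-wise subsetting of xarray objects passed via ``args`` and a user-supplied ``template``; it assumes dask is installed and all names are illustrative:

```python
import numpy as np
import xarray as xr

da = xr.DataArray(np.arange(6.0), dims="x").chunk({"x": 3})
other = xr.DataArray(np.ones(6), dims="x").chunk({"x": 3})

def add(block, other_block):
    # `other` is an xarray object in ``args``, so it is split into blocks too
    return block + other_block

xr.map_blocks(add, da, args=[other]).compute()

def double(block):
    # changes the size of "x", so the result cannot be inferred: pass a template
    return xr.concat([block, block], dim="x")

template = xr.concat([da, da], dim="x").chunk({"x": 6})
xr.map_blocks(double, da, template=template).compute()
```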
In the more common case where ``func`` can work on numpy arrays, it is + recommended to use ``apply_ufunc``. - If none of the variables in obj is backed by dask, calling this function is + If none of the variables in ``obj`` is backed by dask arrays, calling this function is equivalent to calling ``func(obj, *args, **kwargs)``. See Also @@ -163,10 +230,6 @@ def map_blocks( its indices, and its methods like ``.groupby()``. >>> def calculate_anomaly(da, groupby_type="time.month"): - ... # Necessary workaround to xarray's check with zero dimensions - ... # https://github.com/pydata/xarray/issues/3575 - ... if sum(da.shape) == 0: - ... return da ... gb = da.groupby(groupby_type) ... clim = gb.mean(dim="time") ... return gb - clim @@ -175,7 +238,7 @@ def map_blocks( >>> array = xr.DataArray( ... np.random.rand(len(time)), dims="time", coords=[time] ... ).chunk() - >>> xr.map_blocks(calculate_anomaly, array).compute() + >>> xr.map_blocks(calculate_anomaly, array, template=array).compute() array([ 0.12894847, 0.11323072, -0.0855964 , -0.09334032, 0.26848862, 0.12382735, 0.22460641, 0.07650108, -0.07673453, -0.22865714, @@ -189,7 +252,10 @@ def map_blocks( to the function being applied in ``xr.map_blocks()``: >>> xr.map_blocks( - ... calculate_anomaly, array, kwargs={"groupby_type": "time.year"}, + ... calculate_anomaly, + ... array, + ... kwargs={"groupby_type": "time.year"}, + ... template=array, ... ) array([ 0.15361741, -0.25671244, -0.31600032, 0.008463 , 0.1766172 , @@ -201,22 +267,57 @@ def map_blocks( * time (time) object 1990-01-31 00:00:00 ... 1991-12-31 00:00:00 """ - def _wrapper(func, obj, to_array, args, kwargs): - if to_array: - obj = dataset_to_dataarray(obj) - - result = func(obj, *args, **kwargs) + def _wrapper( + func: Callable, + args: List, + kwargs: dict, + arg_is_array: Iterable[bool], + expected: dict, + ): + """ + Wrapper function that receives datasets in args; converts to dataarrays when necessary; + passes these to the user function `func` and checks returned objects for expected shapes/sizes/etc. + """ + + converted_args = [ + dataset_to_dataarray(arg) if is_array else arg + for is_array, arg in zip(arg_is_array, args) + ] + + result = func(*converted_args, **kwargs) + + # check all dims are present + missing_dimensions = set(expected["shapes"]) - set(result.sizes) + if missing_dimensions: + raise ValueError( + f"Dimensions {missing_dimensions} missing on returned object." + ) + # check that index lengths and values are as expected for name, index in result.indexes.items(): - if name in obj.indexes: - if len(index) != len(obj.indexes[name]): + if name in expected["shapes"]: + if len(index) != expected["shapes"][name]: + raise ValueError( + f"Received dimension {name!r} of length {len(index)}. Expected length {expected['shapes'][name]}." + ) + if name in expected["indexes"]: + expected_index = expected["indexes"][name] + if not index.equals(expected_index): raise ValueError( - "Length of the %r dimension has changed. This is not allowed." - % name + f"Expected index {name!r} to be {expected_index!r}. Received {index!r} instead." ) + # check that all expected variables were returned + check_result_variables(result, expected, "coords") + if isinstance(result, Dataset): + check_result_variables(result, expected, "data_vars") + return make_dict(result) + if template is not None and not isinstance(template, (DataArray, Dataset)): + raise TypeError( + f"template must be a DataArray or Dataset. Received {type(template).__name__} instead." 
+ ) if not isinstance(args, Sequence): raise TypeError("args must be a sequence (for example, a list or tuple).") if kwargs is None: @@ -224,32 +325,76 @@ def _wrapper(func, obj, to_array, args, kwargs): elif not isinstance(kwargs, Mapping): raise TypeError("kwargs must be a mapping (for example, a dict)") - for value in list(args) + list(kwargs.values()): + for value in kwargs.values(): if dask.is_dask_collection(value): raise TypeError( - "Cannot pass dask collections in args or kwargs yet. Please compute or " + "Cannot pass dask collections in kwargs yet. Please compute or " "load values before passing to map_blocks." ) if not dask.is_dask_collection(obj): return func(obj, *args, **kwargs) - if isinstance(obj, DataArray): - # only using _to_temp_dataset would break - # func = lambda x: x.to_dataset() - # since that relies on preserving name. - if obj.name is None: - dataset = obj._to_temp_dataset() - else: - dataset = obj.to_dataset() - input_is_array = True - else: - dataset = obj - input_is_array = False + all_args = [obj] + list(args) + is_xarray = [isinstance(arg, (Dataset, DataArray)) for arg in all_args] + is_array = [isinstance(arg, DataArray) for arg in all_args] - input_chunks = dataset.chunks + # there should be a better way to group this. partition? + xarray_indices, xarray_objs = unzip( + (index, arg) for index, arg in enumerate(all_args) if is_xarray[index] + ) + others = [ + (index, arg) for index, arg in enumerate(all_args) if not is_xarray[index] + ] + + # all xarray objects must be aligned. This is consistent with apply_ufunc. + aligned = align(*xarray_objs, join="exact") + xarray_objs = tuple( + dataarray_to_dataset(arg) if is_da else arg + for is_da, arg in zip(is_array, aligned) + ) + + _, npargs = unzip( + sorted(list(zip(xarray_indices, xarray_objs)) + others, key=lambda x: x[0]) + ) + + # check that chunk sizes are compatible + input_chunks = dict(npargs[0].chunks) + input_indexes = dict(npargs[0].indexes) + for arg in xarray_objs[1:]: + assert_chunks_compatible(npargs[0], arg) + input_chunks.update(arg.chunks) + input_indexes.update(arg.indexes) + + if template is None: + # infer template by providing zero-shaped arrays + template = infer_template(func, aligned[0], *args, **kwargs) + template_indexes = set(template.indexes) + preserved_indexes = template_indexes & set(input_indexes) + new_indexes = template_indexes - set(input_indexes) + indexes = {dim: input_indexes[dim] for dim in preserved_indexes} + indexes.update({k: template.indexes[k] for k in new_indexes}) + output_chunks = { + dim: input_chunks[dim] for dim in template.dims if dim in input_chunks + } + + else: + # template xarray object has been provided with proper sizes and chunk shapes + indexes = dict(template.indexes) + if isinstance(template, DataArray): + output_chunks = dict(zip(template.dims, template.chunks)) # type: ignore + else: + output_chunks = dict(template.chunks) + + for dim in output_chunks: + if dim in input_chunks and len(input_chunks[dim]) != len(output_chunks[dim]): + raise ValueError( + "map_blocks requires that one block of the input maps to one block of output. " + f"Expected number of output chunks along dimension {dim!r} to be {len(input_chunks[dim])}. " + f"Received {len(output_chunks[dim])} instead. Please provide template if not provided, or " + "fix the provided template." 
+ ) - template: Union[DataArray, Dataset] = infer_template(func, obj, *args, **kwargs) if isinstance(template, DataArray): result_is_array = True template_name = template.name @@ -261,13 +406,6 @@ def _wrapper(func, obj, to_array, args, kwargs): f"func output must be DataArray or Dataset; got {type(template)}" ) - template_indexes = set(template.indexes) - dataset_indexes = set(dataset.indexes) - preserved_indexes = template_indexes & dataset_indexes - new_indexes = template_indexes - dataset_indexes - indexes = {dim: dataset.indexes[dim] for dim in preserved_indexes} - indexes.update({k: template.indexes[k] for k in new_indexes}) - # We're building a new HighLevelGraph hlg. We'll have one new layer # for each variable in the dataset, which is the result of the # func applied to the values. @@ -275,19 +413,27 @@ def _wrapper(func, obj, to_array, args, kwargs): graph: Dict[Any, Any] = {} new_layers: DefaultDict[str, Dict[Any, Any]] = collections.defaultdict(dict) gname = "{}-{}".format( - dask.utils.funcname(func), dask.base.tokenize(dataset, args, kwargs) + dask.utils.funcname(func), dask.base.tokenize(npargs[0], args, kwargs) ) # map dims to list of chunk indexes ichunk = {dim: range(len(chunks_v)) for dim, chunks_v in input_chunks.items()} # mapping from chunk index to slice bounds - chunk_index_bounds = { + input_chunk_bounds = { dim: np.cumsum((0,) + chunks_v) for dim, chunks_v in input_chunks.items() } + output_chunk_bounds = { + dim: np.cumsum((0,) + chunks_v) for dim, chunks_v in output_chunks.items() + } - # iterate over all possible chunk combinations - for v in itertools.product(*ichunk.values()): - chunk_index_dict = dict(zip(dataset.dims, v)) + def subset_dataset_to_block( + graph: dict, gname: str, dataset: Dataset, input_chunk_bounds, chunk_index + ): + """ + Creates a task that subsets an xarray dataset to a block determined by chunk_index. + Block extents are determined by input_chunk_bounds. + Also subtasks that subset the constituent variables of a dataset. 
+ """ # this will become [[name1, variable1], # [name2, variable2], @@ -296,35 +442,31 @@ def _wrapper(func, obj, to_array, args, kwargs): data_vars = [] coords = [] + chunk_tuple = tuple(chunk_index.values()) for name, variable in dataset.variables.items(): # make a task that creates tuple of (dims, chunk) if dask.is_dask_collection(variable.data): # recursively index into dask_keys nested list to get chunk chunk = variable.__dask_keys__() for dim in variable.dims: - chunk = chunk[chunk_index_dict[dim]] + chunk = chunk[chunk_index[dim]] - chunk_variable_task = (f"{gname}-{chunk[0]}",) + v + chunk_variable_task = (f"{gname}-{name}-{chunk[0]}",) + chunk_tuple graph[chunk_variable_task] = ( tuple, [variable.dims, chunk, variable.attrs], ) else: - # non-dask array with possibly chunked dimensions + # non-dask array possibly with dimensions chunked on other variables # index into variable appropriately - subsetter = {} - for dim in variable.dims: - if dim in chunk_index_dict: - which_chunk = chunk_index_dict[dim] - subsetter[dim] = slice( - chunk_index_bounds[dim][which_chunk], - chunk_index_bounds[dim][which_chunk + 1], - ) - + subsetter = { + dim: _get_chunk_slicer(dim, chunk_index, input_chunk_bounds) + for dim in variable.dims + } subset = variable.isel(subsetter) chunk_variable_task = ( "{}-{}".format(gname, dask.base.tokenize(subset)), - ) + v + ) + chunk_tuple graph[chunk_variable_task] = ( tuple, [subset.dims, subset, subset.attrs], @@ -336,15 +478,37 @@ def _wrapper(func, obj, to_array, args, kwargs): else: data_vars.append([name, chunk_variable_task]) - from_wrapper = (gname,) + v - graph[from_wrapper] = ( - _wrapper, - func, - (Dataset, (dict, data_vars), (dict, coords), dataset.attrs), - input_is_array, - args, - kwargs, - ) + return (Dataset, (dict, data_vars), (dict, coords), dataset.attrs) + + # iterate over all possible chunk combinations + for chunk_tuple in itertools.product(*ichunk.values()): + # mapping from dimension name to chunk index + chunk_index = dict(zip(ichunk.keys(), chunk_tuple)) + + blocked_args = [ + subset_dataset_to_block(graph, gname, arg, input_chunk_bounds, chunk_index) + if isxr + else arg + for isxr, arg in zip(is_xarray, npargs) + ] + + # expected["shapes", "coords", "data_vars", "indexes"] are used to + # raise nice error messages in _wrapper + expected = {} + # input chunk 0 along a dimension maps to output chunk 0 along the same dimension + # even if length of dimension is changed by the applied function + expected["shapes"] = { + k: output_chunks[k][v] for k, v in chunk_index.items() if k in output_chunks + } + expected["data_vars"] = set(template.data_vars.keys()) # type: ignore + expected["coords"] = set(template.coords.keys()) # type: ignore + expected["indexes"] = { + dim: indexes[dim][_get_chunk_slicer(dim, chunk_index, output_chunk_bounds)] + for dim in indexes + } + + from_wrapper = (gname,) + chunk_tuple + graph[from_wrapper] = (_wrapper, func, blocked_args, kwargs, is_array, expected) # mapping from variable name to dask graph key var_key_map: Dict[Hashable, str] = {} @@ -356,10 +520,11 @@ def _wrapper(func, obj, to_array, args, kwargs): key: Tuple[Any, ...] 
= (gname_l,) for dim in variable.dims: - if dim in chunk_index_dict: - key += (chunk_index_dict[dim],) + if dim in chunk_index: + key += (chunk_index[dim],) else: # unchunked dimensions in the input have one chunk in the result + # output can have new dimensions with exactly one chunk key += (0,) # We're adding multiple new layers to the graph: @@ -370,7 +535,11 @@ def _wrapper(func, obj, to_array, args, kwargs): # layer. new_layers[gname_l][key] = (operator.getitem, from_wrapper, name) - hlg = HighLevelGraph.from_collections(gname, graph, dependencies=[dataset]) + hlg = HighLevelGraph.from_collections( + gname, + graph, + dependencies=[arg for arg in npargs if dask.is_dask_collection(arg)], + ) for gname_l, layer in new_layers.items(): # This adds in the getitems for each variable in the dataset. @@ -378,12 +547,16 @@ def _wrapper(func, obj, to_array, args, kwargs): hlg.layers[gname_l] = layer result = Dataset(coords=indexes, attrs=template.attrs) + for index in result.indexes: + result[index].attrs = template[index].attrs + result[index].encoding = template[index].encoding + for name, gname_l in var_key_map.items(): dims = template[name].dims var_chunks = [] for dim in dims: - if dim in input_chunks: - var_chunks.append(input_chunks[dim]) + if dim in output_chunks: + var_chunks.append(output_chunks[dim]) elif dim in indexes: var_chunks.append((len(indexes[dim]),)) elif dim in template.dims: @@ -394,6 +567,7 @@ def _wrapper(func, obj, to_array, args, kwargs): hlg, name=gname_l, chunks=var_chunks, dtype=template[name].dtype ) result[name] = (dims, data, template[name].attrs) + result[name].encoding = template[name].encoding result = result.set_coords(template._coord_names) diff --git a/xarray/core/pdcompat.py b/xarray/core/pdcompat.py index f2e4518e0dc..f2e22329fc8 100644 --- a/xarray/core/pdcompat.py +++ b/xarray/core/pdcompat.py @@ -55,4 +55,4 @@ def count_not_none(*args) -> int: Copied from pandas.core.common.count_not_none (not part of the public API) """ - return sum([arg is not None for arg in args]) + return sum(arg is not None for arg in args) diff --git a/xarray/core/utils.py b/xarray/core/utils.py index 1126cf3037f..0542f850b02 100644 --- a/xarray/core/utils.py +++ b/xarray/core/utils.py @@ -787,6 +787,24 @@ def drop_dims_from_indexers( ) +class UncachedAccessor: + """ Acts like a property, but on both classes and class instances + + This class is necessary because some tools (e.g. pydoc and sphinx) + inspect classes for which property returns itself and not the + accessor. 
+ """ + + def __init__(self, accessor): + self._accessor = accessor + + def __get__(self, obj, cls): + if obj is None: + return self._accessor + + return self._accessor(obj) + + # Singleton type, as per https://github.com/python/typing/pull/240 class Default(Enum): token = 0 diff --git a/xarray/core/variable.py b/xarray/core/variable.py index 68e823ca426..c505c749557 100644 --- a/xarray/core/variable.py +++ b/xarray/core/variable.py @@ -6,7 +6,17 @@ from collections import defaultdict from datetime import timedelta from distutils.version import LooseVersion -from typing import Any, Dict, Hashable, Mapping, Tuple, TypeVar, Union +from typing import ( + Any, + Dict, + Hashable, + Mapping, + Optional, + Sequence, + Tuple, + TypeVar, + Union, +) import numpy as np import pandas as pd @@ -2069,6 +2079,166 @@ def _to_numeric(self, offset=None, datetime_unit=None, dtype=float): ) return type(self)(self.dims, numeric_array, self._attrs) + def _unravel_argminmax( + self, + argminmax: str, + dim: Union[Hashable, Sequence[Hashable], None], + axis: Union[int, None], + keep_attrs: Optional[bool], + skipna: Optional[bool], + ) -> Union["Variable", Dict[Hashable, "Variable"]]: + """Apply argmin or argmax over one or more dimensions, returning the result as a + dict of DataArray that can be passed directly to isel. + """ + if dim is None and axis is None: + warnings.warn( + "Behaviour of argmin/argmax with neither dim nor axis argument will " + "change to return a dict of indices of each dimension. To get a " + "single, flat index, please use np.argmin(da.data) or " + "np.argmax(da.data) instead of da.argmin() or da.argmax().", + DeprecationWarning, + stacklevel=3, + ) + + argminmax_func = getattr(duck_array_ops, argminmax) + + if dim is ...: + # In future, should do this also when (dim is None and axis is None) + dim = self.dims + if ( + dim is None + or axis is not None + or not isinstance(dim, Sequence) + or isinstance(dim, str) + ): + # Return int index if single dimension is passed, and is not part of a + # sequence + return self.reduce( + argminmax_func, dim=dim, axis=axis, keep_attrs=keep_attrs, skipna=skipna + ) + + # Get a name for the new dimension that does not conflict with any existing + # dimension + newdimname = "_unravel_argminmax_dim_0" + count = 1 + while newdimname in self.dims: + newdimname = "_unravel_argminmax_dim_{}".format(count) + count += 1 + + stacked = self.stack({newdimname: dim}) + + result_dims = stacked.dims[:-1] + reduce_shape = tuple(self.sizes[d] for d in dim) + + result_flat_indices = stacked.reduce(argminmax_func, axis=-1, skipna=skipna) + + result_unravelled_indices = duck_array_ops.unravel_index( + result_flat_indices.data, reduce_shape + ) + + result = { + d: Variable(dims=result_dims, data=i) + for d, i in zip(dim, result_unravelled_indices) + } + + if keep_attrs is None: + keep_attrs = _get_keep_attrs(default=False) + if keep_attrs: + for v in result.values(): + v.attrs = self.attrs + + return result + + def argmin( + self, + dim: Union[Hashable, Sequence[Hashable]] = None, + axis: int = None, + keep_attrs: bool = None, + skipna: bool = None, + ) -> Union["Variable", Dict[Hashable, "Variable"]]: + """Index or indices of the minimum of the Variable over one or more dimensions. + If a sequence is passed to 'dim', then result returned as dict of Variables, + which can be passed directly to isel(). If a single str is passed to 'dim' then + returns a Variable with dtype int. + + If there are multiple minima, the indices of the first one found will be + returned. 
+ + Parameters + ---------- + dim : hashable, sequence of hashable or ..., optional + The dimensions over which to find the minimum. By default, finds minimum over + all dimensions - for now returning an int for backward compatibility, but + this is deprecated, in future will return a dict with indices for all + dimensions; to return a dict with all dimensions now, pass '...'. + axis : int, optional + Axis over which to apply `argmin`. Only one of the 'dim' and 'axis' arguments + can be supplied. + keep_attrs : bool, optional + If True, the attributes (`attrs`) will be copied from the original + object to the new one. If False (default), the new object will be + returned without attributes. + skipna : bool, optional + If True, skip missing values (as marked by NaN). By default, only + skips missing values for float dtypes; other dtypes either do not + have a sentinel missing value (int) or skipna=True has not been + implemented (object, datetime64 or timedelta64). + + Returns + ------- + result : Variable or dict of Variable + + See also + -------- + DataArray.argmin, DataArray.idxmin + """ + return self._unravel_argminmax("argmin", dim, axis, keep_attrs, skipna) + + def argmax( + self, + dim: Union[Hashable, Sequence[Hashable]] = None, + axis: int = None, + keep_attrs: bool = None, + skipna: bool = None, + ) -> Union["Variable", Dict[Hashable, "Variable"]]: + """Index or indices of the maximum of the Variable over one or more dimensions. + If a sequence is passed to 'dim', then result returned as dict of Variables, + which can be passed directly to isel(). If a single str is passed to 'dim' then + returns a Variable with dtype int. + + If there are multiple maxima, the indices of the first one found will be + returned. + + Parameters + ---------- + dim : hashable, sequence of hashable or ..., optional + The dimensions over which to find the maximum. By default, finds maximum over + all dimensions - for now returning an int for backward compatibility, but + this is deprecated, in future will return a dict with indices for all + dimensions; to return a dict with all dimensions now, pass '...'. + axis : int, optional + Axis over which to apply `argmax`. Only one of the 'dim' and 'axis' arguments + can be supplied. + keep_attrs : bool, optional + If True, the attributes (`attrs`) will be copied from the original + object to the new one. If False (default), the new object will be + returned without attributes. + skipna : bool, optional + If True, skip missing values (as marked by NaN). By default, only + skips missing values for float dtypes; other dtypes either do not + have a sentinel missing value (int) or skipna=True has not been + implemented (object, datetime64 or timedelta64).
+ + Returns + ------- + result : Variable or dict of Variable + + See also + -------- + DataArray.argmax, DataArray.idxmax + """ + return self._unravel_argminmax("argmax", dim, axis, keep_attrs, skipna) + ops.inject_all_ops_and_reduce_methods(Variable) @@ -2412,7 +2582,7 @@ def assert_unique_multiindex_level_names(variables): duplicate_names = [v for v in level_names.values() if len(v) > 1] if duplicate_names: - conflict_str = "\n".join([", ".join(v) for v in duplicate_names]) + conflict_str = "\n".join(", ".join(v) for v in duplicate_names) raise ValueError("conflicting MultiIndex level name(s):\n%s" % conflict_str) # Check confliction between level names and dimensions GH:2299 for k, v in variables.items(): diff --git a/xarray/core/weighted.py b/xarray/core/weighted.py index 996d2e4c43e..fa143342c06 100644 --- a/xarray/core/weighted.py +++ b/xarray/core/weighted.py @@ -72,11 +72,11 @@ class Weighted: def __init__(self, obj: "DataArray", weights: "DataArray") -> None: ... - @overload # noqa: F811 - def __init__(self, obj: "Dataset", weights: "DataArray") -> None: # noqa: F811 + @overload + def __init__(self, obj: "Dataset", weights: "DataArray") -> None: ... - def __init__(self, obj, weights): # noqa: F811 + def __init__(self, obj, weights): """ Create a Weighted object @@ -142,7 +142,14 @@ def _sum_of_weights( # we need to mask data values that are nan; else the weights are wrong mask = da.notnull() - sum_of_weights = self._reduce(mask, self.weights, dim=dim, skipna=False) + # bool -> int, because ``xr.dot([True, True], [True, True])`` -> True + # (and not 2); GH4074 + if self.weights.dtype == bool: + sum_of_weights = self._reduce( + mask, self.weights.astype(int), dim=dim, skipna=False + ) + else: + sum_of_weights = self._reduce(mask, self.weights, dim=dim, skipna=False) # 0-weights are not valid valid_weights = sum_of_weights != 0.0 diff --git a/xarray/plot/plot.py b/xarray/plot/plot.py index 4657bee9415..e4a981daf8c 100644 --- a/xarray/plot/plot.py +++ b/xarray/plot/plot.py @@ -14,6 +14,7 @@ from .facetgrid import _easy_facetgrid from .utils import ( _add_colorbar, + _assert_valid_xy, _ensure_plottable, _infer_interval_breaks, _infer_xy_labels, @@ -29,19 +30,17 @@ def _infer_line_data(darray, x, y, hue): - error_msg = "must be either None or one of ({:s})".format( - ", ".join([repr(dd) for dd in darray.dims]) - ) + ndims = len(darray.dims) - if x is not None and x not in darray.dims and x not in darray.coords: - raise ValueError("x " + error_msg) + if x is not None and y is not None: + raise ValueError("Cannot specify both x and y kwargs for line plots.") - if y is not None and y not in darray.dims and y not in darray.coords: - raise ValueError("y " + error_msg) + if x is not None: + _assert_valid_xy(darray, x, "x") - if x is not None and y is not None: - raise ValueError("You cannot specify both x and y kwargs" "for line plots.") + if y is not None: + _assert_valid_xy(darray, y, "y") if ndims == 1: huename = None @@ -252,7 +251,7 @@ def line( Dimension or coordinate for which you want multiple lines plotted. If plotting against a 2D coordinate, ``hue`` must be a dimension. x, y : string, optional - Dimensions or coordinates for x, y axis. + Dimension, coordinate or MultiIndex level for x, y axis. Only one of these may be specified. The other coordinate plots values from the DataArray on which this plot method is called. 
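To illustrate the boolean-weights fix in ``xarray/core/weighted.py`` above (GH4074), a small sketch with made-up values:

```python
import xarray as xr

data = xr.DataArray([1.0, 2.0])
weights = xr.DataArray([True, True])

# Previously the boolean dot product collapsed the sum of weights to True
# (i.e. 1), giving a mean of 3.0; with the cast to int it is the expected 1.5.
data.weighted(weights).mean()
```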
@@ -446,6 +445,11 @@ def __init__(self, darray):
     def __call__(self, **kwargs):
         return plot(self._da, **kwargs)
 
+    # we can't use functools.wraps here since that also modifies the name / qualname
+    __doc__ = __call__.__doc__ = plot.__doc__
+    __call__.__wrapped__ = plot  # type: ignore
+    __call__.__annotations__ = plot.__annotations__
+
     @functools.wraps(hist)
     def hist(self, ax=None, **kwargs):
         return hist(self._da, ax=ax, **kwargs)
diff --git a/xarray/plot/utils.py b/xarray/plot/utils.py
index c3512828888..e5c1fa89333 100644
--- a/xarray/plot/utils.py
+++ b/xarray/plot/utils.py
@@ -268,7 +268,7 @@ def _determine_cmap_params(
             cmap = OPTIONS["cmap_sequential"]
 
     # Handle discrete levels
-    if levels is not None and norm is None:
+    if levels is not None:
         if is_scalar(levels):
             if user_minmax:
                 levels = np.linspace(vmin, vmax, levels)
@@ -360,7 +360,9 @@ def _infer_xy_labels(darray, x, y, imshow=False, rgb=None):
 
     darray must be a 2 dimensional data array, or 3d for imshow only.
     """
-    assert x is None or x != y
+    if (x is not None) and (x == y):
+        raise ValueError("x and y cannot be equal.")
+
     if imshow and darray.ndim == 3:
         return _infer_xy_labels_3d(darray, x, y, rgb)
 
@@ -369,18 +371,41 @@ def _infer_xy_labels(darray, x, y, imshow=False, rgb=None):
             raise ValueError("DataArray must be 2d")
         y, x = darray.dims
     elif x is None:
-        if y not in darray.dims and y not in darray.coords:
-            raise ValueError("y must be a dimension name if x is not supplied")
+        _assert_valid_xy(darray, y, "y")
         x = darray.dims[0] if y == darray.dims[1] else darray.dims[1]
     elif y is None:
-        if x not in darray.dims and x not in darray.coords:
-            raise ValueError("x must be a dimension name if y is not supplied")
+        _assert_valid_xy(darray, x, "x")
         y = darray.dims[0] if x == darray.dims[1] else darray.dims[1]
-    elif any(k not in darray.coords and k not in darray.dims for k in (x, y)):
-        raise ValueError("x and y must be coordinate variables")
+    else:
+        _assert_valid_xy(darray, x, "x")
+        _assert_valid_xy(darray, y, "y")
+
+        if (
+            all(k in darray._level_coords for k in (x, y))
+            and darray._level_coords[x] == darray._level_coords[y]
+        ):
+            raise ValueError("x and y cannot be levels of the same MultiIndex")
+
     return x, y
 
 
+def _assert_valid_xy(darray, xy, name):
+    """
+    make sure x and y passed to plotting functions are valid
+    """
+
+    # MultiIndex cannot be plotted; no point in allowing them here
+    multiindex = set([darray._level_coords[lc] for lc in darray._level_coords])
+
+    valid_xy = (
+        set(darray.dims) | set(darray.coords) | set(darray._level_coords)
+    ) - multiindex
+
+    if xy not in valid_xy:
+        valid_xy_str = "', '".join(sorted(valid_xy))
+        raise ValueError(f"{name} must be one of None, '{valid_xy_str}'")
+
+
 def get_axis(figsize, size, aspect, ax):
     import matplotlib as mpl
     import matplotlib.pyplot as plt
diff --git a/xarray/static/css/style.css b/xarray/static/css/style.css
index 7e382de3b5b..39cd6d6755f 100644
--- a/xarray/static/css/style.css
+++ b/xarray/static/css/style.css
@@ -13,11 +13,29 @@
   --xr-background-color-row-odd: var(--jp-layout-color2, #eeeeee);
 }
 
+html[theme=dark],
+body.vscode-dark {
+  --xr-font-color0: rgba(255, 255, 255, 1);
+  --xr-font-color2: rgba(255, 255, 255, 0.54);
+  --xr-font-color3: rgba(255, 255, 255, 0.38);
+  --xr-border-color: #1F1F1F;
+  --xr-disabled-color: #515151;
+  --xr-background-color: #111111;
+  --xr-background-color-row-even: #111111;
+  --xr-background-color-row-odd: #313131;
+}
+
 .xr-wrap {
+  display: block;
   min-width: 300px;
   max-width: 700px;
 }
 
+.xr-text-repr-fallback {
+  /* fallback to plain text repr when CSS is not injected (untrusted notebook) */
+  display: none;
+}
+
 .xr-header {
   padding-top: 6px;
   padding-bottom: 6px;
diff --git a/xarray/static/html/icons-svg-inline.html b/xarray/static/html/icons-svg-inline.html
index c44f89c4304..b0e837a26cd 100644
--- a/xarray/static/html/icons-svg-inline.html
+++ b/xarray/static/html/icons-svg-inline.html
@@ -1,13 +1,11 @@
-Show/Hide data repr
-Show/Hide attributes
diff --git a/xarray/testing.py b/xarray/testing.py
index ac189f7e023..9681503414e 100644
--- a/xarray/testing.py
+++ b/xarray/testing.py
@@ -1,21 +1,17 @@
 """Testing functions exposed to the user API"""
+import functools
 from typing import Hashable, Set, Union
 
 import numpy as np
 import pandas as pd
 
-from xarray.core import duck_array_ops, formatting
+from xarray.core import duck_array_ops, formatting, utils
 from xarray.core.dataarray import DataArray
 from xarray.core.dataset import Dataset
 from xarray.core.indexes import default_indexes
 from xarray.core.variable import IndexVariable, Variable
 
-__all__ = (
-    "assert_allclose",
-    "assert_chunks_equal",
-    "assert_equal",
-    "assert_identical",
-)
+__all__ = ("assert_allclose", "assert_chunks_equal", "assert_equal", "assert_identical")
 
 
 def _decode_string_data(data):
@@ -123,27 +119,31 @@ def assert_allclose(a, b, rtol=1e-05, atol=1e-08, decode_bytes=True):
     """
     __tracebackhide__ = True
     assert type(a) == type(b)
-    kwargs = dict(rtol=rtol, atol=atol, decode_bytes=decode_bytes)
+
+    equiv = functools.partial(
+        _data_allclose_or_equiv, rtol=rtol, atol=atol, decode_bytes=decode_bytes
+    )
+    equiv.__name__ = "allclose"
+
+    def compat_variable(a, b):
+        a = getattr(a, "variable", a)
+        b = getattr(b, "variable", b)
+
+        return a.dims == b.dims and (a._data is b._data or equiv(a.data, b.data))
+
     if isinstance(a, Variable):
-        assert a.dims == b.dims
-        allclose = _data_allclose_or_equiv(a.values, b.values, **kwargs)
-        assert allclose, f"{a.values}\n{b.values}"
+        allclose = compat_variable(a, b)
+        assert allclose, formatting.diff_array_repr(a, b, compat=equiv)
     elif isinstance(a, DataArray):
-        assert_allclose(a.variable, b.variable, **kwargs)
-        assert set(a.coords) == set(b.coords)
-        for v in a.coords.variables:
-            # can't recurse with this function as coord is sometimes a
-            # DataArray, so call into _data_allclose_or_equiv directly
-            allclose = _data_allclose_or_equiv(
-                a.coords[v].values, b.coords[v].values, **kwargs
-            )
-            assert allclose, "{}\n{}".format(a.coords[v].values, b.coords[v].values)
+        allclose = utils.dict_equiv(
+            a.coords, b.coords, compat=compat_variable
+        ) and compat_variable(a.variable, b.variable)
+        assert allclose, formatting.diff_array_repr(a, b, compat=equiv)
     elif isinstance(a, Dataset):
-        assert set(a.data_vars) == set(b.data_vars)
-        assert set(a.coords) == set(b.coords)
-        for k in list(a.variables) + list(a.coords):
-            assert_allclose(a[k], b[k], **kwargs)
-
+        allclose = a._coord_names == b._coord_names and utils.dict_equiv(
+            a.variables, b.variables, compat=compat_variable
+        )
+        assert allclose, formatting.diff_dataset_repr(a, b, compat=equiv)
     else:
         raise TypeError("{} not supported by assertion comparison".format(type(a)))
diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py
index 0af34612b08..b1fe5375723 100644
--- a/xarray/tests/test_backends.py
+++ b/xarray/tests/test_backends.py
@@ -30,6 +30,7 @@
     save_mfdataset,
 )
 from xarray.backends.common import robust_getitem
+from xarray.backends.netcdf3 import _nc3_dtype_coercions
 from xarray.backends.netCDF4_ import _extract_nc4_variable_encoding
 from xarray.backends.pydap_ import PydapDataStore
 from xarray.coding.variables import SerializationWarning
@@ -86,6 +87,7 @@
     dask_version = "10.0"
 
 ON_WINDOWS = sys.platform == "win32"
+default_value = object()
 
 
 def open_example_dataset(name, *args, **kwargs):
@@ -227,7 +229,27 @@ def __getitem__(self, key):
 
 
 class NetCDF3Only:
-    pass
+    netcdf3_formats = ("NETCDF3_CLASSIC", "NETCDF3_64BIT")
+
+    @requires_scipy
+    def test_dtype_coercion_error(self):
+        """Failing dtype coercion should lead to an error"""
+        for dtype, format in itertools.product(
+            _nc3_dtype_coercions, self.netcdf3_formats
+        ):
+            if dtype == "bool":
+                # coerced upcast (bool to int8) ==> can never fail
+                continue
+
+            # Using the largest representable value, create some data that will
+            # no longer compare equal after the coerced downcast
+            maxval = np.iinfo(dtype).max
+            x = np.array([0, 1, 2, maxval], dtype=dtype)
+            ds = Dataset({"x": ("t", x, {})})
+
+            with create_tmp_file(allow_cleanup_failure=False) as path:
+                with pytest.raises(ValueError, match="could not safely cast"):
+                    ds.to_netcdf(path, format=format)
 
 
 class DatasetIOBase:
@@ -296,9 +318,14 @@ def test_write_store(self):
     def check_dtypes_roundtripped(self, expected, actual):
         for k in expected.variables:
             expected_dtype = expected.variables[k].dtype
-            if isinstance(self, NetCDF3Only) and expected_dtype == "int64":
-                # downcast
-                expected_dtype = np.dtype("int32")
+
+            # For NetCDF3, the backend should perform dtype coercion
+            if (
+                isinstance(self, NetCDF3Only)
+                and str(expected_dtype) in _nc3_dtype_coercions
+            ):
+                expected_dtype = np.dtype(_nc3_dtype_coercions[str(expected_dtype)])
+
             actual_dtype = actual.variables[k].dtype
             # TODO: check expected behavior for string dtypes more carefully
             string_kinds = {"O", "S", "U"}
@@ -858,7 +885,7 @@ def test_roundtrip_endian(self):
                 "x": np.arange(3, 10, dtype=">i2"),
                 "y": np.arange(3, 20, dtype="
diff --git a/xarray/tests/test_formatting_html.py b/xarray/tests/test_formatting_html.py
array")
 
 
 def test_short_data_repr_html_non_str_keys(dataset):
@@ -108,8 +108,8 @@ def test_summarize_attrs_with_unsafe_attr_name_and_value():
 def test_repr_of_dataarray(dataarray):
     formatted = fh.array_repr(dataarray)
     assert "dim_0" in formatted
-    # has an expandable data section
-    assert formatted.count("class='xr-array-in' type='checkbox' >") == 1
+    # has an expanded data section
+    assert formatted.count("class='xr-array-in' type='checkbox' checked>") == 1
     # coords and attrs don't have an items so they'll be be disabled and collapsed
     assert (
         formatted.count("class='xr-section-summary-in' type='checkbox' disabled >") == 2
@@ -137,3 +137,22 @@ def test_repr_of_dataset(dataset):
     )
     assert "<U4" in formatted or ">U4" in formatted
     assert "<IA>" in formatted
+
+
+def test_repr_text_fallback(dataset):
+    formatted = fh.dataset_repr(dataset)
+
+    # Just test that the "pre" block used for fallback to plain text is present.
+    assert "<pre class='xr-text-repr-fallback'>" in formatted
    +
    +
    +def test_variable_repr_html():
    +    v = xr.Variable(["time", "x"], [[1, 2, 3], [4, 5, 6]], {"foo": "bar"})
    +    assert hasattr(v, "_repr_html_")
    +    with xr.set_options(display_style="html"):
    +        html = v._repr_html_().strip()
    +    # We don't do a complete string identity since
    +    # html output is probably subject to change, is long and... reasons.
    +    # Just test that something reasonable was produced.
+    assert html.startswith("<div")
    +    assert "xarray.Variable" in html
    diff --git a/xarray/tests/test_groupby.py b/xarray/tests/test_groupby.py
    index 866d5fb0899..aa54c8f36f1 100644
    --- a/xarray/tests/test_groupby.py
    +++ b/xarray/tests/test_groupby.py
    @@ -538,4 +538,16 @@ def test_groupby_bins_timeseries():
         assert_identical(actual, expected)
     
     
    +def test_groupby_none_group_name():
    +    # GH158
    +    # xarray should not fail if a DataArray's name attribute is None
    +
    +    data = np.arange(10) + 10
    +    da = xr.DataArray(data)  # da.name = None
    +    key = xr.DataArray(np.floor_divide(data, 2))
    +
    +    mean = da.groupby(key).mean()
    +    assert "group" in mean.dims
    +
    +
     # TODO: move other groupby tests from test_dataset and test_dataarray over here
    diff --git a/xarray/tests/test_interp.py b/xarray/tests/test_interp.py
    index 0502348160e..7a0dda216e2 100644
    --- a/xarray/tests/test_interp.py
    +++ b/xarray/tests/test_interp.py
    @@ -699,3 +699,21 @@ def test_3641():
         times = xr.cftime_range("0001", periods=3, freq="500Y")
         da = xr.DataArray(range(3), dims=["time"], coords=[times])
         da.interp(time=["0002-05-01"])
    +
    +
    +@requires_scipy
    +@pytest.mark.parametrize("method", ["nearest", "linear"])
    +def test_decompose(method):
    +    da = xr.DataArray(
    +        np.arange(6).reshape(3, 2),
    +        dims=["x", "y"],
    +        coords={"x": [0, 1, 2], "y": [-0.1, -0.3]},
    +    )
    +    x_new = xr.DataArray([0.5, 1.5, 2.5], dims=["x1"])
    +    y_new = xr.DataArray([-0.15, -0.25], dims=["y1"])
    +    x_broadcast, y_broadcast = xr.broadcast(x_new, y_new)
    +    assert x_broadcast.ndim == 2
    +
    +    actual = da.interp(x=x_new, y=y_new, method=method).drop(("x", "y"))
    +    expected = da.interp(x=x_broadcast, y=y_broadcast, method=method).drop(("x", "y"))
    +    assert_allclose(actual, expected)
    diff --git a/xarray/tests/test_missing.py b/xarray/tests/test_missing.py
    index 731cd165244..bc186c8bd15 100644
    --- a/xarray/tests/test_missing.py
    +++ b/xarray/tests/test_missing.py
    @@ -534,6 +534,18 @@ def test_get_clean_interp_index_potential_overflow():
         get_clean_interp_index(da, "time")
     
     
    +@pytest.mark.parametrize("index", ([0, 2, 1], [0, 1, 1]))
    +def test_get_clean_interp_index_strict(index):
    +    da = xr.DataArray([0, 1, 2], dims=("x",), coords={"x": index})
    +
    +    with pytest.raises(ValueError):
    +        get_clean_interp_index(da, "x")
    +
    +    clean = get_clean_interp_index(da, "x", strict=False)
    +    np.testing.assert_array_equal(index, clean)
    +    assert clean.dtype == np.float64
    +
    +
     @pytest.fixture
     def da_time():
         return xr.DataArray(
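
A quick sketch of the `strict` escape hatch tested above (not part of the patch; `get_clean_interp_index` lives in `xarray.core.missing`): by default the index must be unique and monotonic, while `strict=False` skips those checks and simply returns the numeric index.

import numpy as np
import xarray as xr
from xarray.core.missing import get_clean_interp_index

da = xr.DataArray([0, 1, 2], dims=("x",), coords={"x": [0, 2, 1]})

# The default (strict=True) raises ValueError for this non-monotonic coordinate;
# strict=False returns the coordinate values as a float index instead.
index = get_clean_interp_index(da, "x", strict=False)
np.testing.assert_array_equal(index, [0, 2, 1])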
    diff --git a/xarray/tests/test_plot.py b/xarray/tests/test_plot.py
    index bf1f9ed60bb..938f403e01b 100644
    --- a/xarray/tests/test_plot.py
    +++ b/xarray/tests/test_plot.py
    @@ -111,6 +111,12 @@ class TestPlot(PlotTestCase):
         def setup_array(self):
             self.darray = DataArray(easy_array((2, 3, 4)))
     
    +    def test_accessor(self):
    +        from ..plot.plot import _PlotMethods
    +
    +        assert DataArray.plot is _PlotMethods
    +        assert isinstance(self.darray.plot, _PlotMethods)
    +
         def test_label_from_attrs(self):
             da = self.darray.copy()
             assert "" == label_from_attrs(da)
    @@ -136,14 +142,14 @@ def test_label_from_attrs(self):
         def test1d(self):
             self.darray[:, 0, 0].plot()
     
    -        with raises_regex(ValueError, "None"):
    +        with raises_regex(ValueError, "x must be one of None, 'dim_0'"):
                 self.darray[:, 0, 0].plot(x="dim_1")
     
             with raises_regex(TypeError, "complex128"):
                 (self.darray[:, 0, 0] + 1j).plot()
     
         def test_1d_bool(self):
    -        xr.ones_like(self.darray[:, 0, 0], dtype=np.bool).plot()
    +        xr.ones_like(self.darray[:, 0, 0], dtype=bool).plot()
     
         def test_1d_x_y_kw(self):
             z = np.arange(10)
    @@ -155,14 +161,31 @@ def test_1d_x_y_kw(self):
             for aa, (x, y) in enumerate(xy):
                 da.plot(x=x, y=y, ax=ax.flat[aa])
     
    -        with raises_regex(ValueError, "cannot"):
    +        with raises_regex(ValueError, "Cannot specify both"):
                 da.plot(x="z", y="z")
     
    -        with raises_regex(ValueError, "None"):
    -            da.plot(x="f", y="z")
    +        error_msg = "must be one of None, 'z'"
    +        with raises_regex(ValueError, f"x {error_msg}"):
    +            da.plot(x="f")
    +
    +        with raises_regex(ValueError, f"y {error_msg}"):
    +            da.plot(y="f")
    +
    +    def test_multiindex_level_as_coord(self):
    +        da = xr.DataArray(
    +            np.arange(5),
    +            dims="x",
    +            coords=dict(a=("x", np.arange(5)), b=("x", np.arange(5, 10))),
    +        )
    +        da = da.set_index(x=["a", "b"])
    +
    +        for x in ["a", "b"]:
    +            h = da.plot(x=x)[0]
    +            assert_array_equal(h.get_xdata(), da[x].values)
     
    -        with raises_regex(ValueError, "None"):
    -            da.plot(x="z", y="f")
    +        for y in ["a", "b"]:
    +            h = da.plot(y=y)[0]
    +            assert_array_equal(h.get_ydata(), da[y].values)
     
         # Test for bug in GH issue #2725
         def test_infer_line_data(self):
    @@ -211,7 +234,7 @@ def test_2d_line(self):
             self.darray[:, :, 0].plot.line(x="dim_0", hue="dim_1")
             self.darray[:, :, 0].plot.line(y="dim_0", hue="dim_1")
     
    -        with raises_regex(ValueError, "cannot"):
    +        with raises_regex(ValueError, "Cannot"):
                 self.darray[:, :, 0].plot.line(x="dim_1", y="dim_0", hue="dim_1")
     
         def test_2d_line_accepts_legend_kw(self):
    @@ -854,21 +877,22 @@ def test_norm_sets_vmin_vmax(self):
             vmin = self.data.min()
             vmax = self.data.max()
     
    -        for norm, extend in zip(
    +        for norm, extend, levels in zip(
                 [
    +                mpl.colors.Normalize(),
                     mpl.colors.Normalize(),
                     mpl.colors.Normalize(vmin + 0.1, vmax - 0.1),
                     mpl.colors.Normalize(None, vmax - 0.1),
                     mpl.colors.Normalize(vmin + 0.1, None),
                 ],
    -            ["neither", "both", "max", "min"],
    +            ["neither", "neither", "both", "max", "min"],
    +            [7, None, None, None, None],
             ):
     
                 test_min = vmin if norm.vmin is None else norm.vmin
                 test_max = vmax if norm.vmax is None else norm.vmax
     
    -            cmap_params = _determine_cmap_params(self.data, norm=norm)
    -
    +            cmap_params = _determine_cmap_params(self.data, norm=norm, levels=levels)
                 assert cmap_params["vmin"] == test_min
                 assert cmap_params["vmax"] == test_max
                 assert cmap_params["extend"] == extend
    @@ -1013,7 +1037,7 @@ def test_1d_raises_valueerror(self):
                 self.plotfunc(self.darray[0, :])
     
         def test_bool(self):
    -        xr.ones_like(self.darray, dtype=np.bool).plot()
    +        xr.ones_like(self.darray, dtype=bool).plot()
     
         def test_complex_raises_typeerror(self):
             with raises_regex(TypeError, "complex128"):
    @@ -1031,6 +1055,16 @@ def test_nonnumeric_index_raises_typeerror(self):
             with raises_regex(TypeError, r"[Pp]lot"):
                 self.plotfunc(a)
     
    +    def test_multiindex_raises_typeerror(self):
    +        a = DataArray(
    +            easy_array((3, 2)),
    +            dims=("x", "y"),
    +            coords=dict(x=("x", [0, 1, 2]), a=("y", [0, 1]), b=("y", [2, 3])),
    +        )
    +        a = a.set_index(y=("a", "b"))
    +        with raises_regex(TypeError, r"[Pp]lot"):
    +            self.plotfunc(a)
    +
         def test_can_pass_in_axis(self):
             self.pass_in_axis(self.plotmethod)
     
    @@ -1139,15 +1173,16 @@ def test_positional_coord_string(self):
             assert "y_long_name [y_units]" == ax.get_ylabel()
     
         def test_bad_x_string_exception(self):
    -        with raises_regex(ValueError, "x and y must be coordinate variables"):
    +
    +        with raises_regex(ValueError, "x and y cannot be equal."):
    +            self.plotmethod(x="y", y="y")
    +
    +        error_msg = "must be one of None, 'x', 'x2d', 'y', 'y2d'"
    +        with raises_regex(ValueError, f"x {error_msg}"):
                 self.plotmethod("not_a_real_dim", "y")
    -        with raises_regex(
    -            ValueError, "x must be a dimension name if y is not supplied"
    -        ):
    +        with raises_regex(ValueError, f"x {error_msg}"):
                 self.plotmethod(x="not_a_real_dim")
    -        with raises_regex(
    -            ValueError, "y must be a dimension name if x is not supplied"
    -        ):
    +        with raises_regex(ValueError, f"y {error_msg}"):
                 self.plotmethod(y="not_a_real_dim")
             self.darray.coords["z"] = 100
     
    @@ -1182,6 +1217,27 @@ def test_non_linked_coords_transpose(self):
             # simply ensure that these high coords were passed over
             assert np.min(ax.get_xlim()) > 100.0
     
    +    def test_multiindex_level_as_coord(self):
    +        da = DataArray(
    +            easy_array((3, 2)),
    +            dims=("x", "y"),
    +            coords=dict(x=("x", [0, 1, 2]), a=("y", [0, 1]), b=("y", [2, 3])),
    +        )
    +        da = da.set_index(y=["a", "b"])
    +
    +        for x, y in (("a", "x"), ("b", "x"), ("x", "a"), ("x", "b")):
    +            self.plotfunc(da, x=x, y=y)
    +
    +            ax = plt.gca()
    +            assert x == ax.get_xlabel()
    +            assert y == ax.get_ylabel()
    +
    +        with raises_regex(ValueError, "levels of the same MultiIndex"):
    +            self.plotfunc(da, x="a", y="b")
    +
    +        with raises_regex(ValueError, "y must be one of None, 'a', 'b', 'x'"):
    +            self.plotfunc(da, x="a", y="y")
    +
         def test_default_title(self):
             a = DataArray(easy_array((4, 3, 2)), dims=["a", "b", "c"])
             a.coords["c"] = [0, 1]
    @@ -2048,6 +2104,12 @@ def setUp(self):
             ds.B.attrs["units"] = "Bunits"
             self.ds = ds
     
    +    def test_accessor(self):
    +        from ..plot.dataset_plot import _Dataset_PlotMethods
    +
    +        assert Dataset.plot is _Dataset_PlotMethods
    +        assert isinstance(self.ds.plot, _Dataset_PlotMethods)
    +
         @pytest.mark.parametrize(
             "add_guide, hue_style, legend, colorbar",
             [
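
A usage sketch of the stricter x/y validation and the MultiIndex-level support exercised by the plot tests above (not part of the patch; assumes matplotlib is installed):

import numpy as np
import xarray as xr

da = xr.DataArray(
    np.arange(5),
    dims="x",
    coords=dict(a=("x", np.arange(5)), b=("x", np.arange(5, 10))),
).set_index(x=["a", "b"])

# A MultiIndex level can now be used directly as the plot coordinate.
da.plot(x="a")

# An unknown name is rejected up front by _assert_valid_xy with a ValueError
# that lists the valid choices (see the raises_regex assertions above):
# da.plot(x="not_a_level")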
    diff --git a/xarray/tests/test_testing.py b/xarray/tests/test_testing.py
    index 041b7341ade..f4961af58e9 100644
    --- a/xarray/tests/test_testing.py
    +++ b/xarray/tests/test_testing.py
    @@ -1,3 +1,5 @@
    +import pytest
    +
     import xarray as xr
     
     
    @@ -5,3 +7,26 @@ def test_allclose_regression():
         x = xr.DataArray(1.01)
         y = xr.DataArray(1.02)
         xr.testing.assert_allclose(x, y, atol=0.01)
    +
    +
    +@pytest.mark.parametrize(
    +    "obj1,obj2",
    +    (
    +        pytest.param(
    +            xr.Variable("x", [1e-17, 2]), xr.Variable("x", [0, 3]), id="Variable",
    +        ),
    +        pytest.param(
    +            xr.DataArray([1e-17, 2], dims="x"),
    +            xr.DataArray([0, 3], dims="x"),
    +            id="DataArray",
    +        ),
    +        pytest.param(
    +            xr.Dataset({"a": ("x", [1e-17, 2]), "b": ("y", [-2e-18, 2])}),
    +            xr.Dataset({"a": ("x", [0, 2]), "b": ("y", [0, 1])}),
    +            id="Dataset",
    +        ),
    +    ),
    +)
    +def test_assert_allclose(obj1, obj2):
    +    with pytest.raises(AssertionError):
    +        xr.testing.assert_allclose(obj1, obj2)
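
A small sketch of the behaviour the parametrized cases above pin down (not part of the patch): `assert_allclose` passes when values agree within `rtol`/`atol` and raises `AssertionError` otherwise; with the `testing.py` changes earlier in this diff the failure message is the formatted object diff rather than the raw arrays.

import pytest
import xarray as xr

a = xr.DataArray([1e-17, 2.0], dims="x")
b = xr.DataArray([0.0, 2.0], dims="x")

# Within the default tolerances (rtol=1e-05, atol=1e-08) this passes...
xr.testing.assert_allclose(a, b)

# ...but values outside the tolerances raise AssertionError.
with pytest.raises(AssertionError):
    xr.testing.assert_allclose(a, xr.DataArray([0.0, 3.0], dims="x"))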
    diff --git a/xarray/tests/test_units.py b/xarray/tests/test_units.py
    index 2826dc2479c..20a5f0e8613 100644
    --- a/xarray/tests/test_units.py
    +++ b/xarray/tests/test_units.py
    @@ -7,9 +7,8 @@
     import pytest
     
     import xarray as xr
    -from xarray.core import formatting
     from xarray.core.npcompat import IS_NEP18_ACTIVE
    -from xarray.testing import assert_allclose, assert_identical
    +from xarray.testing import assert_allclose, assert_equal, assert_identical
     
     from .test_variable import _PAD_XR_NP_ARGS, VariableSubclassobjects
     
    @@ -27,11 +26,6 @@
         pytest.mark.skipif(
             not IS_NEP18_ACTIVE, reason="NUMPY_EXPERIMENTAL_ARRAY_FUNCTION is not enabled"
         ),
    -    # TODO: remove this once pint has a released version with __array_function__
    -    pytest.mark.skipif(
    -        not hasattr(unit_registry.Quantity, "__array_function__"),
    -        reason="pint does not implement __array_function__ yet",
    -    ),
         # pytest.mark.filterwarnings("ignore:::pint[.*]"),
     ]
     
    @@ -51,10 +45,23 @@ def dimensionality(obj):
     def compatible_mappings(first, second):
         return {
             key: is_compatible(unit1, unit2)
    -        for key, (unit1, unit2) in merge_mappings(first, second)
    +        for key, (unit1, unit2) in zip_mappings(first, second)
         }
     
     
    +def merge_mappings(base, *mappings):
    +    result = base.copy()
    +    for m in mappings:
    +        result.update(m)
    +
    +    return result
    +
    +
    +def zip_mappings(*mappings):
    +    for key in set(mappings[0]).intersection(*mappings[1:]):
    +        yield key, tuple(m[key] for m in mappings)
    +
    +
     def array_extract_units(obj):
         if isinstance(obj, (xr.Variable, xr.DataArray, xr.Dataset)):
             obj = obj.data
    @@ -257,50 +264,11 @@ def assert_units_equal(a, b):
         assert extract_units(a) == extract_units(b)
     
     
    -def assert_equal_with_units(a, b):
    -    # works like xr.testing.assert_equal, but also explicitly checks units
    -    # so, it is more like assert_identical
    -    __tracebackhide__ = True
    -
    -    if isinstance(a, xr.Dataset) or isinstance(b, xr.Dataset):
    -        a_units = extract_units(a)
    -        b_units = extract_units(b)
    -
    -        a_without_units = strip_units(a)
    -        b_without_units = strip_units(b)
    -
    -        assert a_without_units.equals(b_without_units), formatting.diff_dataset_repr(
    -            a, b, "equals"
    -        )
    -        assert a_units == b_units
    -    else:
    -        a = a if not isinstance(a, (xr.DataArray, xr.Variable)) else a.data
    -        b = b if not isinstance(b, (xr.DataArray, xr.Variable)) else b.data
    -
    -        assert type(a) == type(b) or (
    -            isinstance(a, Quantity) and isinstance(b, Quantity)
    -        )
    -
    -        # workaround until pint implements allclose in __array_function__
    -        if isinstance(a, Quantity) or isinstance(b, Quantity):
    -            assert (
    -                hasattr(a, "magnitude") and hasattr(b, "magnitude")
    -            ) and np.allclose(a.magnitude, b.magnitude, equal_nan=True)
    -            assert (hasattr(a, "units") and hasattr(b, "units")) and a.units == b.units
    -        else:
    -            assert np.allclose(a, b, equal_nan=True)
    -
    -
     @pytest.fixture(params=[float, int])
     def dtype(request):
         return request.param
     
     
    -def merge_mappings(*mappings):
    -    for key in set(mappings[0]).intersection(*mappings[1:]):
    -        yield key, tuple(m[key] for m in mappings)
    -
    -
     def merge_args(default_args, new_args):
         from itertools import zip_longest
     
    @@ -329,19 +297,29 @@ def __call__(self, obj, *args, **kwargs):
             all_args = merge_args(self.args, args)
             all_kwargs = {**self.kwargs, **kwargs}
     
    +        xarray_classes = (
    +            xr.Variable,
    +            xr.DataArray,
    +            xr.Dataset,
    +            xr.core.groupby.GroupBy,
    +        )
    +
    +        if not isinstance(obj, xarray_classes):
    +            # remove typical xarray args like "dim"
    +            exclude_kwargs = ("dim", "dims")
    +            all_kwargs = {
    +                key: value
    +                for key, value in all_kwargs.items()
    +                if key not in exclude_kwargs
    +            }
    +
             func = getattr(obj, self.name, None)
    +
             if func is None or not isinstance(func, Callable):
                 # fall back to module level numpy functions if not a xarray object
                 if not isinstance(obj, (xr.Variable, xr.DataArray, xr.Dataset)):
                     numpy_func = getattr(np, self.name)
                     func = partial(numpy_func, obj)
    -                # remove typical xarray args like "dim"
    -                exclude_kwargs = ("dim", "dims")
    -                all_kwargs = {
    -                    key: value
    -                    for key, value in all_kwargs.items()
    -                    if key not in exclude_kwargs
    -                }
                 else:
                     raise AttributeError(f"{obj} has no method named '{self.name}'")
     
    @@ -425,6 +403,10 @@ def test_apply_ufunc_dataset(dtype):
         assert_identical(expected, actual)
     
     
    +# TODO: remove once pint==0.12 has been released
    +@pytest.mark.xfail(
    +    LooseVersion(pint.__version__) <= "0.12", reason="pint bug in isclose"
    +)
     @pytest.mark.parametrize(
         "unit,error",
         (
    @@ -512,6 +494,10 @@ def test_align_dataarray(fill_value, variant, unit, error, dtype):
         assert_allclose(expected_b, actual_b)
     
     
    +# TODO: remove once pint==0.12 has been released
    +@pytest.mark.xfail(
    +    LooseVersion(pint.__version__) <= "0.12", reason="pint bug in isclose"
    +)
     @pytest.mark.parametrize(
         "unit,error",
         (
    @@ -929,6 +915,10 @@ def test_concat_dataset(variant, unit, error, dtype):
         assert_identical(expected, actual)
     
     
    +# TODO: remove once pint==0.12 has been released
    +@pytest.mark.xfail(
    +    LooseVersion(pint.__version__) <= "0.12", reason="pint bug in isclose"
    +)
     @pytest.mark.parametrize(
         "unit,error",
         (
    @@ -1036,6 +1026,10 @@ def test_merge_dataarray(variant, unit, error, dtype):
         assert_allclose(expected, actual)
     
     
    +# TODO: remove once pint==0.12 has been released
    +@pytest.mark.xfail(
    +    LooseVersion(pint.__version__) <= "0.12", reason="pint bug in isclose"
    +)
     @pytest.mark.parametrize(
         "unit,error",
         (
    @@ -1385,7 +1379,6 @@ def wrapper(cls):
         "test_datetime64_conversion",
         "test_timedelta64_conversion",
         "test_pandas_period_index",
    -    "test_1d_math",
         "test_1d_reduce",
         "test_array_interface",
         "test___array__",
    @@ -1413,13 +1406,20 @@ def example_1d_objects(self):
             ]:
                 yield (self.cls("x", data), data)
     
    +    # TODO: remove once pint==0.12 has been released
    +    @pytest.mark.xfail(
    +        LooseVersion(pint.__version__) <= "0.12", reason="pint bug in isclose"
    +    )
    +    def test_real_and_imag(self):
    +        super().test_real_and_imag()
    +
         @pytest.mark.parametrize(
             "func",
             (
                 method("all"),
                 method("any"),
    -            method("argmax"),
    -            method("argmin"),
    +            method("argmax", dim="x"),
    +            method("argmin", dim="x"),
                 method("argsort"),
                 method("cumprod"),
                 method("cumsum"),
    @@ -1443,13 +1443,33 @@ def test_aggregation(self, func, dtype):
             )
             variable = xr.Variable("x", array)
     
    -        units = extract_units(func(array))
    +        numpy_kwargs = func.kwargs.copy()
    +        if "dim" in func.kwargs:
    +            numpy_kwargs["axis"] = variable.get_axis_num(numpy_kwargs.pop("dim"))
    +
    +        units = extract_units(func(array, **numpy_kwargs))
             expected = attach_units(func(strip_units(variable)), units)
             actual = func(variable)
     
             assert_units_equal(expected, actual)
    -        xr.testing.assert_identical(expected, actual)
    +        assert_allclose(expected, actual)
    +
    +    # TODO: remove once pint==0.12 has been released
    +    @pytest.mark.xfail(
    +        LooseVersion(pint.__version__) <= "0.12", reason="pint bug in isclose"
    +    )
    +    def test_aggregate_complex(self):
    +        variable = xr.Variable("x", [1, 2j, np.nan] * unit_registry.m)
    +        expected = xr.Variable((), (0.5 + 1j) * unit_registry.m)
    +        actual = variable.mean()
    +
    +        assert_units_equal(expected, actual)
    +        xr.testing.assert_allclose(expected, actual)
     
    +    # TODO: remove once pint==0.12 has been released
    +    @pytest.mark.xfail(
    +        LooseVersion(pint.__version__) <= "0.12", reason="pint bug in isclose"
    +    )
         @pytest.mark.parametrize(
             "func",
             (
    @@ -1660,7 +1680,7 @@ def test_missing_value_fillna(self, unit, error):
                 method("equals"),
                 pytest.param(
                     method("identical"),
    -                marks=pytest.mark.skip(reason="behaviour of identical is unclear"),
    +                marks=pytest.mark.skip(reason="behavior of identical is undecided"),
                 ),
             ),
             ids=repr,
    @@ -1748,6 +1768,10 @@ def test_isel(self, indices, dtype):
             assert_units_equal(expected, actual)
             xr.testing.assert_identical(expected, actual)
     
    +    # TODO: remove once pint==0.12 has been released
    +    @pytest.mark.xfail(
    +        LooseVersion(pint.__version__) <= "0.12", reason="pint bug in isclose"
    +    )
         @pytest.mark.parametrize(
             "unit,error",
             (
    @@ -1885,7 +1909,10 @@ def test_squeeze(self, dtype):
                 method("coarsen", windows={"y": 2}, func=np.mean),
                 pytest.param(
                     method("quantile", q=[0.25, 0.75]),
    -                marks=pytest.mark.xfail(reason="nanquantile not implemented"),
    +                marks=pytest.mark.xfail(
    +                    LooseVersion(pint.__version__) <= "0.12",
    +                    reason="quantile / nanquantile not implemented yet",
    +                ),
                 ),
                 pytest.param(
                     method("rank", dim="x"),
    @@ -2161,8 +2188,8 @@ class TestDataArray:
                     "with_dims",
                     marks=pytest.mark.xfail(reason="units in indexes are not supported"),
                 ),
    -            pytest.param("with_coords"),
    -            pytest.param("without_coords"),
    +            "with_coords",
    +            "without_coords",
             ),
         )
         def test_init(self, variant, dtype):
    @@ -2221,24 +2248,36 @@ def test_repr(self, func, variant, dtype):
             # warnings or errors, but does not check the result
             func(data_array)
     
    +    # TODO: remove once pint==0.12 has been released
    +    @pytest.mark.xfail(
    +        LooseVersion(pint.__version__) <= "0.12", reason="pint bug in isclose",
    +    )
         @pytest.mark.parametrize(
             "func",
             (
    +            function("all"),
    +            function("any"),
                 pytest.param(
    -                function("all"),
    -                marks=pytest.mark.xfail(reason="not implemented by pint yet"),
    +                function("argmax"),
    +                marks=pytest.mark.skip(
    +                    reason="calling np.argmax as a function on xarray objects is not "
    +                    "supported"
    +                ),
                 ),
                 pytest.param(
    -                function("any"),
    -                marks=pytest.mark.xfail(reason="not implemented by pint yet"),
    +                function("argmin"),
    +                marks=pytest.mark.skip(
    +                    reason="calling np.argmin as a function on xarray objects is not "
    +                    "supported"
    +                ),
                 ),
    -            function("argmax"),
    -            function("argmin"),
                 function("max"),
                 function("mean"),
                 pytest.param(
                     function("median"),
    -                marks=pytest.mark.xfail(reason="not implemented by xarray"),
    +                marks=pytest.mark.skip(
    +                    reason="median does not work with dataarrays yet"
    +                ),
                 ),
                 function("min"),
                 pytest.param(
    @@ -2249,38 +2288,24 @@ def test_repr(self, func, variant, dtype):
                 function("std"),
                 function("var"),
                 function("cumsum"),
    -            pytest.param(
    -                function("cumprod"),
    -                marks=pytest.mark.xfail(reason="not implemented by pint yet"),
    -            ),
    -            pytest.param(
    -                method("all"),
    -                marks=pytest.mark.xfail(reason="not implemented by pint yet"),
    -            ),
    -            pytest.param(
    -                method("any"),
    -                marks=pytest.mark.xfail(reason="not implemented by pint yet"),
    -            ),
    -            method("argmax"),
    -            method("argmin"),
    +            function("cumprod"),
    +            method("all"),
    +            method("any"),
    +            method("argmax", dim="x"),
    +            method("argmin", dim="x"),
                 method("max"),
                 method("mean"),
                 method("median"),
                 method("min"),
                 pytest.param(
                     method("prod"),
    -                marks=pytest.mark.xfail(
    -                    reason="comparison of quantity with ndarrays in nanops not implemented"
    -                ),
    +                marks=pytest.mark.xfail(reason="not implemented by pint yet"),
                 ),
                 method("sum"),
                 method("std"),
                 method("var"),
                 method("cumsum"),
    -            pytest.param(
    -                method("cumprod"),
    -                marks=pytest.mark.xfail(reason="pint does not implement cumprod yet"),
    -            ),
    +            method("cumprod"),
             ),
             ids=repr,
         )
    @@ -2290,13 +2315,18 @@ def test_aggregation(self, func, dtype):
             )
             data_array = xr.DataArray(data=array, dims="x")
     
    +        numpy_kwargs = func.kwargs.copy()
    +        if "dim" in numpy_kwargs:
    +            numpy_kwargs["axis"] = data_array.get_axis_num(numpy_kwargs.pop("dim"))
    +
             # units differ based on the applied function, so we need to
             # first compute the units
             units = extract_units(func(array))
             expected = attach_units(func(strip_units(data_array)), units)
             actual = func(data_array)
     
    -        assert_equal_with_units(expected, actual)
    +        assert_units_equal(expected, actual)
    +        assert_allclose(expected, actual)
     
         @pytest.mark.parametrize(
             "func",
    @@ -2314,7 +2344,8 @@ def test_unary_operations(self, func, dtype):
             expected = attach_units(func(strip_units(data_array)), units)
             actual = func(data_array)
     
    -        assert_equal_with_units(expected, actual)
    +        assert_units_equal(expected, actual)
    +        xr.testing.assert_identical(expected, actual)
     
         @pytest.mark.parametrize(
             "func",
    @@ -2333,7 +2364,8 @@ def test_binary_operations(self, func, dtype):
             expected = attach_units(func(strip_units(data_array)), units)
             actual = func(data_array)
     
    -        assert_equal_with_units(expected, actual)
    +        assert_units_equal(expected, actual)
    +        xr.testing.assert_identical(expected, actual)
     
         @pytest.mark.parametrize(
             "comparison",
    @@ -2383,7 +2415,8 @@ def test_comparison_operations(self, comparison, unit, error, dtype):
                 strip_units(convert_units(to_compare_with, expected_units)),
             )
     
    -        assert_equal_with_units(expected, actual)
    +        assert_units_equal(expected, actual)
    +        xr.testing.assert_identical(expected, actual)
     
         @pytest.mark.parametrize(
             "units,error",
    @@ -2411,9 +2444,10 @@ def test_univariate_ufunc(self, units, error, dtype):
             )
             actual = func(data_array)
     
    -        assert_equal_with_units(expected, actual)
    +        assert_units_equal(expected, actual)
    +        xr.testing.assert_identical(expected, actual)
     
    -    @pytest.mark.xfail(reason="xarray's `np.maximum` strips units")
    +    @pytest.mark.xfail(reason="needs the type register system for __array_ufunc__")
         @pytest.mark.parametrize(
             "unit,error",
             (
    @@ -2422,7 +2456,12 @@ def test_univariate_ufunc(self, units, error, dtype):
                     unit_registry.dimensionless, DimensionalityError, id="dimensionless"
                 ),
                 pytest.param(unit_registry.s, DimensionalityError, id="incompatible_unit"),
    -            pytest.param(unit_registry.mm, None, id="compatible_unit"),
    +            pytest.param(
    +                unit_registry.mm,
    +                None,
    +                id="compatible_unit",
    +                marks=pytest.mark.xfail(reason="pint converts to the wrong units"),
    +            ),
                 pytest.param(unit_registry.m, None, id="identical_unit"),
             ),
         )
    @@ -2433,7 +2472,7 @@ def test_bivariate_ufunc(self, unit, error, dtype):
     
             if error is not None:
                 with pytest.raises(error):
    -                np.maximum(data_array, 0 * unit)
    +                np.maximum(data_array, 1 * unit)
     
                 return
     
    @@ -2441,16 +2480,18 @@ def test_bivariate_ufunc(self, unit, error, dtype):
             expected = attach_units(
                 np.maximum(
                     strip_units(data_array),
    -                strip_units(convert_units(0 * unit, expected_units)),
    +                strip_units(convert_units(1 * unit, expected_units)),
                 ),
                 expected_units,
             )
     
    -        actual = np.maximum(data_array, 0 * unit)
    -        assert_equal_with_units(expected, actual)
    +        actual = np.maximum(data_array, 1 * unit)
    +        assert_units_equal(expected, actual)
    +        xr.testing.assert_identical(expected, actual)
     
    -        actual = np.maximum(0 * unit, data_array)
    -        assert_equal_with_units(expected, actual)
    +        actual = np.maximum(1 * unit, data_array)
    +        assert_units_equal(expected, actual)
    +        xr.testing.assert_identical(expected, actual)
     
         @pytest.mark.parametrize("property", ("T", "imag", "real"))
         def test_numpy_properties(self, property, dtype):
    @@ -2466,7 +2507,8 @@ def test_numpy_properties(self, property, dtype):
             )
             actual = getattr(data_array, property)
     
    -        assert_equal_with_units(expected, actual)
    +        assert_units_equal(expected, actual)
    +        xr.testing.assert_identical(expected, actual)
     
         @pytest.mark.parametrize(
             "func",
    @@ -2481,16 +2523,86 @@ def test_numpy_methods(self, func, dtype):
             expected = attach_units(strip_units(data_array), units)
             actual = func(data_array)
     
    -        assert_equal_with_units(expected, actual)
    +        assert_units_equal(expected, actual)
    +        xr.testing.assert_identical(expected, actual)
    +
    +    def test_item(self, dtype):
    +        array = np.arange(10).astype(dtype) * unit_registry.m
    +        data_array = xr.DataArray(data=array)
    +
    +        func = method("item", 2)
    +
    +        expected = func(strip_units(data_array)) * unit_registry.m
    +        actual = func(data_array)
    +
    +        np.testing.assert_allclose(expected, actual)
    +
    +    @pytest.mark.parametrize(
    +        "unit,error",
    +        (
    +            pytest.param(1, DimensionalityError, id="no_unit"),
    +            pytest.param(
    +                unit_registry.dimensionless, DimensionalityError, id="dimensionless"
    +            ),
    +            pytest.param(unit_registry.s, DimensionalityError, id="incompatible_unit"),
    +            pytest.param(unit_registry.cm, None, id="compatible_unit"),
    +            pytest.param(unit_registry.m, None, id="identical_unit"),
    +        ),
    +    )
    +    @pytest.mark.parametrize(
    +        "func",
    +        (
    +            method("searchsorted", 5),
    +            pytest.param(
    +                function("searchsorted", 5),
    +                marks=pytest.mark.xfail(
    +                    reason="xarray does not implement __array_function__"
    +                ),
    +            ),
    +        ),
    +        ids=repr,
    +    )
    +    def test_searchsorted(self, func, unit, error, dtype):
    +        array = np.arange(10).astype(dtype) * unit_registry.m
    +        data_array = xr.DataArray(data=array)
    +
    +        scalar_types = (int, float)
    +        args = list(value * unit for value in func.args)
    +        kwargs = {
    +            key: (value * unit if isinstance(value, scalar_types) else value)
    +            for key, value in func.kwargs.items()
    +        }
    +
    +        if error is not None:
    +            with pytest.raises(error):
    +                func(data_array, *args, **kwargs)
    +
    +            return
    +
    +        units = extract_units(data_array)
    +        expected_units = extract_units(func(array, *args, **kwargs))
    +        stripped_args = [strip_units(convert_units(value, units)) for value in args]
    +        stripped_kwargs = {
    +            key: strip_units(convert_units(value, units))
    +            for key, value in kwargs.items()
    +        }
    +        expected = attach_units(
    +            func(strip_units(data_array), *stripped_args, **stripped_kwargs),
    +            expected_units,
    +        )
    +        actual = func(data_array, *args, **kwargs)
    +
    +        assert_units_equal(expected, actual)
    +        np.testing.assert_allclose(expected, actual)
     
         @pytest.mark.parametrize(
             "func",
             (
                 method("clip", min=3, max=8),
                 pytest.param(
    -                method("searchsorted", v=5),
    +                function("clip", a_min=3, a_max=8),
                     marks=pytest.mark.xfail(
    -                    reason="searchsorted somehow requires a undocumented `keys` argument"
    +                    reason="xarray does not implement __array_function__"
                     ),
                 ),
             ),
    @@ -2513,28 +2625,32 @@ def test_numpy_methods_with_args(self, func, unit, error, dtype):
             data_array = xr.DataArray(data=array)
     
             scalar_types = (int, float)
    +        args = list(value * unit for value in func.args)
             kwargs = {
                 key: (value * unit if isinstance(value, scalar_types) else value)
                 for key, value in func.kwargs.items()
             }
             if error is not None:
                 with pytest.raises(error):
    -                func(data_array, **kwargs)
    +                func(data_array, *args, **kwargs)
     
                 return
     
             units = extract_units(data_array)
    -        expected_units = extract_units(func(array, **kwargs))
    +        expected_units = extract_units(func(array, *args, **kwargs))
    +        stripped_args = [strip_units(convert_units(value, units)) for value in args]
             stripped_kwargs = {
                 key: strip_units(convert_units(value, units))
                 for key, value in kwargs.items()
             }
             expected = attach_units(
    -            func(strip_units(data_array), **stripped_kwargs), expected_units
    +            func(strip_units(data_array), *stripped_args, **stripped_kwargs),
    +            expected_units,
             )
    -        actual = func(data_array, **kwargs)
    +        actual = func(data_array, *args, **kwargs)
     
    -        assert_equal_with_units(expected, actual)
    +        assert_units_equal(expected, actual)
    +        xr.testing.assert_identical(expected, actual)
     
         @pytest.mark.parametrize(
             "func", (method("isnull"), method("notnull"), method("count")), ids=repr
    @@ -2551,15 +2667,13 @@ def test_missing_value_detection(self, func, dtype):
                 )
                 * unit_registry.degK
             )
    -        x = np.arange(array.shape[0]) * unit_registry.m
    -        y = np.arange(array.shape[1]) * unit_registry.m
    -
    -        data_array = xr.DataArray(data=array, coords={"x": x, "y": y}, dims=("x", "y"))
    +        data_array = xr.DataArray(data=array)
     
             expected = func(strip_units(data_array))
             actual = func(data_array)
     
    -        assert_equal_with_units(expected, actual)
    +        assert_units_equal(expected, actual)
    +        xr.testing.assert_identical(expected, actual)
     
         @pytest.mark.xfail(reason="ffill and bfill lose units in data")
         @pytest.mark.parametrize("func", (method("ffill"), method("bfill")), ids=repr)
    @@ -2576,7 +2690,8 @@ def test_missing_value_filling(self, func, dtype):
             )
             actual = func(data_array, dim="x")
     
    -        assert_equal_with_units(expected, actual)
    +        assert_units_equal(expected, actual)
    +        xr.testing.assert_identical(expected, actual)
     
         @pytest.mark.parametrize(
             "unit,error",
    @@ -2586,12 +2701,7 @@ def test_missing_value_filling(self, func, dtype):
                     unit_registry.dimensionless, DimensionalityError, id="dimensionless"
                 ),
                 pytest.param(unit_registry.s, DimensionalityError, id="incompatible_unit"),
    -            pytest.param(
    -                unit_registry.cm,
    -                None,
    -                id="compatible_unit",
    -                marks=pytest.mark.xfail(reason="fillna converts to value's unit"),
    -            ),
    +            pytest.param(unit_registry.cm, None, id="compatible_unit"),
                 pytest.param(unit_registry.m, None, id="identical_unit"),
             ),
         )
    @@ -2629,7 +2739,8 @@ def test_fillna(self, fill_value, unit, error, dtype):
             )
             actual = func(data_array, value=value)
     
    -        assert_equal_with_units(expected, actual)
    +        assert_units_equal(expected, actual)
    +        xr.testing.assert_identical(expected, actual)
     
         def test_dropna(self, dtype):
             array = (
    @@ -2643,18 +2754,13 @@ def test_dropna(self, dtype):
             expected = attach_units(strip_units(data_array).dropna(dim="x"), units)
             actual = data_array.dropna(dim="x")
     
    -        assert_equal_with_units(expected, actual)
    +        assert_units_equal(expected, actual)
    +        xr.testing.assert_identical(expected, actual)
     
         @pytest.mark.parametrize(
             "unit",
             (
    -            pytest.param(
    -                1,
    -                id="no_unit",
    -                marks=pytest.mark.xfail(
    -                    reason="pint's isin implementation does not work well with mixed args"
    -                ),
    -            ),
    +            pytest.param(1, id="no_unit"),
                 pytest.param(unit_registry.dimensionless, id="dimensionless"),
                 pytest.param(unit_registry.s, id="incompatible_unit"),
                 pytest.param(unit_registry.cm, id="compatible_unit"),
    @@ -2677,22 +2783,11 @@ def test_isin(self, unit, dtype):
             ) & array.check(unit)
             actual = data_array.isin(values)
     
    -        assert_equal_with_units(expected, actual)
    +        assert_units_equal(expected, actual)
    +        xr.testing.assert_identical(expected, actual)
     
         @pytest.mark.parametrize(
    -        "variant",
    -        (
    -            pytest.param(
    -                "masking",
    -                marks=pytest.mark.xfail(reason="array(nan) is not a quantity"),
    -            ),
    -            "replacing_scalar",
    -            "replacing_array",
    -            pytest.param(
    -                "dropping",
    -                marks=pytest.mark.xfail(reason="array(nan) is not a quantity"),
    -            ),
    -        ),
    +        "variant", ("masking", "replacing_scalar", "replacing_array", "dropping")
         )
         @pytest.mark.parametrize(
             "unit,error",
    @@ -2742,22 +2837,24 @@ def test_where(self, variant, unit, error, dtype):
             )
             actual = data_array.where(**kwargs)
     
    -        assert_equal_with_units(expected, actual)
    +        assert_units_equal(expected, actual)
    +        xr.testing.assert_identical(expected, actual)
     
    -    @pytest.mark.xfail(reason="interpolate strips units")
    -    def test_interpolate_na(self, dtype):
    +    @pytest.mark.xfail(reason="uses numpy.vectorize")
    +    def test_interpolate_na(self):
             array = (
                 np.array([-1.03, 0.1, 1.4, np.nan, 2.3, np.nan, np.nan, 9.1])
                 * unit_registry.m
             )
             x = np.arange(len(array))
    -        data_array = xr.DataArray(data=array, coords={"x": x}, dims="x").astype(dtype)
    +        data_array = xr.DataArray(data=array, coords={"x": x}, dims="x")
     
             units = extract_units(data_array)
             expected = attach_units(strip_units(data_array).interpolate_na(dim="x"), units)
             actual = data_array.interpolate_na(dim="x")
     
    -        assert_equal_with_units(expected, actual)
    +        assert_units_equal(expected, actual)
    +        xr.testing.assert_identical(expected, actual)
     
         @pytest.mark.parametrize(
             "unit,error",
    @@ -2767,18 +2864,8 @@ def test_interpolate_na(self, dtype):
                     unit_registry.dimensionless, DimensionalityError, id="dimensionless"
                 ),
                 pytest.param(unit_registry.s, DimensionalityError, id="incompatible_unit"),
    -            pytest.param(
    -                unit_registry.cm,
    -                None,
    -                id="compatible_unit",
    -                marks=pytest.mark.xfail(reason="depends on reindex"),
    -            ),
    -            pytest.param(
    -                unit_registry.m,
    -                None,
    -                id="identical_unit",
    -                marks=pytest.mark.xfail(reason="depends on reindex"),
    -            ),
    +            pytest.param(unit_registry.cm, None, id="compatible_unit",),
    +            pytest.param(unit_registry.m, None, id="identical_unit",),
             ),
         )
         def test_combine_first(self, unit, error, dtype):
    @@ -2807,7 +2894,8 @@ def test_combine_first(self, unit, error, dtype):
             )
             actual = data_array.combine_first(other)
     
    -        assert_equal_with_units(expected, actual)
    +        assert_units_equal(expected, actual)
    +        xr.testing.assert_identical(expected, actual)
     
         @pytest.mark.parametrize(
             "unit",
    @@ -2829,7 +2917,17 @@ def test_combine_first(self, unit, error, dtype):
                 "coords",
             ),
         )
    -    @pytest.mark.parametrize("func", (method("equals"), method("identical")), ids=repr)
    +    @pytest.mark.parametrize(
    +        "func",
    +        (
    +            method("equals"),
    +            pytest.param(
    +                method("identical"),
    +                marks=pytest.mark.skip(reason="the behavior of identical is undecided"),
    +            ),
    +        ),
    +        ids=repr,
    +    )
         def test_comparisons(self, func, variation, unit, dtype):
             def is_compatible(a, b):
                 a = a if a is not None else 1
    @@ -2903,7 +3001,8 @@ def test_broadcast_like(self, unit, dtype):
             )
             actual = arr1.broadcast_like(arr2)
     
    -        assert_equal_with_units(expected, actual)
    +        assert_units_equal(expected, actual)
    +        xr.testing.assert_identical(expected, actual)
     
         @pytest.mark.parametrize(
             "unit",
    @@ -2950,7 +3049,6 @@ def test_broadcast_equals(self, unit, dtype):
                 method("reset_coords", names="x2"),
                 method("copy"),
                 method("astype", np.float32),
    -            method("item", 1),
             ),
             ids=repr,
         )
    @@ -2978,7 +3076,8 @@ def test_content_manipulation(self, func, dtype):
             expected = attach_units(func(strip_units(data_array), **stripped_kwargs), units)
             actual = func(data_array)
     
    -        assert_equal_with_units(expected, actual)
    +        assert_units_equal(expected, actual)
    +        xr.testing.assert_identical(expected, actual)
     
         @pytest.mark.parametrize(
             "func", (pytest.param(method("copy", data=np.arange(20))),), ids=repr
    @@ -3004,7 +3103,9 @@ def test_content_manipulation_with_units(self, func, unit, dtype):
             )
     
             actual = func(data_array, **kwargs)
    -        assert_equal_with_units(expected, actual)
    +
    +        assert_units_equal(expected, actual)
    +        xr.testing.assert_identical(expected, actual)
     
         @pytest.mark.parametrize(
             "indices",
    @@ -3024,7 +3125,8 @@ def test_isel(self, indices, dtype):
             )
             actual = data_array.isel(x=indices)
     
    -        assert_equal_with_units(expected, actual)
    +        assert_units_equal(expected, actual)
    +        xr.testing.assert_identical(expected, actual)
     
         @pytest.mark.xfail(reason="indexes don't support units")
         @pytest.mark.parametrize(
    @@ -3067,7 +3169,9 @@ def test_sel(self, raw_values, unit, error, dtype):
                 extract_units(data_array),
             )
             actual = data_array.sel(x=values)
    -        assert_equal_with_units(expected, actual)
    +
    +        assert_units_equal(expected, actual)
    +        xr.testing.assert_identical(expected, actual)
     
         @pytest.mark.xfail(reason="indexes don't support units")
         @pytest.mark.parametrize(
    @@ -3110,7 +3214,9 @@ def test_loc(self, raw_values, unit, error, dtype):
                 extract_units(data_array),
             )
             actual = data_array.loc[{"x": values}]
    -        assert_equal_with_units(expected, actual)
    +
    +        assert_units_equal(expected, actual)
    +        xr.testing.assert_identical(expected, actual)
     
         @pytest.mark.xfail(reason="indexes don't support units")
         @pytest.mark.parametrize(
    @@ -3153,7 +3259,9 @@ def test_drop_sel(self, raw_values, unit, error, dtype):
                 extract_units(data_array),
             )
             actual = data_array.drop_sel(x=values)
    -        assert_equal_with_units(expected, actual)
    +
    +        assert_units_equal(expected, actual)
    +        xr.testing.assert_identical(expected, actual)
     
         @pytest.mark.parametrize(
             "shape",
    @@ -3181,7 +3289,9 @@ def test_squeeze(self, shape, dtype):
                 strip_units(data_array).squeeze(), extract_units(data_array)
             )
             actual = data_array.squeeze()
    -        assert_equal_with_units(expected, actual)
    +
    +        assert_units_equal(expected, actual)
    +        xr.testing.assert_identical(expected, actual)
     
             # try squeezing the dimensions separately
             names = tuple(dim for dim, coord in coords.items() if len(coord) == 1)
    @@ -3190,7 +3300,9 @@ def test_squeeze(self, shape, dtype):
                     strip_units(data_array).squeeze(dim=name), extract_units(data_array)
                 )
                 actual = data_array.squeeze(dim=name)
    -            assert_equal_with_units(expected, actual)
    +
    +            assert_units_equal(expected, actual)
    +            xr.testing.assert_identical(expected, actual)
     
         @pytest.mark.parametrize(
             "func",
    @@ -3212,49 +3324,46 @@ def test_head_tail_thin(self, func, dtype):
             )
             actual = func(data_array)
     
    -        assert_equal_with_units(expected, actual)
    +        assert_units_equal(expected, actual)
    +        xr.testing.assert_identical(expected, actual)
     
    -    @pytest.mark.xfail(reason="indexes don't support units")
    +    # TODO: remove once pint==0.12 has been released
    +    @pytest.mark.xfail(
    +        LooseVersion(pint.__version__) <= "0.12", reason="pint bug in isclose"
    +    )
    +    @pytest.mark.parametrize("variant", ("data", "coords"))
         @pytest.mark.parametrize(
    -        "unit,error",
    +        "func",
             (
    -            pytest.param(1, DimensionalityError, id="no_unit"),
                 pytest.param(
    -                unit_registry.dimensionless, DimensionalityError, id="dimensionless"
    +                method("interp"), marks=pytest.mark.xfail(reason="uses scipy")
                 ),
    -            pytest.param(unit_registry.s, DimensionalityError, id="incompatible_unit"),
    -            pytest.param(unit_registry.cm, None, id="compatible_unit"),
    -            pytest.param(unit_registry.m, None, id="identical_unit"),
    +            method("reindex"),
             ),
    +        ids=repr,
         )
    -    def test_interp(self, unit, error):
    -        array = np.linspace(1, 2, 10 * 5).reshape(10, 5) * unit_registry.degK
    -        new_coords = (np.arange(10) + 0.5) * unit
    -        coords = {
    -            "x": np.arange(10) * unit_registry.m,
    -            "y": np.arange(5) * unit_registry.m,
    +    def test_interp_reindex(self, variant, func, dtype):
    +        variants = {
    +            "data": (unit_registry.m, 1),
    +            "coords": (1, unit_registry.m),
             }
    +        data_unit, coord_unit = variants.get(variant)
     
    -        data_array = xr.DataArray(array, coords=coords, dims=("x", "y"))
    +        array = np.linspace(1, 2, 10).astype(dtype) * data_unit
    +        y = np.arange(10) * coord_unit
     
    -        if error is not None:
    -            with pytest.raises(error):
    -                data_array.interp(x=new_coords)
    -
    -            return
    +        x = np.arange(10)
    +        new_x = np.arange(10) + 0.5
    +        data_array = xr.DataArray(array, coords={"x": x, "y": ("x", y)}, dims="x")
     
             units = extract_units(data_array)
    -        expected = attach_units(
    -            strip_units(data_array).interp(
    -                x=strip_units(convert_units(new_coords, {None: unit_registry.m}))
    -            ),
    -            units,
    -        )
    -        actual = data_array.interp(x=new_coords)
    +        expected = attach_units(func(strip_units(data_array), x=new_x), units)
    +        actual = func(data_array, x=new_x)
     
    -        assert_equal_with_units(expected, actual)
    +        assert_units_equal(expected, actual)
    +        xr.testing.assert_allclose(expected, actual)
     
    -    @pytest.mark.xfail(reason="indexes strip units")
    +    @pytest.mark.xfail(reason="indexes don't support units")
         @pytest.mark.parametrize(
             "unit,error",
             (
    @@ -3267,79 +3376,70 @@ def test_interp(self, unit, error):
                 pytest.param(unit_registry.m, None, id="identical_unit"),
             ),
         )
    -    def test_interp_like(self, unit, error):
    -        array = np.linspace(1, 2, 10 * 5).reshape(10, 5) * unit_registry.degK
    -        coords = {
    -            "x": (np.arange(10) + 0.3) * unit_registry.m,
    -            "y": (np.arange(5) + 0.3) * unit_registry.m,
    -        }
    -
    -        data_array = xr.DataArray(array, coords=coords, dims=("x", "y"))
    -        other = xr.DataArray(
    -            data=np.empty((20, 10)) * unit_registry.degK,
    -            coords={"x": np.arange(20) * unit, "y": np.arange(10) * unit},
    -            dims=("x", "y"),
    -        )
    +    @pytest.mark.parametrize(
    +        "func", (method("interp"), method("reindex")), ids=repr,
    +    )
    +    def test_interp_reindex_indexing(self, func, unit, error, dtype):
    +        array = np.linspace(1, 2, 10).astype(dtype)
    +        x = np.arange(10) * unit_registry.m
    +        new_x = (np.arange(10) + 0.5) * unit
    +        data_array = xr.DataArray(array, coords={"x": x}, dims="x")
     
             if error is not None:
                 with pytest.raises(error):
    -                data_array.interp_like(other)
    +                func(data_array, x=new_x)
     
                 return
     
             units = extract_units(data_array)
             expected = attach_units(
    -            strip_units(data_array).interp_like(
    -                strip_units(convert_units(other, units))
    +            func(
    +                strip_units(data_array),
    +                x=strip_units(convert_units(new_x, {None: unit_registry.m})),
                 ),
                 units,
             )
    -        actual = data_array.interp_like(other)
    +        actual = func(data_array, x=new_x)
     
    -        assert_equal_with_units(expected, actual)
    +        assert_units_equal(expected, actual)
    +        xr.testing.assert_identical(expected, actual)
     
    -    @pytest.mark.xfail(reason="indexes don't support units")
    +    # TODO: remove once pint==0.12 has been released
    +    @pytest.mark.xfail(
    +        LooseVersion(pint.__version__) <= "0.12", reason="pint bug in isclose"
    +    )
    +    @pytest.mark.parametrize("variant", ("data", "coords"))
         @pytest.mark.parametrize(
    -        "unit,error",
    +        "func",
             (
    -            pytest.param(1, DimensionalityError, id="no_unit"),
                 pytest.param(
    -                unit_registry.dimensionless, DimensionalityError, id="dimensionless"
    +                method("interp_like"), marks=pytest.mark.xfail(reason="uses scipy")
                 ),
    -            pytest.param(unit_registry.s, DimensionalityError, id="incompatible_unit"),
    -            pytest.param(unit_registry.cm, None, id="compatible_unit"),
    -            pytest.param(unit_registry.m, None, id="identical_unit"),
    +            method("reindex_like"),
             ),
    +        ids=repr,
         )
    -    def test_reindex(self, unit, error, dtype):
    -        array = (
    -            np.linspace(1, 2, 10 * 5).reshape(10, 5).astype(dtype) * unit_registry.degK
    -        )
    -        new_coords = (np.arange(10) + 0.5) * unit
    -        coords = {
    -            "x": np.arange(10) * unit_registry.m,
    -            "y": np.arange(5) * unit_registry.m,
    +    def test_interp_reindex_like(self, variant, func, dtype):
    +        variants = {
    +            "data": (unit_registry.m, 1),
    +            "coords": (1, unit_registry.m),
             }
    +        data_unit, coord_unit = variants.get(variant)
     
    -        data_array = xr.DataArray(array, coords=coords, dims=("x", "y"))
    -        func = method("reindex")
    -
    -        if error is not None:
    -            with pytest.raises(error):
    -                func(data_array, x=new_coords)
    +        array = np.linspace(1, 2, 10).astype(dtype) * data_unit
    +        coord = np.arange(10) * coord_unit
     
    -            return
    +        x = np.arange(10)
    +        new_x = np.arange(-2, 2) + 0.5
    +        data_array = xr.DataArray(array, coords={"x": x, "y": ("x", coord)}, dims="x")
    +        other = xr.DataArray(np.empty_like(new_x), coords={"x": new_x}, dims="x")
     
    -        expected = attach_units(
    -            func(
    -                strip_units(data_array),
    -                x=strip_units(convert_units(new_coords, {None: unit_registry.m})),
    -            ),
    -            {None: unit_registry.degK},
    -        )
    -        actual = func(data_array, x=new_coords)
    +        units = extract_units(data_array)
    +        expected = attach_units(func(strip_units(data_array), other), units)
    +        actual = func(data_array, other)
     
    -        assert_equal_with_units(expected, actual)
    +        assert_units_equal(expected, actual)
    +        xr.testing.assert_allclose(expected, actual)
     
         @pytest.mark.xfail(reason="indexes don't support units")
         @pytest.mark.parametrize(
    @@ -3354,38 +3454,35 @@ def test_reindex(self, unit, error, dtype):
                 pytest.param(unit_registry.m, None, id="identical_unit"),
             ),
         )
    -    def test_reindex_like(self, unit, error, dtype):
    -        array = (
    -            np.linspace(1, 2, 10 * 5).reshape(10, 5).astype(dtype) * unit_registry.degK
    -        )
    -        coords = {
    -            "x": (np.arange(10) + 0.3) * unit_registry.m,
    -            "y": (np.arange(5) + 0.3) * unit_registry.m,
    -        }
    +    @pytest.mark.parametrize(
    +        "func", (method("interp_like"), method("reindex_like")), ids=repr,
    +    )
    +    def test_interp_reindex_like_indexing(self, func, unit, error, dtype):
    +        array = np.linspace(1, 2, 10).astype(dtype)
    +        x = np.arange(10) * unit_registry.m
    +        new_x = (np.arange(-2, 2) + 0.5) * unit
     
    -        data_array = xr.DataArray(array, coords=coords, dims=("x", "y"))
    -        other = xr.DataArray(
    -            data=np.empty((20, 10)) * unit_registry.degK,
    -            coords={"x": np.arange(20) * unit, "y": np.arange(10) * unit},
    -            dims=("x", "y"),
    -        )
    +        data_array = xr.DataArray(array, coords={"x": x}, dims="x")
    +        other = xr.DataArray(np.empty_like(new_x), {"x": new_x}, dims="x")
     
             if error is not None:
                 with pytest.raises(error):
    -                data_array.reindex_like(other)
    +                func(data_array, other)
     
                 return
     
             units = extract_units(data_array)
             expected = attach_units(
    -            strip_units(data_array).reindex_like(
    -                strip_units(convert_units(other, units))
    +            func(
    +                strip_units(data_array),
    +                strip_units(convert_units(other, {None: unit_registry.m})),
                 ),
                 units,
             )
    -        actual = data_array.reindex_like(other)
    +        actual = func(data_array, other)
     
    -        assert_equal_with_units(expected, actual)
    +        assert_units_equal(expected, actual)
    +        xr.testing.assert_identical(expected, actual)
     
         @pytest.mark.parametrize(
             "func",
    @@ -3407,7 +3504,8 @@ def test_stacking_stacked(self, func, dtype):
             expected = attach_units(func(strip_units(stacked)), {"data": unit_registry.m})
             actual = func(stacked)
     
    -        assert_equal_with_units(expected, actual)
    +        assert_units_equal(expected, actual)
    +        xr.testing.assert_identical(expected, actual)
     
         @pytest.mark.xfail(reason="indexes don't support units")
         def test_to_unstacked_dataset(self, dtype):
    @@ -3430,7 +3528,8 @@ def test_to_unstacked_dataset(self, dtype):
             ).rename({elem.magnitude: elem for elem in x})
             actual = func(data_array)
     
    -        assert_equal_with_units(expected, actual)
    +        assert_units_equal(expected, actual)
    +        xr.testing.assert_identical(expected, actual)
     
         @pytest.mark.parametrize(
             "func",
    @@ -3438,9 +3537,7 @@ def test_to_unstacked_dataset(self, dtype):
                 method("transpose", "y", "x", "z"),
                 method("stack", a=("x", "y")),
                 method("set_index", x="x2"),
    -            pytest.param(
    -                method("shift", x=2), marks=pytest.mark.xfail(reason="strips units")
    -            ),
    +            method("shift", x=2),
                 method("roll", x=2, roll_coords=False),
                 method("sortby", "x2"),
             ),
    @@ -3466,7 +3563,8 @@ def test_stacking_reordering(self, func, dtype):
             expected = attach_units(func(strip_units(data_array)), {None: unit_registry.m})
             actual = func(data_array)
     
    -        assert_equal_with_units(expected, actual)
    +        assert_units_equal(expected, actual)
    +        xr.testing.assert_identical(expected, actual)
     
         @pytest.mark.parametrize(
             "func",
    @@ -3476,16 +3574,13 @@ def test_stacking_reordering(self, func, dtype):
                 method("integrate", dim="x"),
                 pytest.param(
                     method("quantile", q=[0.25, 0.75]),
    -                marks=pytest.mark.xfail(reason="nanquantile not implemented"),
    -            ),
    -            method("reduce", func=np.sum, dim="x"),
    -            pytest.param(
    -                lambda x: x.dot(x),
    -                id="method_dot",
                     marks=pytest.mark.xfail(
    -                    reason="pint does not implement the dot method"
    +                    LooseVersion(pint.__version__) <= "0.12",
    +                    reason="quantile / nanquantile not implemented yet",
                     ),
                 ),
    +            method("reduce", func=np.sum, dim="x"),
    +            pytest.param(lambda x: x.dot(x), id="method_dot"),
             ),
             ids=repr,
         )
    @@ -3512,8 +3607,13 @@ def test_computation(self, func, dtype):
             expected = attach_units(func(strip_units(data_array)), units)
             actual = func(data_array)
     
    -        assert_equal_with_units(expected, actual)
    +        assert_units_equal(expected, actual)
    +        xr.testing.assert_identical(expected, actual)
     
    +    # TODO: remove once pint==0.12 has been released
    +    @pytest.mark.xfail(
    +        LooseVersion(pint.__version__) <= "0.12", reason="pint bug in isclose"
    +    )
         @pytest.mark.parametrize(
             "func",
             (
    @@ -3522,11 +3622,15 @@ def test_computation(self, func, dtype):
                 method("coarsen", y=2),
                 pytest.param(
                     method("rolling", y=3),
    -                marks=pytest.mark.xfail(reason="rolling strips units"),
    +                marks=pytest.mark.xfail(
    +                    reason="numpy.lib.stride_tricks.as_strided converts to ndarray"
    +                ),
                 ),
                 pytest.param(
                     method("rolling_exp", y=3),
    -                marks=pytest.mark.xfail(reason="units not supported by numbagg"),
    +                marks=pytest.mark.xfail(
    +                    reason="numbagg functions are not supported by pint"
    +                ),
                 ),
             ),
             ids=repr,
    @@ -3545,7 +3649,8 @@ def test_computation_objects(self, func, dtype):
             expected = attach_units(func(strip_units(data_array)).mean(), units)
             actual = func(data_array).mean()
     
    -        assert_equal_with_units(expected, actual)
    +        assert_units_equal(expected, actual)
    +        xr.testing.assert_allclose(expected, actual)
     
         def test_resample(self, dtype):
             array = np.linspace(0, 5, 10).astype(dtype) * unit_registry.m
    @@ -3559,7 +3664,8 @@ def test_resample(self, dtype):
             expected = attach_units(func(strip_units(data_array)).mean(), units)
             actual = func(data_array).mean()
     
    -        assert_equal_with_units(expected, actual)
    +        assert_units_equal(expected, actual)
    +        xr.testing.assert_identical(expected, actual)
     
         @pytest.mark.parametrize(
             "func",
    @@ -3569,7 +3675,10 @@ def test_resample(self, dtype):
                 method("last"),
                 pytest.param(
                     method("quantile", q=[0.25, 0.5, 0.75], dim="x"),
    -                marks=pytest.mark.xfail(reason="nanquantile not implemented"),
    +                marks=pytest.mark.xfail(
    +                    LooseVersion(pint.__version__) <= "0.12",
    +                    reason="quantile / nanquantile not implemented yet",
    +                ),
                 ),
             ),
             ids=repr,
    @@ -3598,18 +3707,20 @@ def test_grouped_operations(self, func, dtype):
             )
             actual = func(data_array.groupby("y"))
     
    -        assert_equal_with_units(expected, actual)
    +        assert_units_equal(expected, actual)
    +        xr.testing.assert_identical(expected, actual)
     
     
    +@pytest.mark.filterwarnings("error::pint.UnitStrippedWarning")
     class TestDataset:
         @pytest.mark.parametrize(
             "unit,error",
             (
    -            pytest.param(1, DimensionalityError, id="no_unit"),
    +            pytest.param(1, xr.MergeError, id="no_unit"),
                 pytest.param(
    -                unit_registry.dimensionless, DimensionalityError, id="dimensionless"
    +                unit_registry.dimensionless, xr.MergeError, id="dimensionless"
                 ),
    -            pytest.param(unit_registry.s, DimensionalityError, id="incompatible_unit"),
    +            pytest.param(unit_registry.s, xr.MergeError, id="incompatible_unit"),
                 pytest.param(unit_registry.mm, None, id="compatible_unit"),
                 pytest.param(unit_registry.m, None, id="same_unit"),
             ),
    @@ -3618,11 +3729,10 @@ class TestDataset:
             "shared",
             (
                 "nothing",
    -            pytest.param("dims", marks=pytest.mark.xfail(reason="indexes strip units")),
                 pytest.param(
    -                "coords",
    -                marks=pytest.mark.xfail(reason="reindex does not work with pint yet"),
    +                "dims", marks=pytest.mark.xfail(reason="indexes don't support units")
                 ),
    +            "coords",
             ),
         )
         def test_init(self, shared, unit, error, dtype):
    @@ -3630,60 +3740,53 @@ def test_init(self, shared, unit, error, dtype):
             scaled_unit = unit_registry.mm
     
             a = np.linspace(0, 1, 10).astype(dtype) * unit_registry.Pa
    -        b = np.linspace(-1, 0, 12).astype(dtype) * unit_registry.Pa
    -
    -        raw_x = np.arange(a.shape[0])
    -        x = raw_x * original_unit
    -        x2 = x.to(scaled_unit)
    -
    -        raw_y = np.arange(b.shape[0])
    -        y = raw_y * unit
    -        y_units = unit if isinstance(y, unit_registry.Quantity) else None
    -        if isinstance(y, unit_registry.Quantity):
    -            if y.check(scaled_unit):
    -                y2 = y.to(scaled_unit)
    -            else:
    -                y2 = y * 1000
    -            y2_units = y2.units
    -        else:
    -            y2 = y * 1000
    -            y2_units = None
    +        b = np.linspace(-1, 0, 10).astype(dtype) * unit_registry.degK
    +
    +        values_a = np.arange(a.shape[0])
    +        dim_a = values_a * original_unit
    +        coord_a = dim_a.to(scaled_unit)
    +
    +        values_b = np.arange(b.shape[0])
    +        dim_b = values_b * unit
    +        coord_b = (
    +            dim_b.to(scaled_unit)
    +            if unit_registry.is_compatible_with(dim_b, scaled_unit)
    +            and unit != scaled_unit
    +            else dim_b * 1000
    +        )
     
             variants = {
    -            "nothing": ({"x": x, "x2": ("x", x2)}, {"y": y, "y2": ("y", y2)}),
    -            "dims": (
    -                {"x": x, "x2": ("x", strip_units(x2))},
    -                {"x": y, "y2": ("x", strip_units(y2))},
    +            "nothing": ({}, {}),
    +            "dims": ({"x": dim_a}, {"x": dim_b}),
    +            "coords": (
    +                {"x": values_a, "y": ("x", coord_a)},
    +                {"x": values_b, "y": ("x", coord_b)},
                 ),
    -            "coords": ({"x": raw_x, "y": ("x", x2)}, {"x": raw_y, "y": ("x", y2)}),
             }
             coords_a, coords_b = variants.get(shared)
     
             dims_a, dims_b = ("x", "y") if shared == "nothing" else ("x", "x")
     
    -        arr1 = xr.DataArray(data=a, coords=coords_a, dims=dims_a)
    -        arr2 = xr.DataArray(data=b, coords=coords_b, dims=dims_b)
    +        a = xr.DataArray(data=a, coords=coords_a, dims=dims_a)
    +        b = xr.DataArray(data=b, coords=coords_b, dims=dims_b)
    +
             if error is not None and shared != "nothing":
                 with pytest.raises(error):
    -                xr.Dataset(data_vars={"a": arr1, "b": arr2})
    +                xr.Dataset(data_vars={"a": a, "b": b})
     
                 return
     
    -        actual = xr.Dataset(data_vars={"a": arr1, "b": arr2})
    +        actual = xr.Dataset(data_vars={"a": a, "b": b})
     
    -        expected_units = {
    -            "a": a.units,
    -            "b": b.units,
    -            "x": x.units,
    -            "x2": x2.units,
    -            "y": y_units,
    -            "y2": y2_units,
    -        }
    +        units = merge_mappings(
    +            extract_units(a.rename("a")), extract_units(b.rename("b"))
    +        )
             expected = attach_units(
    -            xr.Dataset(data_vars={"a": strip_units(arr1), "b": strip_units(arr2)}),
    -            expected_units,
    +            xr.Dataset(data_vars={"a": strip_units(a), "b": strip_units(b)}), units
             )
    -        assert_equal_with_units(actual, expected)
    +
    +        assert_units_equal(expected, actual)
    +        assert_equal(expected, actual)
     
         @pytest.mark.parametrize(
             "func", (pytest.param(str, id="str"), pytest.param(repr, id="repr"))
    @@ -3691,79 +3794,79 @@ def test_init(self, shared, unit, error, dtype):
         @pytest.mark.parametrize(
             "variant",
             (
    +            "data",
                 pytest.param(
    -                "with_dims",
    +                "dims",
                     marks=pytest.mark.xfail(reason="units in indexes are not supported"),
                 ),
    -            pytest.param("with_coords"),
    -            pytest.param("without_coords"),
    +            "coords",
             ),
         )
    -    @pytest.mark.filterwarnings("error:::pint[.*]")
         def test_repr(self, func, variant, dtype):
    -        array1 = np.linspace(1, 2, 10, dtype=dtype) * unit_registry.Pa
    -        array2 = np.linspace(0, 1, 10, dtype=dtype) * unit_registry.degK
    +        unit1, unit2 = (
    +            (unit_registry.Pa, unit_registry.degK) if variant == "data" else (1, 1)
    +        )
    +
    +        array1 = np.linspace(1, 2, 10, dtype=dtype) * unit1
    +        array2 = np.linspace(0, 1, 10, dtype=dtype) * unit2
     
             x = np.arange(len(array1)) * unit_registry.s
             y = x.to(unit_registry.ms)
     
             variants = {
    -            "with_dims": {"x": x},
    -            "with_coords": {"y": ("x", y)},
    -            "without_coords": {},
    +            "dims": {"x": x},
    +            "coords": {"y": ("x", y)},
    +            "data": {},
             }
     
    -        data_array = xr.Dataset(
    +        ds = xr.Dataset(
                 data_vars={"a": ("x", array1), "b": ("x", array2)},
                 coords=variants.get(variant),
             )
     
             # FIXME: this just checks that the repr does not raise
             # warnings or errors, but does not check the result
    -        func(data_array)
    +        func(ds)
     
         @pytest.mark.parametrize(
             "func",
             (
    +            function("all"),
    +            function("any"),
                 pytest.param(
    -                function("all"),
    -                marks=pytest.mark.xfail(reason="not implemented by pint"),
    +                function("argmax"),
    +                marks=pytest.mark.skip(
    +                    reason="calling np.argmax as a function on xarray objects is not "
    +                    "supported"
    +                ),
                 ),
                 pytest.param(
    -                function("any"),
    -                marks=pytest.mark.xfail(reason="not implemented by pint"),
    +                function("argmin"),
    +                marks=pytest.mark.skip(
    +                    reason="calling np.argmin as a function on xarray objects is not "
    +                    "supported"
    +                ),
                 ),
    -            function("argmax"),
    -            function("argmin"),
                 function("max"),
                 function("min"),
                 function("mean"),
                 pytest.param(
                     function("median"),
    -                marks=pytest.mark.xfail(
    -                    reason="np.median does not work with dataset yet"
    -                ),
    +                marks=pytest.mark.xfail(reason="median does not work with dataset yet"),
                 ),
                 function("sum"),
                 pytest.param(
                     function("prod"),
    -                marks=pytest.mark.xfail(reason="not implemented by pint"),
    +                marks=pytest.mark.xfail(reason="prod does not work with dataset yet"),
                 ),
                 function("std"),
                 function("var"),
                 function("cumsum"),
    -            pytest.param(
    -                function("cumprod"),
    -                marks=pytest.mark.xfail(reason="fails within xarray"),
    -            ),
    -            pytest.param(
    -                method("all"), marks=pytest.mark.xfail(reason="not implemented by pint")
    -            ),
    -            pytest.param(
    -                method("any"), marks=pytest.mark.xfail(reason="not implemented by pint")
    -            ),
    -            method("argmax"),
    -            method("argmin"),
    +            function("cumprod"),
    +            method("all"),
    +            method("any"),
    +            method("argmax", dim="x"),
    +            method("argmin", dim="x"),
                 method("max"),
                 method("min"),
                 method("mean"),
    @@ -3771,68 +3874,64 @@ def test_repr(self, func, variant, dtype):
                 method("sum"),
                 pytest.param(
                     method("prod"),
    -                marks=pytest.mark.xfail(reason="not implemented by pint"),
    +                marks=pytest.mark.xfail(reason="prod does not work with dataset yet"),
                 ),
                 method("std"),
                 method("var"),
                 method("cumsum"),
    -            pytest.param(
    -                method("cumprod"), marks=pytest.mark.xfail(reason="fails within xarray")
    -            ),
    +            method("cumprod"),
             ),
             ids=repr,
         )
         def test_aggregation(self, func, dtype):
    -        unit_a = (
    -            unit_registry.Pa if func.name != "cumprod" else unit_registry.dimensionless
    -        )
    -        unit_b = (
    -            unit_registry.kg / unit_registry.m ** 3
    +        unit_a, unit_b = (
    +            (unit_registry.Pa, unit_registry.degK)
                 if func.name != "cumprod"
    -            else unit_registry.dimensionless
    -        )
    -        a = xr.DataArray(data=np.linspace(0, 1, 10).astype(dtype) * unit_a, dims="x")
    -        b = xr.DataArray(data=np.linspace(-1, 0, 10).astype(dtype) * unit_b, dims="x")
    -        x = xr.DataArray(data=np.arange(10).astype(dtype) * unit_registry.m, dims="x")
    -        y = xr.DataArray(
    -            data=np.arange(10, 20).astype(dtype) * unit_registry.s, dims="x"
    +            else (unit_registry.dimensionless, unit_registry.dimensionless)
             )
     
    -        ds = xr.Dataset(data_vars={"a": a, "b": b}, coords={"x": x, "y": y})
    +        a = np.linspace(0, 1, 10).astype(dtype) * unit_a
    +        b = np.linspace(-1, 0, 10).astype(dtype) * unit_b
    +
    +        ds = xr.Dataset({"a": ("x", a), "b": ("x", b)})
    +
    +        if "dim" in func.kwargs:
    +            numpy_kwargs = func.kwargs.copy()
    +            dim = numpy_kwargs.pop("dim")
    +
    +            axis_a = ds.a.get_axis_num(dim)
    +            axis_b = ds.b.get_axis_num(dim)
    +
    +            numpy_kwargs_a = numpy_kwargs.copy()
    +            numpy_kwargs_a["axis"] = axis_a
    +            numpy_kwargs_b = numpy_kwargs.copy()
    +            numpy_kwargs_b["axis"] = axis_b
    +        else:
    +            numpy_kwargs_a = {}
    +            numpy_kwargs_b = {}
    +
    +        units_a = array_extract_units(func(a, **numpy_kwargs_a))
    +        units_b = array_extract_units(func(b, **numpy_kwargs_b))
    +        units = {"a": units_a, "b": units_b}
     
             actual = func(ds)
    -        expected = attach_units(
    -            func(strip_units(ds)),
    -            {
    -                "a": extract_units(func(a)).get(None),
    -                "b": extract_units(func(b)).get(None),
    -            },
    -        )
    +        expected = attach_units(func(strip_units(ds)), units)
     
    -        assert_equal_with_units(actual, expected)
    +        assert_units_equal(expected, actual)
    +        assert_allclose(expected, actual)
     
         @pytest.mark.parametrize("property", ("imag", "real"))
         def test_numpy_properties(self, property, dtype):
    -        ds = xr.Dataset(
    -            data_vars={
    -                "a": xr.DataArray(
    -                    data=np.linspace(0, 1, 10) * unit_registry.Pa, dims="x"
    -                ),
    -                "b": xr.DataArray(
    -                    data=np.linspace(-1, 0, 15) * unit_registry.Pa, dims="y"
    -                ),
    -            },
    -            coords={
    -                "x": np.arange(10) * unit_registry.m,
    -                "y": np.arange(15) * unit_registry.s,
    -            },
    -        )
    +        a = np.linspace(0, 1, 10) * unit_registry.Pa
    +        b = np.linspace(-1, 0, 15) * unit_registry.degK
    +        ds = xr.Dataset({"a": ("x", a), "b": ("y", b)})
             units = extract_units(ds)
     
             actual = getattr(ds, property)
             expected = attach_units(getattr(strip_units(ds), property), units)
     
    -        assert_equal_with_units(actual, expected)
    +        assert_units_equal(expected, actual)
    +        assert_equal(expected, actual)
     
         @pytest.mark.parametrize(
             "func",
    @@ -3846,31 +3945,19 @@ def test_numpy_properties(self, property, dtype):
             ids=repr,
         )
         def test_numpy_methods(self, func, dtype):
    -        ds = xr.Dataset(
    -            data_vars={
    -                "a": xr.DataArray(
    -                    data=np.linspace(1, -1, 10) * unit_registry.Pa, dims="x"
    -                ),
    -                "b": xr.DataArray(
    -                    data=np.linspace(-1, 1, 15) * unit_registry.Pa, dims="y"
    -                ),
    -            },
    -            coords={
    -                "x": np.arange(10) * unit_registry.m,
    -                "y": np.arange(15) * unit_registry.s,
    -            },
    -        )
    -        units = {
    -            "a": array_extract_units(func(ds.a)),
    -            "b": array_extract_units(func(ds.b)),
    -            "x": unit_registry.m,
    -            "y": unit_registry.s,
    -        }
    +        a = np.linspace(1, -1, 10) * unit_registry.Pa
    +        b = np.linspace(-1, 1, 15) * unit_registry.degK
    +        ds = xr.Dataset({"a": ("x", a), "b": ("y", b)})
    +
    +        units_a = array_extract_units(func(a))
    +        units_b = array_extract_units(func(b))
    +        units = {"a": units_a, "b": units_b}
     
             actual = func(ds)
             expected = attach_units(func(strip_units(ds)), units)
     
    -        assert_equal_with_units(actual, expected)
    +        assert_units_equal(expected, actual)
    +        assert_equal(expected, actual)
     
         @pytest.mark.parametrize("func", (method("clip", min=3, max=8),), ids=repr)
         @pytest.mark.parametrize(
    @@ -3887,21 +3974,13 @@ def test_numpy_methods(self, func, dtype):
         )
         def test_numpy_methods_with_args(self, func, unit, error, dtype):
             data_unit = unit_registry.m
    -        ds = xr.Dataset(
    -            data_vars={
    -                "a": xr.DataArray(data=np.arange(10) * data_unit, dims="x"),
    -                "b": xr.DataArray(data=np.arange(15) * data_unit, dims="y"),
    -            },
    -            coords={
    -                "x": np.arange(10) * unit_registry.m,
    -                "y": np.arange(15) * unit_registry.s,
    -            },
    -        )
    +        a = np.linspace(0, 10, 15) * unit_registry.m
    +        b = np.linspace(-2, 12, 20) * unit_registry.m
    +        ds = xr.Dataset({"a": ("x", a), "b": ("y", b)})
             units = extract_units(ds)
     
             kwargs = {
    -            key: (value * unit if isinstance(value, (int, float)) else value)
    -            for key, value in func.kwargs.items()
    +            key: array_attach_units(value, unit) for key, value in func.kwargs.items()
             }
     
             if error is not None:
    @@ -3918,7 +3997,8 @@ def test_numpy_methods_with_args(self, func, unit, error, dtype):
             actual = func(ds, **kwargs)
             expected = attach_units(func(strip_units(ds), **stripped_kwargs), units)
     
    -        assert_equal_with_units(actual, expected)
    +        assert_units_equal(expected, actual)
    +        assert_equal(expected, actual)
     
         @pytest.mark.parametrize(
             "func", (method("isnull"), method("notnull"), method("count")), ids=repr
    @@ -3948,22 +4028,13 @@ def test_missing_value_detection(self, func, dtype):
                 * unit_registry.Pa
             )
     
    -        x = np.arange(array1.shape[0]) * unit_registry.m
    -        y = np.arange(array1.shape[1]) * unit_registry.m
    -        z = np.arange(array2.shape[0]) * unit_registry.m
    -
    -        ds = xr.Dataset(
    -            data_vars={
    -                "a": xr.DataArray(data=array1, dims=("x", "y")),
    -                "b": xr.DataArray(data=array2, dims=("z", "x")),
    -            },
    -            coords={"x": x, "y": y, "z": z},
    -        )
    +        ds = xr.Dataset({"a": (("x", "y"), array1), "b": (("z", "x"), array2)})
     
             expected = func(strip_units(ds))
             actual = func(ds)
     
    -        assert_equal_with_units(expected, actual)
    +        assert_units_equal(expected, actual)
    +        assert_equal(expected, actual)
     
         @pytest.mark.xfail(reason="ffill and bfill lose the unit")
         @pytest.mark.parametrize("func", (method("ffill"), method("bfill")), ids=repr)
    @@ -3977,23 +4048,14 @@ def test_missing_value_filling(self, func, dtype):
                 * unit_registry.Pa
             )
     
    -        x = np.arange(len(array1))
    -
    -        ds = xr.Dataset(
    -            data_vars={
    -                "a": xr.DataArray(data=array1, dims="x"),
    -                "b": xr.DataArray(data=array2, dims="x"),
    -            },
    -            coords={"x": x},
    -        )
    +        ds = xr.Dataset({"a": ("x", array1), "b": ("y", array2)})
    +        units = extract_units(ds)
     
    -        expected = attach_units(
    -            func(strip_units(ds), dim="x"),
    -            {"a": unit_registry.degK, "b": unit_registry.Pa},
    -        )
    +        expected = attach_units(func(strip_units(ds), dim="x"), units)
             actual = func(ds, dim="x")
     
    -        assert_equal_with_units(expected, actual)
    +        assert_units_equal(expected, actual)
    +        assert_equal(expected, actual)
     
         @pytest.mark.parametrize(
             "unit,error",
    @@ -4003,14 +4065,7 @@ def test_missing_value_filling(self, func, dtype):
                     unit_registry.dimensionless, DimensionalityError, id="dimensionless"
                 ),
                 pytest.param(unit_registry.s, DimensionalityError, id="incompatible_unit"),
    -            pytest.param(
    -                unit_registry.cm,
    -                None,
    -                id="compatible_unit",
    -                marks=pytest.mark.xfail(
    -                    reason="where converts the array, not the fill value"
    -                ),
    -            ),
    +            pytest.param(unit_registry.cm, None, id="compatible_unit"),
                 pytest.param(unit_registry.m, None, id="identical_unit"),
             ),
         )
    @@ -4031,30 +4086,26 @@ def test_fillna(self, fill_value, unit, error, dtype):
                 np.array([4.3, 9.8, 7.5, np.nan, 8.2, np.nan]).astype(dtype)
                 * unit_registry.m
             )
    -        ds = xr.Dataset(
    -            data_vars={
    -                "a": xr.DataArray(data=array1, dims="x"),
    -                "b": xr.DataArray(data=array2, dims="x"),
    -            }
    -        )
    +        ds = xr.Dataset({"a": ("x", array1), "b": ("x", array2)})
    +        value = fill_value * unit
    +        units = extract_units(ds)
     
             if error is not None:
                 with pytest.raises(error):
    -                ds.fillna(value=fill_value * unit)
    +                ds.fillna(value=value)
     
                 return
     
    -        actual = ds.fillna(value=fill_value * unit)
    +        actual = ds.fillna(value=value)
             expected = attach_units(
                 strip_units(ds).fillna(
    -                value=strip_units(
    -                    convert_units(fill_value * unit, {None: unit_registry.m})
    -                )
    +                value=strip_units(convert_units(value, {None: unit_registry.m}))
                 ),
    -            {"a": unit_registry.m, "b": unit_registry.m},
    +            units,
             )
     
    -        assert_equal_with_units(expected, actual)
    +        assert_units_equal(expected, actual)
    +        assert_equal(expected, actual)
     
         def test_dropna(self, dtype):
             array1 = (
    @@ -4065,22 +4116,14 @@ def test_dropna(self, dtype):
                 np.array([4.3, 9.8, 7.5, np.nan, 8.2, np.nan]).astype(dtype)
                 * unit_registry.Pa
             )
    -        x = np.arange(len(array1))
    -        ds = xr.Dataset(
    -            data_vars={
    -                "a": xr.DataArray(data=array1, dims="x"),
    -                "b": xr.DataArray(data=array2, dims="x"),
    -            },
    -            coords={"x": x},
    -        )
    +        ds = xr.Dataset({"a": ("x", array1), "b": ("x", array2)})
    +        units = extract_units(ds)
     
    -        expected = attach_units(
    -            strip_units(ds).dropna(dim="x"),
    -            {"a": unit_registry.degK, "b": unit_registry.Pa},
    -        )
    +        expected = attach_units(strip_units(ds).dropna(dim="x"), units)
             actual = ds.dropna(dim="x")
     
    -        assert_equal_with_units(expected, actual)
    +        assert_units_equal(expected, actual)
    +        assert_equal(expected, actual)
     
         @pytest.mark.parametrize(
             "unit",
    @@ -4101,34 +4144,28 @@ def test_isin(self, unit, dtype):
                 np.array([4.3, 9.8, 7.5, np.nan, 8.2, np.nan]).astype(dtype)
                 * unit_registry.m
             )
    -        x = np.arange(len(array1))
    -        ds = xr.Dataset(
    -            data_vars={
    -                "a": xr.DataArray(data=array1, dims="x"),
    -                "b": xr.DataArray(data=array2, dims="x"),
    -            },
    -            coords={"x": x},
    -        )
    +        ds = xr.Dataset({"a": ("x", array1), "b": ("x", array2)})
     
             raw_values = np.array([1.4, np.nan, 2.3]).astype(dtype)
             values = raw_values * unit
     
    -        if (
    -            isinstance(values, unit_registry.Quantity)
    -            and values.check(unit_registry.m)
    -            and unit != unit_registry.m
    -        ):
    -            raw_values = values.to(unit_registry.m).magnitude
    +        converted_values = (
    +            convert_units(values, {None: unit_registry.m})
    +            if is_compatible(unit, unit_registry.m)
    +            else values
    +        )
     
    -        expected = strip_units(ds).isin(raw_values)
    -        if not isinstance(values, unit_registry.Quantity) or not values.check(
    -            unit_registry.m
    -        ):
    +        expected = strip_units(ds).isin(strip_units(converted_values))
    +        # TODO: use `unit_registry.is_compatible_with(unit, unit_registry.m)` instead.
    +        # Needs `pint>=0.12.1`, though, so we probably should wait until that is released.
    +        if not is_compatible(unit, unit_registry.m):
                 expected.a[:] = False
                 expected.b[:] = False
    +
             actual = ds.isin(values)
     
    -        assert_equal_with_units(actual, expected)
    +        assert_units_equal(expected, actual)
    +        assert_equal(expected, actual)
     
         @pytest.mark.parametrize(
             "variant", ("masking", "replacing_scalar", "replacing_array", "dropping")
    @@ -4150,13 +4187,8 @@ def test_where(self, variant, unit, error, dtype):
             array1 = np.linspace(0, 1, 10).astype(dtype) * original_unit
             array2 = np.linspace(-1, 0, 10).astype(dtype) * original_unit
     
    -        ds = xr.Dataset(
    -            data_vars={
    -                "a": xr.DataArray(data=array1, dims="x"),
    -                "b": xr.DataArray(data=array2, dims="x"),
    -            },
    -            coords={"x": np.arange(len(array1))},
    -        )
    +        ds = xr.Dataset({"a": ("x", array1), "b": ("x", array2)})
    +        units = extract_units(ds)
     
             condition = ds < 0.5 * original_unit
             other = np.linspace(-2, -1, 10).astype(dtype) * unit
    @@ -4178,15 +4210,13 @@ def test_where(self, variant, unit, error, dtype):
                 for key, value in kwargs.items()
             }
     
    -        expected = attach_units(
    -            strip_units(ds).where(**kwargs_without_units),
    -            {"a": original_unit, "b": original_unit},
    -        )
    +        expected = attach_units(strip_units(ds).where(**kwargs_without_units), units)
             actual = ds.where(**kwargs)
     
    -        assert_equal_with_units(expected, actual)
    +        assert_units_equal(expected, actual)
    +        assert_equal(expected, actual)
     
    -    @pytest.mark.xfail(reason="interpolate strips units")
    +    @pytest.mark.xfail(reason="interpolate_na uses numpy.vectorize")
         def test_interpolate_na(self, dtype):
             array1 = (
                 np.array([1.4, np.nan, 2.3, np.nan, np.nan, 9.1]).astype(dtype)
    @@ -4196,24 +4226,15 @@ def test_interpolate_na(self, dtype):
                 np.array([4.3, 9.8, 7.5, np.nan, 8.2, np.nan]).astype(dtype)
                 * unit_registry.Pa
             )
    -        x = np.arange(len(array1))
    -        ds = xr.Dataset(
    -            data_vars={
    -                "a": xr.DataArray(data=array1, dims="x"),
    -                "b": xr.DataArray(data=array2, dims="x"),
    -            },
    -            coords={"x": x},
    -        )
    +        ds = xr.Dataset({"a": ("x", array1), "b": ("x", array2)})
    +        units = extract_units(ds)
     
    -        expected = attach_units(
    -            strip_units(ds).interpolate_na(dim="x"),
    -            {"a": unit_registry.degK, "b": unit_registry.Pa},
    -        )
    +        expected = attach_units(strip_units(ds).interpolate_na(dim="x"), units)
             actual = ds.interpolate_na(dim="x")
     
    -        assert_equal_with_units(expected, actual)
    +        assert_units_equal(expected, actual)
    +        assert_equal(expected, actual)
     
    -    @pytest.mark.xfail(reason="wrong argument order for `where`")
         @pytest.mark.parametrize(
             "unit,error",
             (
    @@ -4226,31 +4247,40 @@ def test_interpolate_na(self, dtype):
                 pytest.param(unit_registry.m, None, id="same_unit"),
             ),
         )
    -    def test_combine_first(self, unit, error, dtype):
    +    @pytest.mark.parametrize(
    +        "variant",
    +        (
    +            "data",
    +            pytest.param(
    +                "dims", marks=pytest.mark.xfail(reason="indexes don't support units"),
    +            ),
    +        ),
    +    )
    +    def test_combine_first(self, variant, unit, error, dtype):
    +        variants = {
    +            "data": (unit_registry.m, unit, 1, 1),
    +            "dims": (1, 1, unit_registry.m, unit),
    +        }
    +        data_unit, other_data_unit, dims_unit, other_dims_unit = variants.get(variant)
    +
             array1 = (
    -            np.array([1.4, np.nan, 2.3, np.nan, np.nan, 9.1]).astype(dtype)
    -            * unit_registry.m
    +            np.array([1.4, np.nan, 2.3, np.nan, np.nan, 9.1]).astype(dtype) * data_unit
             )
             array2 = (
    -            np.array([4.3, 9.8, 7.5, np.nan, 8.2, np.nan]).astype(dtype)
    -            * unit_registry.m
    +            np.array([4.3, 9.8, 7.5, np.nan, 8.2, np.nan]).astype(dtype) * data_unit
             )
    -        x = np.arange(len(array1))
    +        x = np.arange(len(array1)) * dims_unit
             ds = xr.Dataset(
    -            data_vars={
    -                "a": xr.DataArray(data=array1, dims="x"),
    -                "b": xr.DataArray(data=array2, dims="x"),
    -            },
    -            coords={"x": x},
    +            data_vars={"a": ("x", array1), "b": ("x", array2)}, coords={"x": x},
             )
    -        other_array1 = np.ones_like(array1) * unit
    -        other_array2 = -1 * np.ones_like(array2) * unit
    +        units = extract_units(ds)
    +
    +        other_array1 = np.ones_like(array1) * other_data_unit
    +        other_array2 = np.full_like(array2, fill_value=-1) * other_data_unit
    +        other_x = (np.arange(array1.shape[0]) + 5) * other_dims_unit
             other = xr.Dataset(
    -            data_vars={
    -                "a": xr.DataArray(data=other_array1, dims="x"),
    -                "b": xr.DataArray(data=other_array2, dims="x"),
    -            },
    -            coords={"x": np.arange(array1.shape[0])},
    +            data_vars={"a": ("x", other_array1), "b": ("x", other_array2)},
    +            coords={"x": other_x},
             )
     
             if error is not None:
    @@ -4260,16 +4290,13 @@ def test_combine_first(self, unit, error, dtype):
                 return
     
             expected = attach_units(
    -            strip_units(ds).combine_first(
    -                strip_units(
    -                    convert_units(other, {"a": unit_registry.m, "b": unit_registry.m})
    -                )
    -            ),
    -            {"a": unit_registry.m, "b": unit_registry.m},
    +            strip_units(ds).combine_first(strip_units(convert_units(other, units))),
    +            units,
             )
             actual = ds.combine_first(other)
     
    -        assert_equal_with_units(expected, actual)
    +        assert_units_equal(expected, actual)
    +        assert_equal(expected, actual)
     
         @pytest.mark.parametrize(
             "unit",
    @@ -4282,7 +4309,7 @@ def test_combine_first(self, unit, error, dtype):
             ),
         )
         @pytest.mark.parametrize(
    -        "variation",
    +        "variant",
             (
                 "data",
                 pytest.param(
    @@ -4291,50 +4318,67 @@ def test_combine_first(self, unit, error, dtype):
                 "coords",
             ),
         )
    -    @pytest.mark.parametrize("func", (method("equals"), method("identical")), ids=repr)
    -    def test_comparisons(self, func, variation, unit, dtype):
    -        def is_compatible(a, b):
    -            a = a if a is not None else 1
    -            b = b if b is not None else 1
    -            quantity = np.arange(5) * a
    -
    -            return a == b or quantity.check(b)
    -
    +    @pytest.mark.parametrize(
    +        "func",
    +        (
    +            method("equals"),
    +            pytest.param(
    +                method("identical"),
    +                marks=pytest.mark.skip("behaviour of identical is unclear"),
    +            ),
    +        ),
    +        ids=repr,
    +    )
    +    def test_comparisons(self, func, variant, unit, dtype):
             array1 = np.linspace(0, 5, 10).astype(dtype)
             array2 = np.linspace(-5, 0, 10).astype(dtype)
     
             coord = np.arange(len(array1)).astype(dtype)
     
    -        original_unit = unit_registry.m
    -        quantity1 = array1 * original_unit
    -        quantity2 = array2 * original_unit
    -        x = coord * original_unit
    -        y = coord * original_unit
    +        variants = {
    +            "data": (unit_registry.m, 1, 1),
    +            "dims": (1, unit_registry.m, 1),
    +            "coords": (1, 1, unit_registry.m),
    +        }
    +        data_unit, dim_unit, coord_unit = variants.get(variant)
     
    -        units = {"data": (unit, 1, 1), "dims": (1, unit, 1), "coords": (1, 1, unit)}
    -        data_unit, dim_unit, coord_unit = units.get(variation)
    +        a = array1 * data_unit
    +        b = array2 * data_unit
    +        x = coord * dim_unit
    +        y = coord * coord_unit
     
             ds = xr.Dataset(
    -            data_vars={
    -                "a": xr.DataArray(data=quantity1, dims="x"),
    -                "b": xr.DataArray(data=quantity2, dims="x"),
    -            },
    -            coords={"x": x, "y": ("x", y)},
    +            data_vars={"a": ("x", a), "b": ("x", b)}, coords={"x": x, "y": ("x", y)},
             )
    +        units = extract_units(ds)
    +
    +        other_variants = {
    +            "data": (unit, 1, 1),
    +            "dims": (1, unit, 1),
    +            "coords": (1, 1, unit),
    +        }
    +        other_data_unit, other_dim_unit, other_coord_unit = other_variants.get(variant)
     
             other_units = {
    -            "a": data_unit if quantity1.check(data_unit) else None,
    -            "b": data_unit if quantity2.check(data_unit) else None,
    -            "x": dim_unit if x.check(dim_unit) else None,
    -            "y": coord_unit if y.check(coord_unit) else None,
    +            "a": other_data_unit,
    +            "b": other_data_unit,
    +            "x": other_dim_unit,
    +            "y": other_coord_unit,
             }
    -        other = attach_units(strip_units(convert_units(ds, other_units)), other_units)
     
    -        units = extract_units(ds)
    +        to_convert = {
    +            key: unit if is_compatible(unit, reference) else None
    +            for key, (unit, reference) in zip_mappings(units, other_units)
    +        }
    +        # convert units where possible, then attach all units to the converted dataset
    +        other = attach_units(strip_units(convert_units(ds, to_convert)), other_units)
             other_units = extract_units(other)
     
    +        # make sure all units are compatible and only then try to
    +        # convert and compare values
             equal_ds = all(
    -            is_compatible(units[name], other_units[name]) for name in units.keys()
    +            is_compatible(unit, other_unit)
    +            for _, (unit, other_unit) in zip_mappings(units, other_units)
             ) and (strip_units(ds).equals(strip_units(convert_units(other, units))))
             equal_units = units == other_units
             expected = equal_ds and (func.name != "identical" or equal_units)
    @@ -4343,6 +4387,9 @@ def is_compatible(a, b):
     
             assert expected == actual
     
    +    # TODO: eventually use another decorator / wrapper function that
    +    # applies a filter to the parametrize combinations:
    +    # we only need a single test for data
         @pytest.mark.parametrize(
             "unit",
             (
    @@ -4353,14 +4400,29 @@ def is_compatible(a, b):
                 pytest.param(unit_registry.m, id="identical_unit"),
             ),
         )
    -    def test_broadcast_like(self, unit, dtype):
    -        array1 = np.linspace(1, 2, 2 * 1).reshape(2, 1).astype(dtype) * unit_registry.Pa
    -        array2 = np.linspace(0, 1, 2 * 3).reshape(2, 3).astype(dtype) * unit_registry.Pa
    +    @pytest.mark.parametrize(
    +        "variant",
    +        (
    +            "data",
    +            pytest.param(
    +                "dims", marks=pytest.mark.xfail(reason="indexes don't support units"),
    +            ),
    +        ),
    +    )
    +    def test_broadcast_like(self, variant, unit, dtype):
    +        variants = {
    +            "data": ((unit_registry.m, unit), (1, 1)),
    +            "dims": ((1, 1), (unit_registry.m, unit)),
    +        }
    +        (data_unit1, data_unit2), (dim_unit1, dim_unit2) = variants.get(variant)
     
    -        x1 = np.arange(2) * unit_registry.m
    -        x2 = np.arange(2) * unit
    -        y1 = np.array([0]) * unit_registry.m
    -        y2 = np.arange(3) * unit
    +        array1 = np.linspace(1, 2, 2 * 1).reshape(2, 1).astype(dtype) * data_unit1
    +        array2 = np.linspace(0, 1, 2 * 3).reshape(2, 3).astype(dtype) * data_unit2
    +
    +        x1 = np.arange(2) * dim_unit1
    +        x2 = np.arange(2) * dim_unit2
    +        y1 = np.array([0]) * dim_unit1
    +        y2 = np.arange(3) * dim_unit2
     
             ds1 = xr.Dataset(
                 data_vars={"a": (("x", "y"), array1)}, coords={"x": x1, "y": y1}
    @@ -4374,7 +4436,8 @@ def test_broadcast_like(self, unit, dtype):
             )
             actual = ds1.broadcast_like(ds2)
     
    -        assert_equal_with_units(expected, actual)
    +        assert_units_equal(expected, actual)
    +        assert_equal(expected, actual)
     
         @pytest.mark.parametrize(
             "unit",
    @@ -4387,32 +4450,25 @@ def test_broadcast_like(self, unit, dtype):
             ),
         )
         def test_broadcast_equals(self, unit, dtype):
    +        # TODO: does this use indexes?
             left_array1 = np.ones(shape=(2, 3), dtype=dtype) * unit_registry.m
             left_array2 = np.zeros(shape=(3, 6), dtype=dtype) * unit_registry.m
     
             right_array1 = np.ones(shape=(2,)) * unit
    -        right_array2 = np.ones(shape=(3,)) * unit
    +        right_array2 = np.zeros(shape=(3,)) * unit
     
             left = xr.Dataset(
    -            data_vars={
    -                "a": xr.DataArray(data=left_array1, dims=("x", "y")),
    -                "b": xr.DataArray(data=left_array2, dims=("y", "z")),
    -            }
    -        )
    -        right = xr.Dataset(
    -            data_vars={
    -                "a": xr.DataArray(data=right_array1, dims="x"),
    -                "b": xr.DataArray(data=right_array2, dims="y"),
    -            }
    +            {"a": (("x", "y"), left_array1), "b": (("y", "z"), left_array2)},
             )
    +        right = xr.Dataset({"a": ("x", right_array1), "b": ("y", right_array2)})
     
    -        units = {
    -            **extract_units(left),
    -            **({} if left_array1.check(unit) else {"a": None, "b": None}),
    -        }
    -        expected = strip_units(left).broadcast_equals(
    -            strip_units(convert_units(right, units))
    -        ) & left_array1.check(unit)
    +        units = merge_mappings(
    +            extract_units(left),
    +            {} if is_compatible(left_array1, unit) else {"a": None, "b": None},
    +        )
    +        expected = is_compatible(left_array1, unit) and strip_units(
    +            left
    +        ).broadcast_equals(strip_units(convert_units(right, units)))
             actual = left.broadcast_equals(right)
     
             assert expected == actual
    @@ -4422,68 +4478,74 @@ def test_broadcast_equals(self, unit, dtype):
             (method("unstack"), method("reset_index", "v"), method("reorder_levels")),
             ids=repr,
         )
    -    def test_stacking_stacked(self, func, dtype):
    -        array1 = (
    -            np.linspace(0, 10, 5 * 10).reshape(5, 10).astype(dtype) * unit_registry.m
    -        )
    +    @pytest.mark.parametrize(
    +        "variant",
    +        (
    +            "data",
    +            pytest.param(
    +                "dims", marks=pytest.mark.xfail(reason="indexes don't support units"),
    +            ),
    +        ),
    +    )
    +    def test_stacking_stacked(self, variant, func, dtype):
    +        variants = {
    +            "data": (unit_registry.m, 1),
    +            "dims": (1, unit_registry.m),
    +        }
    +        data_unit, dim_unit = variants.get(variant)
    +
    +        array1 = np.linspace(0, 10, 5 * 10).reshape(5, 10).astype(dtype) * data_unit
             array2 = (
                 np.linspace(-10, 0, 5 * 10 * 15).reshape(5, 10, 15).astype(dtype)
    -            * unit_registry.m
    +            * data_unit
             )
     
    -        x = np.arange(array1.shape[0])
    -        y = np.arange(array1.shape[1])
    -        z = np.arange(array2.shape[2])
    +        x = np.arange(array1.shape[0]) * dim_unit
    +        y = np.arange(array1.shape[1]) * dim_unit
    +        z = np.arange(array2.shape[2]) * dim_unit
     
             ds = xr.Dataset(
    -            data_vars={
    -                "a": xr.DataArray(data=array1, dims=("x", "y")),
    -                "b": xr.DataArray(data=array2, dims=("x", "y", "z")),
    -            },
    +            data_vars={"a": (("x", "y"), array1), "b": (("x", "y", "z"), array2)},
                 coords={"x": x, "y": y, "z": z},
             )
    +        units = extract_units(ds)
     
             stacked = ds.stack(v=("x", "y"))
     
    -        expected = attach_units(
    -            func(strip_units(stacked)), {"a": unit_registry.m, "b": unit_registry.m}
    -        )
    +        expected = attach_units(func(strip_units(stacked)), units)
             actual = func(stacked)
     
    -        assert_equal_with_units(expected, actual)
    +        assert_units_equal(expected, actual)
    +        assert_equal(expected, actual)
     
    -    @pytest.mark.xfail(reason="does not work with quantities yet")
    +    @pytest.mark.xfail(
    +        reason="stacked dimension's labels have to be hashable, but are numpy.arrays"
    +    )
         def test_to_stacked_array(self, dtype):
    -        labels = np.arange(5).astype(dtype) * unit_registry.s
    -        arrays = {name: np.linspace(0, 1, 10) * unit_registry.m for name in labels}
    +        labels = range(5) * unit_registry.s
    +        arrays = {
    +            name: np.linspace(0, 1, 10).astype(dtype) * unit_registry.m
    +            for name in labels
    +        }
     
    -        ds = xr.Dataset(
    -            data_vars={
    -                name: xr.DataArray(data=array, dims="x")
    -                for name, array in arrays.items()
    -            }
    -        )
    +        ds = xr.Dataset({name: ("x", array) for name, array in arrays.items()})
    +        units = {None: unit_registry.m, "y": unit_registry.s}
     
             func = method("to_stacked_array", "z", variable_dim="y", sample_dims=["x"])
     
             actual = func(ds).rename(None)
    -        expected = attach_units(
    -            func(strip_units(ds)).rename(None),
    -            {None: unit_registry.m, "y": unit_registry.s},
    -        )
    +        expected = attach_units(func(strip_units(ds)).rename(None), units)
     
    -        assert_equal_with_units(expected, actual)
    +        assert_units_equal(expected, actual)
    +        assert_equal(expected, actual)
     
         @pytest.mark.parametrize(
             "func",
             (
                 method("transpose", "y", "x", "z1", "z2"),
    -            method("stack", a=("x", "y")),
    +            method("stack", u=("x", "y")),
                 method("set_index", x="x2"),
    -            pytest.param(
    -                method("shift", x=2),
    -                marks=pytest.mark.xfail(reason="tries to concatenate nan arrays"),
    -            ),
    +            method("shift", x=2),
                 method("roll", x=2, roll_coords=False),
                 method("sortby", "x2"),
             ),
    @@ -4508,20 +4570,19 @@ def test_stacking_reordering(self, func, dtype):
     
             ds = xr.Dataset(
                 data_vars={
    -                "a": xr.DataArray(data=array1, dims=("x", "y", "z1")),
    -                "b": xr.DataArray(data=array2, dims=("x", "y", "z2")),
    +                "a": (("x", "y", "z1"), array1),
    +                "b": (("x", "y", "z2"), array2),
                 },
                 coords={"x": x, "y": y, "z1": z1, "z2": z2, "x2": ("x", x2)},
             )
    +        units = extract_units(ds)
     
    -        expected = attach_units(
    -            func(strip_units(ds)), {"a": unit_registry.Pa, "b": unit_registry.degK}
    -        )
    +        expected = attach_units(func(strip_units(ds)), units)
             actual = func(ds)
     
    -        assert_equal_with_units(expected, actual)
    +        assert_units_equal(expected, actual)
    +        assert_equal(expected, actual)
     
    -    @pytest.mark.xfail(reason="indexes strip units")
         @pytest.mark.parametrize(
             "indices",
             (
    @@ -4533,22 +4594,14 @@ def test_isel(self, indices, dtype):
             array1 = np.arange(10).astype(dtype) * unit_registry.s
             array2 = np.linspace(0, 1, 10).astype(dtype) * unit_registry.Pa
     
    -        x = np.arange(len(array1)) * unit_registry.m
    -        ds = xr.Dataset(
    -            data_vars={
    -                "a": xr.DataArray(data=array1, dims="x"),
    -                "b": xr.DataArray(data=array2, dims="x"),
    -            },
    -            coords={"x": x},
    -        )
    +        ds = xr.Dataset(data_vars={"a": ("x", array1), "b": ("x", array2)})
    +        units = extract_units(ds)
     
    -        expected = attach_units(
    -            strip_units(ds).isel(x=indices),
    -            {"a": unit_registry.s, "b": unit_registry.Pa, "x": unit_registry.m},
    -        )
    +        expected = attach_units(strip_units(ds).isel(x=indices), units)
             actual = ds.isel(x=indices)
     
    -        assert_equal_with_units(expected, actual)
    +        assert_units_equal(expected, actual)
    +        assert_equal(expected, actual)
     
         @pytest.mark.xfail(reason="indexes don't support units")
         @pytest.mark.parametrize(
    @@ -4565,7 +4618,7 @@ def test_isel(self, indices, dtype):
                 pytest.param(1, KeyError, id="no_units"),
                 pytest.param(unit_registry.dimensionless, KeyError, id="dimensionless"),
                 pytest.param(unit_registry.degree, KeyError, id="incompatible_unit"),
    -            pytest.param(unit_registry.dm, KeyError, id="compatible_unit"),
    +            pytest.param(unit_registry.mm, KeyError, id="compatible_unit"),
                 pytest.param(unit_registry.m, None, id="identical_unit"),
             ),
         )
    @@ -4584,20 +4637,24 @@ def test_sel(self, raw_values, unit, error, dtype):
     
             values = raw_values * unit
     
    -        if error is not None and not (
    -            isinstance(raw_values, (int, float)) and x.check(unit)
    -        ):
    +        # TODO: if we choose dm as compatible unit, single value keys
    +        # can be found. Should we check that?
    +        if error is not None:
                 with pytest.raises(error):
                     ds.sel(x=values)
     
                 return
     
             expected = attach_units(
    -            strip_units(ds).sel(x=strip_units(convert_units(values, {None: x.units}))),
    -            {"a": array1.units, "b": array2.units, "x": x.units},
    +            strip_units(ds).sel(
    +                x=strip_units(convert_units(values, {None: unit_registry.m}))
    +            ),
    +            extract_units(ds),
             )
             actual = ds.sel(x=values)
    -        assert_equal_with_units(expected, actual)
    +
    +        assert_units_equal(expected, actual)
    +        assert_equal(expected, actual)
     
         @pytest.mark.xfail(reason="indexes don't support units")
         @pytest.mark.parametrize(
    @@ -4614,7 +4671,7 @@ def test_sel(self, raw_values, unit, error, dtype):
                 pytest.param(1, KeyError, id="no_units"),
                 pytest.param(unit_registry.dimensionless, KeyError, id="dimensionless"),
                 pytest.param(unit_registry.degree, KeyError, id="incompatible_unit"),
    -            pytest.param(unit_registry.dm, KeyError, id="compatible_unit"),
    +            pytest.param(unit_registry.mm, KeyError, id="compatible_unit"),
                 pytest.param(unit_registry.m, None, id="identical_unit"),
             ),
         )
    @@ -4633,9 +4690,9 @@ def test_drop_sel(self, raw_values, unit, error, dtype):
     
             values = raw_values * unit
     
    -        if error is not None and not (
    -            isinstance(raw_values, (int, float)) and x.check(unit)
    -        ):
    +        # TODO: if we choose dm as compatible unit, single value keys
    +        # can be found. Should we check that?
    +        if error is not None:
                 with pytest.raises(error):
                     ds.drop_sel(x=values)
     
    @@ -4643,12 +4700,14 @@ def test_drop_sel(self, raw_values, unit, error, dtype):
     
             expected = attach_units(
                 strip_units(ds).drop_sel(
    -                x=strip_units(convert_units(values, {None: x.units}))
    +                x=strip_units(convert_units(values, {None: unit_registry.m}))
                 ),
                 extract_units(ds),
             )
             actual = ds.drop_sel(x=values)
    -        assert_equal_with_units(expected, actual)
    +
    +        assert_units_equal(expected, actual)
    +        assert_equal(expected, actual)
     
         @pytest.mark.xfail(reason="indexes don't support units")
         @pytest.mark.parametrize(
    @@ -4665,7 +4724,7 @@ def test_drop_sel(self, raw_values, unit, error, dtype):
                 pytest.param(1, KeyError, id="no_units"),
                 pytest.param(unit_registry.dimensionless, KeyError, id="dimensionless"),
                 pytest.param(unit_registry.degree, KeyError, id="incompatible_unit"),
    -            pytest.param(unit_registry.dm, KeyError, id="compatible_unit"),
    +            pytest.param(unit_registry.mm, KeyError, id="compatible_unit"),
                 pytest.param(unit_registry.m, None, id="identical_unit"),
             ),
         )
    @@ -4684,9 +4743,9 @@ def test_loc(self, raw_values, unit, error, dtype):
     
             values = raw_values * unit
     
    -        if error is not None and not (
    -            isinstance(raw_values, (int, float)) and x.check(unit)
    -        ):
    +        # TODO: if we choose dm as compatible unit, single value keys
    +        # can be found. Should we check that?
    +        if error is not None:
                 with pytest.raises(error):
                     ds.loc[{"x": values}]
     
    @@ -4694,12 +4753,14 @@ def test_loc(self, raw_values, unit, error, dtype):
     
             expected = attach_units(
                 strip_units(ds).loc[
    -                {"x": strip_units(convert_units(values, {None: x.units}))}
    +                {"x": strip_units(convert_units(values, {None: unit_registry.m}))}
                 ],
    -            {"a": array1.units, "b": array2.units, "x": x.units},
    +            extract_units(ds),
             )
             actual = ds.loc[{"x": values}]
    -        assert_equal_with_units(expected, actual)
    +
    +        assert_units_equal(expected, actual)
    +        assert_equal(expected, actual)
     
         @pytest.mark.parametrize(
             "func",
    @@ -4710,14 +4771,34 @@ def test_loc(self, raw_values, unit, error, dtype):
             ),
             ids=repr,
         )
    -    def test_head_tail_thin(self, func, dtype):
    -        array1 = np.linspace(1, 2, 10 * 5).reshape(10, 5) * unit_registry.degK
    -        array2 = np.linspace(1, 2, 10 * 8).reshape(10, 8) * unit_registry.Pa
    +    @pytest.mark.parametrize(
    +        "variant",
    +        (
    +            "data",
    +            pytest.param(
    +                "dims", marks=pytest.mark.xfail(reason="indexes don't support units")
    +            ),
    +            "coords",
    +        ),
    +    )
    +    def test_head_tail_thin(self, func, variant, dtype):
    +        variants = {
    +            "data": ((unit_registry.degK, unit_registry.Pa), 1, 1),
    +            "dims": ((1, 1), unit_registry.m, 1),
    +            "coords": ((1, 1), 1, unit_registry.m),
    +        }
    +        (unit_a, unit_b), dim_unit, coord_unit = variants.get(variant)
    +
    +        array1 = np.linspace(1, 2, 10 * 5).reshape(10, 5) * unit_a
    +        array2 = np.linspace(1, 2, 10 * 8).reshape(10, 8) * unit_b
     
             coords = {
    -            "x": np.arange(10) * unit_registry.m,
    -            "y": np.arange(5) * unit_registry.m,
    -            "z": np.arange(8) * unit_registry.m,
    +            "x": np.arange(10) * dim_unit,
    +            "y": np.arange(5) * dim_unit,
    +            "z": np.arange(8) * dim_unit,
    +            "u": ("x", np.linspace(0, 1, 10) * coord_unit),
    +            "v": ("y", np.linspace(1, 2, 5) * coord_unit),
    +            "w": ("z", np.linspace(-1, 0, 8) * coord_unit),
             }
     
             ds = xr.Dataset(
    @@ -4731,8 +4812,10 @@ def test_head_tail_thin(self, func, dtype):
             expected = attach_units(func(strip_units(ds)), extract_units(ds))
             actual = func(ds)
     
    -        assert_equal_with_units(expected, actual)
    +        assert_units_equal(expected, actual)
    +        assert_equal(expected, actual)
     
    +    @pytest.mark.parametrize("dim", ("x", "y", "z", "t", "all"))
         @pytest.mark.parametrize(
             "shape",
             (
    @@ -4743,13 +4826,9 @@ def test_head_tail_thin(self, func, dtype):
                 pytest.param((1, 10, 1, 20), id="first and last dimension squeezable"),
             ),
         )
    -    def test_squeeze(self, shape, dtype):
    +    def test_squeeze(self, shape, dim, dtype):
             names = "xyzt"
    -        coords = {
    -            name: np.arange(length).astype(dtype)
    -            * (unit_registry.m if name != "t" else unit_registry.s)
    -            for name, length in zip(names, shape)
    -        }
    +        dim_lengths = dict(zip(names, shape))
             array1 = (
                 np.linspace(0, 1, 10 * 20).astype(dtype).reshape(shape) * unit_registry.degK
             )
    @@ -4759,74 +4838,59 @@ def test_squeeze(self, shape, dtype):
     
             ds = xr.Dataset(
                 data_vars={
    -                "a": xr.DataArray(data=array1, dims=tuple(names[: len(shape)])),
    -                "b": xr.DataArray(data=array2, dims=tuple(names[: len(shape)])),
    +                "a": (tuple(names[: len(shape)]), array1),
    +                "b": (tuple(names[: len(shape)]), array2),
                 },
    -            coords=coords,
             )
             units = extract_units(ds)
     
    -        expected = attach_units(strip_units(ds).squeeze(), units)
    +        kwargs = {"dim": dim} if dim != "all" and dim_lengths.get(dim, 0) == 1 else {}
     
    -        actual = ds.squeeze()
    -        assert_equal_with_units(actual, expected)
    +        expected = attach_units(strip_units(ds).squeeze(**kwargs), units)
     
    -        # try squeezing the dimensions separately
    -        names = tuple(dim for dim, coord in coords.items() if len(coord) == 1)
    -        for name in names:
    -            expected = attach_units(strip_units(ds).squeeze(dim=name), units)
    -            actual = ds.squeeze(dim=name)
    -            assert_equal_with_units(actual, expected)
    +        actual = ds.squeeze(**kwargs)
    +
    +        assert_units_equal(expected, actual)
    +        assert_equal(expected, actual)
     
    -    @pytest.mark.xfail(reason="ignores units")
    +    @pytest.mark.parametrize("variant", ("data", "coords"))
         @pytest.mark.parametrize(
    -        "unit,error",
    +        "func",
             (
    -            pytest.param(1, DimensionalityError, id="no_unit"),
                 pytest.param(
    -                unit_registry.dimensionless, DimensionalityError, id="dimensionless"
    +                method("interp"), marks=pytest.mark.xfail(reason="uses scipy")
                 ),
    -            pytest.param(unit_registry.s, DimensionalityError, id="incompatible_unit"),
    -            pytest.param(unit_registry.cm, None, id="compatible_unit"),
    -            pytest.param(unit_registry.m, None, id="identical_unit"),
    +            method("reindex"),
             ),
    +        ids=repr,
         )
    -    def test_interp(self, unit, error):
    -        array1 = np.linspace(1, 2, 10 * 5).reshape(10, 5) * unit_registry.degK
    -        array2 = np.linspace(1, 2, 10 * 8).reshape(10, 8) * unit_registry.Pa
    -
    -        coords = {
    -            "x": np.arange(10) * unit_registry.m,
    -            "y": np.arange(5) * unit_registry.m,
    -            "z": np.arange(8) * unit_registry.s,
    +    def test_interp_reindex(self, func, variant, dtype):
    +        variants = {
    +            "data": (unit_registry.m, 1),
    +            "coords": (1, unit_registry.m),
             }
    +        data_unit, coord_unit = variants.get(variant)
     
    -        ds = xr.Dataset(
    -            data_vars={
    -                "a": xr.DataArray(data=array1, dims=("x", "y")),
    -                "b": xr.DataArray(data=array2, dims=("x", "z")),
    -            },
    -            coords=coords,
    -        )
    +        array1 = np.linspace(-1, 0, 10).astype(dtype) * data_unit
    +        array2 = np.linspace(0, 1, 10).astype(dtype) * data_unit
     
    -        new_coords = (np.arange(10) + 0.5) * unit
    +        y = np.arange(10) * coord_unit
     
    -        if error is not None:
    -            with pytest.raises(error):
    -                ds.interp(x=new_coords)
    -
    -            return
    +        x = np.arange(10)
    +        new_x = np.arange(8) + 0.5
     
    -        units = extract_units(ds)
    -        expected = attach_units(
    -            strip_units(ds).interp(x=strip_units(convert_units(new_coords, units))),
    -            units,
    +        ds = xr.Dataset(
    +            {"a": ("x", array1), "b": ("x", array2)}, coords={"x": x, "y": ("x", y)}
             )
    -        actual = ds.interp(x=new_coords)
    +        units = extract_units(ds)
    +
    +        expected = attach_units(func(strip_units(ds), x=new_x), units)
    +        actual = func(ds, x=new_x)
     
    -        assert_equal_with_units(actual, expected)
    +        assert_units_equal(expected, actual)
    +        assert_equal(expected, actual)
     
    -    @pytest.mark.xfail(reason="ignores units")
    +    @pytest.mark.xfail(reason="indexes don't support units")
         @pytest.mark.parametrize(
             "unit,error",
             (
    @@ -4839,106 +4903,67 @@ def test_interp(self, unit, error):
                 pytest.param(unit_registry.m, None, id="identical_unit"),
             ),
         )
    -    def test_interp_like(self, unit, error, dtype):
    -        array1 = (
    -            np.linspace(0, 10, 10 * 5).reshape(10, 5).astype(dtype) * unit_registry.degK
    -        )
    -        array2 = (
    -            np.linspace(10, 20, 10 * 8).reshape(10, 8).astype(dtype) * unit_registry.Pa
    -        )
    +    @pytest.mark.parametrize("func", (method("interp"), method("reindex")), ids=repr)
    +    def test_interp_reindex_indexing(self, func, unit, error, dtype):
    +        array1 = np.linspace(-1, 0, 10).astype(dtype)
    +        array2 = np.linspace(0, 1, 10).astype(dtype)
     
    -        coords = {
    -            "x": np.arange(10) * unit_registry.m,
    -            "y": np.arange(5) * unit_registry.m,
    -            "z": np.arange(8) * unit_registry.m,
    -        }
    -
    -        ds = xr.Dataset(
    -            data_vars={
    -                "a": xr.DataArray(data=array1, dims=("x", "y")),
    -                "b": xr.DataArray(data=array2, dims=("x", "z")),
    -            },
    -            coords=coords,
    -        )
    +        x = np.arange(10) * unit_registry.m
    +        new_x = (np.arange(8) + 0.5) * unit
     
    -        other = xr.Dataset(
    -            data_vars={
    -                "c": xr.DataArray(data=np.empty((20, 10)), dims=("x", "y")),
    -                "d": xr.DataArray(data=np.empty((20, 15)), dims=("x", "z")),
    -            },
    -            coords={
    -                "x": (np.arange(20) + 0.3) * unit,
    -                "y": (np.arange(10) - 0.2) * unit,
    -                "z": (np.arange(15) + 0.4) * unit,
    -            },
    -        )
    +        ds = xr.Dataset({"a": ("x", array1), "b": ("x", array2)}, coords={"x": x})
    +        units = extract_units(ds)
     
             if error is not None:
                 with pytest.raises(error):
    -                ds.interp_like(other)
    +                func(ds, x=new_x)
     
                 return
     
    -        units = extract_units(ds)
    -        expected = attach_units(
    -            strip_units(ds).interp_like(strip_units(convert_units(other, units))), units
    -        )
    -        actual = ds.interp_like(other)
    +        expected = attach_units(func(strip_units(ds), x=new_x), units)
    +        actual = func(ds, x=new_x)
     
    -        assert_equal_with_units(actual, expected)
    +        assert_units_equal(expected, actual)
    +        assert_equal(expected, actual)
     
    -    @pytest.mark.xfail(reason="indexes don't support units")
    +    @pytest.mark.parametrize("variant", ("data", "coords"))
         @pytest.mark.parametrize(
    -        "unit,error",
    +        "func",
             (
    -            pytest.param(1, DimensionalityError, id="no_unit"),
                 pytest.param(
    -                unit_registry.dimensionless, DimensionalityError, id="dimensionless"
    +                method("interp_like"), marks=pytest.mark.xfail(reason="uses scipy")
                 ),
    -            pytest.param(unit_registry.s, DimensionalityError, id="incompatible_unit"),
    -            pytest.param(unit_registry.cm, None, id="compatible_unit"),
    -            pytest.param(unit_registry.m, None, id="identical_unit"),
    +            method("reindex_like"),
             ),
    +        ids=repr,
         )
    -    def test_reindex(self, unit, error, dtype):
    -        array1 = (
    -            np.linspace(1, 2, 10 * 5).reshape(10, 5).astype(dtype) * unit_registry.degK
    -        )
    -        array2 = (
    -            np.linspace(1, 2, 10 * 8).reshape(10, 8).astype(dtype) * unit_registry.Pa
    -        )
    -
    -        coords = {
    -            "x": np.arange(10) * unit_registry.m,
    -            "y": np.arange(5) * unit_registry.m,
    -            "z": np.arange(8) * unit_registry.s,
    +    def test_interp_reindex_like(self, func, variant, dtype):
    +        variants = {
    +            "data": (unit_registry.m, 1),
    +            "coords": (1, unit_registry.m),
             }
    +        data_unit, coord_unit = variants.get(variant)
     
    -        ds = xr.Dataset(
    -            data_vars={
    -                "a": xr.DataArray(data=array1, dims=("x", "y")),
    -                "b": xr.DataArray(data=array2, dims=("x", "z")),
    -            },
    -            coords=coords,
    -        )
    +        array1 = np.linspace(-1, 0, 10).astype(dtype) * data_unit
    +        array2 = np.linspace(0, 1, 10).astype(dtype) * data_unit
     
    -        new_coords = (np.arange(10) + 0.5) * unit
    +        y = np.arange(10) * coord_unit
     
    -        if error is not None:
    -            with pytest.raises(error):
    -                ds.reindex(x=new_coords)
    -
    -            return
    +        x = np.arange(10)
    +        new_x = np.arange(8) + 0.5
     
    -        expected = attach_units(
    -            strip_units(ds).reindex(
    -                x=strip_units(convert_units(new_coords, {None: coords["x"].units}))
    -            ),
    -            extract_units(ds),
    +        ds = xr.Dataset(
    +            {"a": ("x", array1), "b": ("x", array2)}, coords={"x": x, "y": ("x", y)}
             )
    -        actual = ds.reindex(x=new_coords)
    +        units = extract_units(ds)
    +
    +        other = xr.Dataset({"a": ("x", np.empty_like(new_x))}, coords={"x": new_x})
    +
    +        expected = attach_units(func(strip_units(ds), other), units)
    +        actual = func(ds, other)
     
    -        assert_equal_with_units(actual, expected)
    +        assert_units_equal(expected, actual)
    +        assert_equal(expected, actual)
     
         @pytest.mark.xfail(reason="indexes don't support units")
         @pytest.mark.parametrize(
    @@ -4953,54 +4978,32 @@ def test_reindex(self, unit, error, dtype):
                 pytest.param(unit_registry.m, None, id="identical_unit"),
             ),
         )
    -    def test_reindex_like(self, unit, error, dtype):
    -        array1 = (
    -            np.linspace(0, 10, 10 * 5).reshape(10, 5).astype(dtype) * unit_registry.degK
    -        )
    -        array2 = (
    -            np.linspace(10, 20, 10 * 8).reshape(10, 8).astype(dtype) * unit_registry.Pa
    -        )
    +    @pytest.mark.parametrize(
    +        "func", (method("interp_like"), method("reindex_like")), ids=repr
    +    )
    +    def test_interp_reindex_like_indexing(self, func, unit, error, dtype):
    +        array1 = np.linspace(-1, 0, 10).astype(dtype)
    +        array2 = np.linspace(0, 1, 10).astype(dtype)
     
    -        coords = {
    -            "x": np.arange(10) * unit_registry.m,
    -            "y": np.arange(5) * unit_registry.m,
    -            "z": np.arange(8) * unit_registry.m,
    -        }
    +        x = np.arange(10) * unit_registry.m
    +        new_x = (np.arange(8) + 0.5) * unit
     
    -        ds = xr.Dataset(
    -            data_vars={
    -                "a": xr.DataArray(data=array1, dims=("x", "y")),
    -                "b": xr.DataArray(data=array2, dims=("x", "z")),
    -            },
    -            coords=coords,
    -        )
    +        ds = xr.Dataset({"a": ("x", array1), "b": ("x", array2)}, coords={"x": x})
    +        units = extract_units(ds)
     
    -        other = xr.Dataset(
    -            data_vars={
    -                "c": xr.DataArray(data=np.empty((20, 10)), dims=("x", "y")),
    -                "d": xr.DataArray(data=np.empty((20, 15)), dims=("x", "z")),
    -            },
    -            coords={
    -                "x": (np.arange(20) + 0.3) * unit,
    -                "y": (np.arange(10) - 0.2) * unit,
    -                "z": (np.arange(15) + 0.4) * unit,
    -            },
    -        )
    +        other = xr.Dataset({"a": ("x", np.empty_like(new_x))}, coords={"x": new_x})
     
             if error is not None:
                 with pytest.raises(error):
    -                ds.reindex_like(other)
    +                func(ds, other)
     
                 return
     
    -        units = extract_units(ds)
    -        expected = attach_units(
    -            strip_units(ds).reindex_like(strip_units(convert_units(other, units))),
    -            units,
    -        )
    -        actual = ds.reindex_like(other)
    +        expected = attach_units(func(strip_units(ds), other), units)
    +        actual = func(ds, other)
     
    -        assert_equal_with_units(expected, actual)
    +        assert_units_equal(expected, actual)
    +        assert_equal(expected, actual)
     
         @pytest.mark.parametrize(
             "func",
    @@ -5010,30 +5013,46 @@ def test_reindex_like(self, unit, error, dtype):
                 method("integrate", coord="x"),
                 pytest.param(
                     method("quantile", q=[0.25, 0.75]),
    -                marks=pytest.mark.xfail(reason="nanquantile not implemented"),
    +                marks=pytest.mark.xfail(
    +                    LooseVersion(pint.__version__) <= "0.12",
    +                    reason="nanquantile not implemented yet",
    +                ),
                 ),
                 method("reduce", func=np.sum, dim="x"),
                 method("map", np.fabs),
             ),
             ids=repr,
         )
    -    def test_computation(self, func, dtype):
    -        array1 = (
    -            np.linspace(-5, 5, 10 * 5).reshape(10, 5).astype(dtype) * unit_registry.degK
    -        )
    -        array2 = (
    -            np.linspace(10, 20, 10 * 8).reshape(10, 8).astype(dtype) * unit_registry.Pa
    -        )
    -        x = np.arange(10) * unit_registry.m
    -        y = np.arange(5) * unit_registry.m
    -        z = np.arange(8) * unit_registry.m
    +    @pytest.mark.parametrize(
    +        "variant",
    +        (
    +            "data",
    +            pytest.param(
    +                "dims", marks=pytest.mark.xfail(reason="indexes don't support units")
    +            ),
    +            "coords",
    +        ),
    +    )
    +    def test_computation(self, func, variant, dtype):
    +        variants = {
    +            "data": ((unit_registry.degK, unit_registry.Pa), 1, 1),
    +            "dims": ((1, 1), unit_registry.m, 1),
    +            "coords": ((1, 1), 1, unit_registry.m),
    +        }
    +        (unit1, unit2), dim_unit, coord_unit = variants.get(variant)
    +
    +        array1 = np.linspace(-5, 5, 4 * 5).reshape(4, 5).astype(dtype) * unit1
    +        array2 = np.linspace(10, 20, 4 * 3).reshape(4, 3).astype(dtype) * unit2
    +        x = np.arange(4) * dim_unit
    +        y = np.arange(5) * dim_unit
    +        z = np.arange(3) * dim_unit
     
             ds = xr.Dataset(
                 data_vars={
                     "a": xr.DataArray(data=array1, dims=("x", "y")),
                     "b": xr.DataArray(data=array2, dims=("x", "z")),
                 },
    -            coords={"x": x, "y": y, "z": z},
    +            coords={"x": x, "y": y, "z": z, "y2": ("y", np.arange(5) * coord_unit)},
             )
     
             units = extract_units(ds)
    @@ -5041,69 +5060,105 @@ def test_computation(self, func, dtype):
             expected = attach_units(func(strip_units(ds)), units)
             actual = func(ds)
     
    -        assert_equal_with_units(expected, actual)
    +        assert_units_equal(expected, actual)
    +        assert_equal(expected, actual)
     
         @pytest.mark.parametrize(
             "func",
             (
                 method("groupby", "x"),
    -            method("groupby_bins", "x", bins=4),
    +            pytest.param(
    +                method("groupby_bins", "x", bins=2),
    +                marks=pytest.mark.xfail(
    +                    LooseVersion(pint.__version__) <= "0.12",
    +                    reason="needs assert_allclose but that does not work with pint",
    +                ),
    +            ),
                 method("coarsen", x=2),
                 pytest.param(
                     method("rolling", x=3), marks=pytest.mark.xfail(reason="strips units")
                 ),
                 pytest.param(
                     method("rolling_exp", x=3),
    -                marks=pytest.mark.xfail(reason="uses numbagg which strips units"),
    +                marks=pytest.mark.xfail(
    +                    reason="numbagg functions are not supported by pint"
    +                ),
                 ),
             ),
             ids=repr,
         )
    -    def test_computation_objects(self, func, dtype):
    -        array1 = (
    -            np.linspace(-5, 5, 10 * 5).reshape(10, 5).astype(dtype) * unit_registry.degK
    -        )
    -        array2 = (
    -            np.linspace(10, 20, 10 * 5 * 8).reshape(10, 5, 8).astype(dtype)
    -            * unit_registry.Pa
    -        )
    -        x = np.arange(10) * unit_registry.m
    -        y = np.arange(5) * unit_registry.m
    -        z = np.arange(8) * unit_registry.m
    +    @pytest.mark.parametrize(
    +        "variant",
    +        (
    +            "data",
    +            pytest.param(
    +                "dims", marks=pytest.mark.xfail(reason="indexes don't support units")
    +            ),
    +            "coords",
    +        ),
    +    )
    +    def test_computation_objects(self, func, variant, dtype):
    +        variants = {
    +            "data": ((unit_registry.degK, unit_registry.Pa), 1, 1),
    +            "dims": ((1, 1), unit_registry.m, 1),
    +            "coords": ((1, 1), 1, unit_registry.m),
    +        }
    +        (unit1, unit2), dim_unit, coord_unit = variants.get(variant)
    +
    +        array1 = np.linspace(-5, 5, 4 * 5).reshape(4, 5).astype(dtype) * unit1
    +        array2 = np.linspace(10, 20, 4 * 3).reshape(4, 3).astype(dtype) * unit2
    +        x = np.arange(4) * dim_unit
    +        y = np.arange(5) * dim_unit
    +        z = np.arange(3) * dim_unit
     
             ds = xr.Dataset(
    -            data_vars={
    -                "a": xr.DataArray(data=array1, dims=("x", "y")),
    -                "b": xr.DataArray(data=array2, dims=("x", "y", "z")),
    -            },
    -            coords={"x": x, "y": y, "z": z},
    +            data_vars={"a": (("x", "y"), array1), "b": (("x", "z"), array2)},
    +            coords={"x": x, "y": y, "z": z, "y2": ("y", np.arange(5) * coord_unit)},
             )
             units = extract_units(ds)
     
             args = [] if func.name != "groupby" else ["y"]
    -        reduce_func = method("mean", *args)
    -        expected = attach_units(reduce_func(func(strip_units(ds))), units)
    -        actual = reduce_func(func(ds))
    +        expected = attach_units(func(strip_units(ds)).mean(*args), units)
    +        actual = func(ds).mean(*args)
     
    -        assert_equal_with_units(expected, actual)
    +        assert_units_equal(expected, actual)
    +        # TODO: remove once pint 0.12 has been released
    +        if LooseVersion(pint.__version__) <= "0.12":
    +            assert_equal(expected, actual)
    +        else:
    +            assert_allclose(expected, actual)
    +
    +    @pytest.mark.parametrize(
    +        "variant",
    +        (
    +            "data",
    +            pytest.param(
    +                "dims", marks=pytest.mark.xfail(reason="indexes don't support units")
    +            ),
    +            "coords",
    +        ),
    +    )
    +    def test_resample(self, variant, dtype):
    +        # TODO: move this to test_computation_objects
    +        variants = {
    +            "data": ((unit_registry.degK, unit_registry.Pa), 1, 1),
    +            "dims": ((1, 1), unit_registry.m, 1),
    +            "coords": ((1, 1), 1, unit_registry.m),
    +        }
    +        (unit1, unit2), dim_unit, coord_unit = variants.get(variant)
    +
    +        array1 = np.linspace(-5, 5, 10 * 5).reshape(10, 5).astype(dtype) * unit1
    +        array2 = np.linspace(10, 20, 10 * 8).reshape(10, 8).astype(dtype) * unit2
     
    -    def test_resample(self, dtype):
    -        array1 = (
    -            np.linspace(-5, 5, 10 * 5).reshape(10, 5).astype(dtype) * unit_registry.degK
    -        )
    -        array2 = (
    -            np.linspace(10, 20, 10 * 8).reshape(10, 8).astype(dtype) * unit_registry.Pa
    -        )
             t = pd.date_range("10-09-2010", periods=array1.shape[0], freq="1y")
    -        y = np.arange(5) * unit_registry.m
    -        z = np.arange(8) * unit_registry.m
    +        y = np.arange(5) * dim_unit
    +        z = np.arange(8) * dim_unit
    +
    +        u = np.linspace(-1, 0, 5) * coord_unit
     
             ds = xr.Dataset(
    -            data_vars={
    -                "a": xr.DataArray(data=array1, dims=("time", "y")),
    -                "b": xr.DataArray(data=array2, dims=("time", "z")),
    -            },
    -            coords={"time": t, "y": y, "z": z},
    +            data_vars={"a": (("time", "y"), array1), "b": (("time", "z"), array2)},
    +            coords={"time": t, "y": y, "z": z, "u": ("y", u)},
             )
             units = extract_units(ds)
     
    @@ -5112,43 +5167,59 @@ def test_resample(self, dtype):
             expected = attach_units(func(strip_units(ds)).mean(), units)
             actual = func(ds).mean()
     
    -        assert_equal_with_units(expected, actual)
    +        assert_units_equal(expected, actual)
    +        assert_equal(expected, actual)
     
         @pytest.mark.parametrize(
             "func",
             (
                 method("assign", c=lambda ds: 10 * ds.b),
    -            method("assign_coords", v=("x", np.arange(10) * unit_registry.s)),
    +            method("assign_coords", v=("x", np.arange(5) * unit_registry.s)),
                 method("first"),
                 method("last"),
                 pytest.param(
                     method("quantile", q=[0.25, 0.5, 0.75], dim="x"),
    -                marks=pytest.mark.xfail(reason="nanquantile not implemented"),
    +                marks=pytest.mark.xfail(
    +                    LooseVersion(pint.__version__) <= "0.12",
    +                    reason="nanquantile not implemented",
    +                ),
                 ),
             ),
             ids=repr,
         )
    -    def test_grouped_operations(self, func, dtype):
    -        array1 = (
    -            np.linspace(-5, 5, 10 * 5).reshape(10, 5).astype(dtype) * unit_registry.degK
    -        )
    -        array2 = (
    -            np.linspace(10, 20, 10 * 5 * 8).reshape(10, 5, 8).astype(dtype)
    -            * unit_registry.Pa
    -        )
    -        x = np.arange(10) * unit_registry.m
    -        y = np.arange(5) * unit_registry.m
    -        z = np.arange(8) * unit_registry.m
    +    @pytest.mark.parametrize(
    +        "variant",
    +        (
    +            "data",
    +            pytest.param(
    +                "dims", marks=pytest.mark.xfail(reason="indexes don't support units")
    +            ),
    +            "coords",
    +        ),
    +    )
    +    def test_grouped_operations(self, func, variant, dtype):
    +        variants = {
    +            "data": ((unit_registry.degK, unit_registry.Pa), 1, 1),
    +            "dims": ((1, 1), unit_registry.m, 1),
    +            "coords": ((1, 1), 1, unit_registry.m),
    +        }
    +        (unit1, unit2), dim_unit, coord_unit = variants.get(variant)
    +
    +        array1 = np.linspace(-5, 5, 5 * 4).reshape(5, 4).astype(dtype) * unit1
    +        array2 = np.linspace(10, 20, 5 * 4 * 3).reshape(5, 4, 3).astype(dtype) * unit2
    +        x = np.arange(5) * dim_unit
    +        y = np.arange(4) * dim_unit
    +        z = np.arange(3) * dim_unit
    +
    +        u = np.linspace(-1, 0, 4) * coord_unit
     
             ds = xr.Dataset(
    -            data_vars={
    -                "a": xr.DataArray(data=array1, dims=("x", "y")),
    -                "b": xr.DataArray(data=array2, dims=("x", "y", "z")),
    -            },
    -            coords={"x": x, "y": y, "z": z},
    +            data_vars={"a": (("x", "y"), array1), "b": (("x", "y", "z"), array2)},
    +            coords={"x": x, "y": y, "z": z, "u": ("y", u)},
             )
    -        units = extract_units(ds)
    -        units.update({"c": unit_registry.Pa, "v": unit_registry.s})
    +
    +        assigned_units = {"c": unit2, "v": unit_registry.s}
    +        units = merge_mappings(extract_units(ds), assigned_units)
     
             stripped_kwargs = {
                 name: strip_units(value) for name, value in func.kwargs.items()
    @@ -5158,20 +5229,26 @@ def test_grouped_operations(self, func, dtype):
             )
             actual = func(ds.groupby("y"))
     
    -        assert_equal_with_units(expected, actual)
    +        assert_units_equal(expected, actual)
    +        assert_equal(expected, actual)
     
         @pytest.mark.parametrize(
             "func",
             (
                 method("pipe", lambda ds: ds * 10),
                 method("assign", d=lambda ds: ds.b * 10),
    -            method("assign_coords", y2=("y", np.arange(5) * unit_registry.mm)),
    +            method("assign_coords", y2=("y", np.arange(4) * unit_registry.mm)),
                 method("assign_attrs", attr1="value"),
                 method("rename", x2="x_mm"),
                 method("rename_vars", c="temperature"),
                 method("rename_dims", x="offset_x"),
    -            method("swap_dims", {"x": "x2"}),
    -            method("expand_dims", v=np.linspace(10, 20, 12) * unit_registry.s, axis=1),
    +            method("swap_dims", {"x": "u"}),
    +            pytest.param(
    +                method(
    +                    "expand_dims", v=np.linspace(10, 20, 12) * unit_registry.s, axis=1
    +                ),
    +                marks=pytest.mark.xfail(reason="indexes don't support units"),
    +            ),
                 method("drop_vars", "x"),
                 method("drop_dims", "z"),
                 method("set_coords", names="c"),
    @@ -5180,40 +5257,55 @@ def test_grouped_operations(self, func, dtype):
             ),
             ids=repr,
         )
    -    def test_content_manipulation(self, func, dtype):
    -        array1 = (
    -            np.linspace(-5, 5, 10 * 5).reshape(10, 5).astype(dtype)
    -            * unit_registry.m ** 3
    -        )
    -        array2 = (
    -            np.linspace(10, 20, 10 * 5 * 8).reshape(10, 5, 8).astype(dtype)
    -            * unit_registry.Pa
    -        )
    -        array3 = np.linspace(0, 10, 10).astype(dtype) * unit_registry.degK
    +    @pytest.mark.parametrize(
    +        "variant",
    +        (
    +            "data",
    +            pytest.param(
    +                "dims", marks=pytest.mark.xfail(reason="indexes don't support units")
    +            ),
    +            "coords",
    +        ),
    +    )
    +    def test_content_manipulation(self, func, variant, dtype):
    +        variants = {
    +            "data": (
    +                (unit_registry.m ** 3, unit_registry.Pa, unit_registry.degK),
    +                1,
    +                1,
    +            ),
    +            "dims": ((1, 1, 1), unit_registry.m, 1),
    +            "coords": ((1, 1, 1), 1, unit_registry.m),
    +        }
    +        (unit1, unit2, unit3), dim_unit, coord_unit = variants.get(variant)
     
    -        x = np.arange(10) * unit_registry.m
    -        x2 = x.to(unit_registry.mm)
    -        y = np.arange(5) * unit_registry.m
    -        z = np.arange(8) * unit_registry.m
    +        array1 = np.linspace(-5, 5, 5 * 4).reshape(5, 4).astype(dtype) * unit1
    +        array2 = np.linspace(10, 20, 5 * 4 * 3).reshape(5, 4, 3).astype(dtype) * unit2
    +        array3 = np.linspace(0, 10, 5).astype(dtype) * unit3
    +
    +        x = np.arange(5) * dim_unit
    +        y = np.arange(4) * dim_unit
    +        z = np.arange(3) * dim_unit
    +
    +        x2 = np.linspace(-1, 0, 5) * coord_unit
     
             ds = xr.Dataset(
                 data_vars={
    -                "a": xr.DataArray(data=array1, dims=("x", "y")),
    -                "b": xr.DataArray(data=array2, dims=("x", "y", "z")),
    -                "c": xr.DataArray(data=array3, dims="x"),
    +                "a": (("x", "y"), array1),
    +                "b": (("x", "y", "z"), array2),
    +                "c": ("x", array3),
                 },
                 coords={"x": x, "y": y, "z": z, "x2": ("x", x2)},
             )
    -        units = {
    -            **extract_units(ds),
    -            **{
    -                "y2": unit_registry.mm,
    -                "x_mm": unit_registry.mm,
    -                "offset_x": unit_registry.m,
    -                "d": unit_registry.Pa,
    -                "temperature": unit_registry.degK,
    -            },
    +
    +        new_units = {
    +            "y2": unit_registry.mm,
    +            "x_mm": coord_unit,
    +            "offset_x": unit_registry.m,
    +            "d": unit2,
    +            "temperature": unit3,
             }
    +        units = merge_mappings(extract_units(ds), new_units)
     
             stripped_kwargs = {
                 key: strip_units(value) for key, value in func.kwargs.items()
    @@ -5221,7 +5313,8 @@ def test_content_manipulation(self, func, dtype):
             expected = attach_units(func(strip_units(ds), **stripped_kwargs), units)
             actual = func(ds)
     
    -        assert_equal_with_units(expected, actual)
    +        assert_units_equal(expected, actual)
    +        assert_equal(expected, actual)
     
         @pytest.mark.parametrize(
             "unit,error",
    @@ -5246,25 +5339,29 @@ def test_content_manipulation(self, func, dtype):
             ),
         )
         def test_merge(self, variant, unit, error, dtype):
    -        original_data_unit = unit_registry.m
    -        original_dim_unit = unit_registry.m
    -        original_coord_unit = unit_registry.m
    +        left_variants = {
    +            "data": (unit_registry.m, 1, 1),
    +            "dims": (1, unit_registry.m, 1),
    +            "coords": (1, 1, unit_registry.m),
    +        }
     
    -        variants = {
    -            "data": (unit, original_dim_unit, original_coord_unit),
    -            "dims": (original_data_unit, unit, original_coord_unit),
    -            "coords": (original_data_unit, original_dim_unit, unit),
    +        left_data_unit, left_dim_unit, left_coord_unit = left_variants.get(variant)
    +
    +        right_variants = {
    +            "data": (unit, 1, 1),
    +            "dims": (1, unit, 1),
    +            "coords": (1, 1, unit),
             }
    -        data_unit, dim_unit, coord_unit = variants.get(variant)
    +        right_data_unit, right_dim_unit, right_coord_unit = right_variants.get(variant)
     
    -        left_array = np.arange(10).astype(dtype) * original_data_unit
    -        right_array = np.arange(-5, 5).astype(dtype) * data_unit
    +        left_array = np.arange(10).astype(dtype) * left_data_unit
    +        right_array = np.arange(-5, 5).astype(dtype) * right_data_unit
     
    -        left_dim = np.arange(10, 20) * original_dim_unit
    -        right_dim = np.arange(5, 15) * dim_unit
    +        left_dim = np.arange(10, 20) * left_dim_unit
    +        right_dim = np.arange(5, 15) * right_dim_unit
     
    -        left_coord = np.arange(-10, 0) * original_coord_unit
    -        right_coord = np.arange(-15, -5) * coord_unit
    +        left_coord = np.arange(-10, 0) * left_coord_unit
    +        right_coord = np.arange(-15, -5) * right_coord_unit
     
             left = xr.Dataset(
                 data_vars={"a": ("x", left_array)},
    @@ -5287,4 +5384,5 @@ def test_merge(self, variant, unit, error, dtype):
             expected = attach_units(strip_units(left).merge(strip_units(converted)), units)
             actual = left.merge(right)
     
    -        assert_equal_with_units(expected, actual)
    +        assert_units_equal(expected, actual)
    +        assert_equal(expected, actual)
    diff --git a/xarray/tests/test_variable.py b/xarray/tests/test_variable.py
    index 78e3848b8fb..d79d40d67c0 100644
    --- a/xarray/tests/test_variable.py
    +++ b/xarray/tests/test_variable.py
    @@ -1657,7 +1657,7 @@ def test_reduce_funcs(self):
             assert_identical(v.all(dim="x"), Variable([], False))
     
             v = Variable("t", pd.date_range("2000-01-01", periods=3))
    -        assert v.argmax(skipna=True) == 2
    +        assert v.argmax(skipna=True, dim="t") == 2
     
             assert_identical(v.max(), Variable([], pd.Timestamp("2000-01-03")))
     
    @@ -2213,6 +2213,10 @@ def test_full_like(self):
             assert expect.dtype == bool
             assert_identical(expect, full_like(orig, True, dtype=bool))
     
    +        # raise error on non-scalar fill_value
    +        with raises_regex(ValueError, "must be scalar"):
    +            full_like(orig, [1.0, 2.0])
    +
         @requires_dask
         def test_full_like_dask(self):
             orig = Variable(
    diff --git a/xarray/tests/test_weighted.py b/xarray/tests/test_weighted.py
    index 24531215dfb..1bf685cc95d 100644
    --- a/xarray/tests/test_weighted.py
    +++ b/xarray/tests/test_weighted.py
    @@ -59,6 +59,18 @@ def test_weighted_sum_of_weights_nan(weights, expected):
         assert_equal(expected, result)
     
     
    +def test_weighted_sum_of_weights_bool():
    +    # https://github.com/pydata/xarray/issues/4074
    +
    +    da = DataArray([1, 2])
    +    weights = DataArray([True, True])
    +    result = da.weighted(weights).sum_of_weights()
    +
    +    expected = DataArray(2)
    +
    +    assert_equal(expected, result)
    +
    +
     @pytest.mark.parametrize("da", ([1.0, 2], [1, np.nan], [np.nan, np.nan]))
     @pytest.mark.parametrize("factor", [0, 1, 3.14])
     @pytest.mark.parametrize("skipna", (True, False))
    @@ -158,6 +170,17 @@ def test_weighted_mean_nan(weights, expected, skipna):
         assert_equal(expected, result)
     
     
    +def test_weighted_mean_bool():
    +    # https://github.com/pydata/xarray/issues/4074
    +    da = DataArray([1, 1])
    +    weights = DataArray([True, True])
    +    expected = DataArray(1)
    +
    +    result = da.weighted(weights).mean()
    +
    +    assert_equal(expected, result)
    +
    +
     def expected_weighted(da, weights, dim, skipna, operation):
         """
         Generate expected result using ``*`` and ``sum``. This is checked against
    diff --git a/xarray/util/print_versions.py b/xarray/util/print_versions.py
    index 32051bb6843..96983c83aab 100755
    --- a/xarray/util/print_versions.py
    +++ b/xarray/util/print_versions.py
    @@ -129,7 +129,7 @@ def show_versions(file=sys.stdout):
             ("sphinx", lambda mod: mod.__version__),
         ]
     
    -    deps_blob = list()
    +    deps_blob = []
         for (modname, ver_f) in deps:
             try:
                 if modname in sys.modules: