Add drop duplicates #5089

Closed · wants to merge 32 commits

Commits (32)

d84dae7 Add drop duplicates; wip need to fix tests (Mar 29, 2021)
1494966 Comments (Mar 30, 2021)
adfafc0 Replace apostrophes with quotations (Mar 30, 2021)
322ad9a Add whats new, fix linting, and bug (Mar 30, 2021)
81d4002 Add to api, pre commit (Mar 30, 2021)
28aa96a Revise based on comments (Mar 31, 2021)
4b1dab7 Merge branch 'master' of https://github.com/pydata/xarray into drop_d… (Mar 31, 2021)
f9ee3fe iPrecommit (Mar 31, 2021)
daa6e42 Rewrite function to support coords (Apr 4, 2021)
cc94bbe Merge branch 'master' into drop_duplicates (ahuang11, Apr 4, 2021)
f7dcdd4 Lint (Apr 4, 2021)
915dcf5 Add back >>> for what's new (Apr 4, 2021)
344a7d8 Revert new line (Apr 4, 2021)
596ec7a Update test_dataarray tests (Apr 4, 2021)
1698990 Fix formatting (Apr 4, 2021)
e307041 Fix test (Apr 4, 2021)
d33586e Fix based on sugggestion (Apr 6, 2021)
8c27afb Black (Apr 6, 2021)
8a168ce Replace drop with drop_vars (Apr 6, 2021)
a1ce19d Fix tests (Apr 6, 2021)
e1e24bc Revert "iPrecommit" (May 1, 2021)
d7cf3c4 Revert "Merge branch 'master' into drop_duplicates" (May 1, 2021)
1c8a4ae Revert "Fix tests" (May 1, 2021)
c2cc15f Revert "Replace drop with drop_vars" (May 1, 2021)
966a420 Revert "Black" (May 1, 2021)
d9fde90 Revert "Fix based on sugggestion" (May 1, 2021)
b9ee4ca Revert "Fix test" (May 1, 2021)
3b9b7e3 Revert "Fix formatting" (May 1, 2021)
61352f9 Revert "Update test_dataarray tests" (May 1, 2021)
25949b0 Revert "Revert new line" (May 1, 2021)
5c4fc82 Revert "Add back >>> for what's new" (May 1, 2021)
a77f78d Revert "Lint" (May 1, 2021)
26 changes: 25 additions & 1 deletion xarray/core/dataarray.py

@@ -4418,10 +4418,34 @@ def query(
        )
        return ds[self.name]

    def drop_duplicates(
        self,
        subset: Optional[Union[Hashable, Sequence[Hashable]]] = None,
        keep: Union[str, bool] = "first",
    ) -> "DataArray":
        """Returns a new data array with duplicate dimension values removed.

        Parameters
        ----------
        subset : dimension label or sequence of labels, optional
            Only consider certain dimensions for identifying duplicates; by
            default all dimensions are used.
        keep : {"first", "last", False}, default: "first"
            Determines which duplicates (if any) to keep.
            - ``"first"`` : Drop duplicates except for the first occurrence.
            - ``"last"`` : Drop duplicates except for the last occurrence.
            - False : Drop all duplicates.

        Returns
        -------
        DataArray
        """
        ds = self._to_temp_dataset().drop_duplicates(subset=subset, keep=keep)
        return self._from_temp_dataset(ds)

    # this needs to be at the end, or mypy will confuse with `str`
    # https://mypy.readthedocs.io/en/latest/common_issues.html#dealing-with-conflicting-names
    str = utils.UncachedAccessor(StringAccessor)


# priority must be higher than Variable to properly work with binary ufuncs
ops.inject_all_ops_and_reduce_methods(DataArray, priority=60)
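
For reference, a minimal usage sketch of the method added above (not part of this diff; it assumes a build of this branch, and the `temperature` name and values are illustrative):

```python
import xarray as xr

# A DataArray whose "time" coordinate contains a duplicated value (0 twice).
da = xr.DataArray(
    [10, 11, 12, 13],
    dims="time",
    coords={"time": [0, 0, 1, 2]},
    name="temperature",
)

# Keep only the first occurrence of each duplicated "time" value.
deduped = da.drop_duplicates("time", keep="first")
print(deduped["time"].values)  # [0 1 2]
print(deduped.values)          # [10 12 13]
```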
46 changes: 46 additions & 0 deletions xarray/core/dataset.py

@@ -7074,5 +7074,51 @@ def query(
        # apply the selection
        return self.isel(indexers, missing_dims=missing_dims)

    def drop_duplicates(
        self,
        subset: Optional[Union[Hashable, Sequence[Hashable]]] = None,
        keep: Union[str, bool] = "first",
    ) -> "Dataset":
        """Returns a new dataset with duplicate dimension values removed.

        Parameters
        ----------
        subset : dimension label or sequence of labels, optional
            Only consider certain dimensions for identifying duplicates; by
            default all dimensions are used.
        keep : {"first", "last", False}, default: "first"
            Determines which duplicates (if any) to keep.
            - ``"first"`` : Drop duplicates except for the first occurrence.
            - ``"last"`` : Drop duplicates except for the last occurrence.
            - False : Drop all duplicates.

        Returns
        -------
        Dataset
        """
        if subset is None:
            # Default to all dimensions; every entry is validated below, so
            # non-dimension coordinates must not be included here.
            subset = list(self.dims)
        elif isinstance(subset, str):
            subset = [subset]

        for dim in subset:
            if dim not in self.dims:
                raise ValueError(f"{dim!r} must be a single dataset dimension")

        new = self.copy(deep=False)
        if len(subset) > 1:
            # Stack the requested dimensions into one temporary dimension so
            # duplicates are identified across their combined values.
            new = new.stack({"tmp_dim": subset})
            subset = "tmp_dim"
        else:
            subset = subset[0]

        # Boolean mask marking the duplicated entries along the dimension.
        duplicated = new.get_index(subset).duplicated(keep=keep)
        new = new.isel(**{subset: ~duplicated})

        if "tmp_dim" in new.dims:
            new = new.unstack("tmp_dim")

        return new


ops.inject_all_ops_and_reduce_methods(Dataset, array_only=False)
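
The implementation above delegates duplicate detection to `pandas.Index.duplicated` and handles multiple dimensions by stacking them into a temporary MultiIndex dimension. A standalone sketch of that mechanism, using only existing pandas/xarray calls (the names here are illustrative, not part of the diff):

```python
import pandas as pd
import xarray as xr

# duplicated() returns a boolean mask; True marks the entries to drop.
idx = pd.Index([0, 0, 1, 2])
print(idx.duplicated(keep="first"))  # [False  True False False]
print(idx.duplicated(keep="last"))   # [ True False False False]
print(idx.duplicated(keep=False))    # [ True  True False False]

# With several dimensions, stack() builds a MultiIndex whose entries are
# (lat, lon) tuples, so duplicated() compares the combined coordinate values.
ds = xr.Dataset(coords={"lat": [0, 1, 1], "lon": [5, 5, 6]})
stacked = ds.stack({"tmp_dim": ["lat", "lon"]})
print(stacked.get_index("tmp_dim").duplicated(keep="first"))
```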
48 changes: 48 additions & 0 deletions xarray/tests/test_dataset.py

@@ -6845,3 +6845,51 @@ def test_deepcopy_obj_array():
    x0 = Dataset(dict(foo=DataArray(np.array([object()]))))
    x1 = deepcopy(x0)
    assert x0["foo"].values[0] is not x1["foo"].values[0]


@pytest.mark.parametrize("keep", ["first", "last", False])
def test_drop_duplicates(keep):
    ds = xr.DataArray(
        [0, 5, 6, 7], dims="time", coords={"time": [0, 0, 1, 2]}, name="test"
    ).to_dataset()

    if keep == "first":
        data = [0, 6, 7]
        time = [0, 1, 2]
    elif keep == "last":
        data = [5, 6, 7]
        time = [0, 1, 2]
    else:
        data = [6, 7]
        time = [1, 2]

    expected = xr.DataArray(
        data, dims="time", coords={"time": time}, name="test"
    ).to_dataset()
    result = ds.drop_duplicates("time", keep=keep)
    assert_equal(expected, result)


@pytest.mark.parametrize("keep", ["first", "last", False])
def test_drop_duplicates_multi_dim(keep):
    base_data = np.stack([np.arange(0, 5) * i for i in np.arange(0, 5)])
    ds = xr.DataArray(
        base_data,
        coords={"lat": [0, 1, 2, 2, 3], "lon": [0, 1, 3, 3, 4]},
        dims=["lat", "lon"],
        name="test",
    ).to_dataset()

    if keep == "first":
        data = base_data[[0, 1, 2, 4]][:, [0, 1, 2, 4]]
        lat = [0, 1, 2, 3]
        lon = [0, 1, 3, 4]
    elif keep == "last":
        data = base_data[[0, 1, 3, 4]][:, [0, 1, 3, 4]]
        lat = [0, 1, 2, 3]
        lon = [0, 1, 3, 4]
    else:
        data = base_data[[0, 1, 4]][:, [0, 1, 4]]
        lat = [0, 1, 3]
        lon = [0, 1, 4]

    expected = xr.DataArray(
        data, dims=["lat", "lon"], coords={"lat": lat, "lon": lon}, name="test"
    ).to_dataset()
    result = ds.drop_duplicates(["lat", "lon"], keep=keep)
    assert_equal(expected, result)