Skip to content

Commit

Permalink
Fix 40420: Interpret NaN in clip() as no bound. (pandas-dev#40927)
Browse files Browse the repository at this point in the history
  • Loading branch information
DriesSchaumont authored and yeshsurya committed May 6, 2021
1 parent a8722f6 commit ab5972d
Show file tree
Hide file tree
Showing 4 changed files with 64 additions and 11 deletions.
1 change: 1 addition & 0 deletions doc/source/whatsnew/v1.3.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -911,6 +911,7 @@ Other
- Bug in :meth:`DataFrame.equals`, :meth:`Series.equals`, :meth:`Index.equals` with object-dtype containing ``np.datetime64("NaT")`` or ``np.timedelta64("NaT")`` (:issue:`39650`)
- Bug in :func:`pandas.util.show_versions` where console JSON output was not proper JSON (:issue:`39701`)
- Bug in :meth:`DataFrame.convert_dtypes` incorrectly raised ValueError when called on an empty DataFrame (:issue:`40393`)
- Bug in :meth:`DataFrame.clip` not interpreting missing values as no threshold (:issue:`40420`)

.. ---------------------------------------------------------------------------
Expand Down
51 changes: 45 additions & 6 deletions pandas/core/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -7341,8 +7341,6 @@ def _clip_with_one_bound(self, threshold, method, axis, inplace):
return self._clip_with_scalar(None, threshold, inplace=inplace)
return self._clip_with_scalar(threshold, None, inplace=inplace)

subset = method(threshold, axis=axis) | isna(self)

# GH #15390
# In order for where method to work, the threshold must
# be transformed to NDFrame from other array like structure.
Expand All @@ -7351,6 +7349,18 @@ def _clip_with_one_bound(self, threshold, method, axis, inplace):
threshold = self._constructor(threshold, index=self.index)
else:
threshold = align_method_FRAME(self, threshold, axis, flex=None)[1]

# GH 40420
# Treat missing thresholds as no bounds, not clipping the values
if is_list_like(threshold):
fill_value = np.inf if method.__name__ == "le" else -np.inf
threshold_inf = threshold.fillna(fill_value)
else:
threshold_inf = threshold

subset = method(threshold_inf, axis=axis) | isna(self)

# GH 40420
return self.where(subset, threshold, axis=axis, inplace=inplace)

@overload
Expand Down Expand Up @@ -7482,10 +7492,12 @@ def clip(
----------
lower : float or array_like, default None
Minimum threshold value. All values below this
threshold will be set to it.
threshold will be set to it. A missing
threshold (e.g `NA`) will not clip the value.
upper : float or array_like, default None
Maximum threshold value. All values above this
threshold will be set to it.
threshold will be set to it. A missing
threshold (e.g `NA`) will not clip the value.
axis : int or str axis name, optional
Align object with lower and upper along the given axis.
inplace : bool, default False
Expand Down Expand Up @@ -7546,6 +7558,25 @@ def clip(
2 0 3
3 6 8
4 5 3
Clips using specific lower threshold per column element, with missing values:
>>> t = pd.Series([2, -4, np.NaN, 6, 3])
>>> t
0 2.0
1 -4.0
2 NaN
3 6.0
4 3.0
dtype: float64
>>> df.clip(t, axis=0)
col_0 col_1
0 9 2
1 -3 -4
2 0 6
3 6 8
4 5 3
"""
inplace = validate_bool_kwarg(inplace, "inplace")

Expand All @@ -7558,9 +7589,17 @@ def clip(
# so ignore
# GH 19992
# numpy doesn't drop a list-like bound containing NaN
if not is_list_like(lower) and np.any(isna(lower)):
isna_lower = isna(lower)
if not is_list_like(lower):
if np.any(isna_lower):
lower = None
elif np.all(isna_lower):
lower = None
if not is_list_like(upper) and np.any(isna(upper)):
isna_upper = isna(upper)
if not is_list_like(upper):
if np.any(isna_upper):
upper = None
elif np.all(isna_upper):
upper = None

# GH 2747 (arguments were reversed)
Expand Down
14 changes: 11 additions & 3 deletions pandas/tests/frame/methods/test_clip.py
Original file line number Diff line number Diff line change
Expand Up @@ -144,17 +144,25 @@ def test_clip_with_na_args(self, float_frame):
tm.assert_frame_equal(float_frame.clip(np.nan), float_frame)
tm.assert_frame_equal(float_frame.clip(upper=np.nan, lower=np.nan), float_frame)

# GH#19992
# GH#19992 and adjusted in GH#40420
df = DataFrame({"col_0": [1, 2, 3], "col_1": [4, 5, 6], "col_2": [7, 8, 9]})

result = df.clip(lower=[4, 5, np.nan], axis=0)
expected = DataFrame(
{"col_0": [4, 5, np.nan], "col_1": [4, 5, np.nan], "col_2": [7, 8, np.nan]}
{"col_0": [4, 5, 3], "col_1": [4, 5, 6], "col_2": [7, 8, 9]}
)
tm.assert_frame_equal(result, expected)

result = df.clip(lower=[4, 5, np.nan], axis=1)
expected = DataFrame(
{"col_0": [4, 4, 4], "col_1": [5, 5, 6], "col_2": [np.nan, np.nan, np.nan]}
{"col_0": [4, 4, 4], "col_1": [5, 5, 6], "col_2": [7, 8, 9]}
)
tm.assert_frame_equal(result, expected)

# GH#40420
data = {"col_0": [9, -3, 0, -1, 5], "col_1": [-2, -7, 6, 8, -5]}
df = DataFrame(data)
t = Series([2, -4, np.NaN, 6, 3])
result = df.clip(lower=t, axis=0)
expected = DataFrame({"col_0": [9, -3, 0, 6, 5], "col_1": [2, -4, 6, 8, 3]})
tm.assert_frame_equal(result, expected)
9 changes: 7 additions & 2 deletions pandas/tests/series/methods/test_clip.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,8 +49,13 @@ def test_clip_with_na_args(self):
tm.assert_series_equal(s.clip(upper=np.nan, lower=np.nan), Series([1, 2, 3]))

# GH#19992
tm.assert_series_equal(s.clip(lower=[0, 4, np.nan]), Series([1, 4, np.nan]))
tm.assert_series_equal(s.clip(upper=[1, np.nan, 1]), Series([1, np.nan, 1]))
tm.assert_series_equal(s.clip(lower=[0, 4, np.nan]), Series([1, 4, 3]))
tm.assert_series_equal(s.clip(upper=[1, np.nan, 1]), Series([1, 2, 1]))

# GH#40420
s = Series([1, 2, 3])
result = s.clip(0, [np.nan, np.nan, np.nan])
tm.assert_series_equal(s, result)

def test_clip_against_series(self):
# GH#6966
Expand Down

0 comments on commit ab5972d

Please sign in to comment.