diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt index 5515d093f39e4..ce0d40d327c15 100644 --- a/doc/source/whatsnew/v0.21.0.txt +++ b/doc/source/whatsnew/v0.21.0.txt @@ -135,6 +135,7 @@ Reshaping Numeric ^^^^^^^ +- Bug in ``.clip()`` when ``axis=1`` and a list-like ``threshold`` is passed; previously this raised ``ValueError`` (:issue:`15390`) Categorical diff --git a/pandas/core/generic.py b/pandas/core/generic.py index db19d9354ec4d..7d1a8adf381fe 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -52,6 +52,7 @@ from pandas.compat.numpy import function as nv from pandas.compat import (map, zip, lzip, lrange, string_types, isidentifier, set_function_name, cPickle as pkl) +from pandas.core.ops import _align_method_FRAME import pandas.core.nanops as nanops from pandas.util._decorators import Appender, Substitution, deprecate_kwarg from pandas.util._validators import validate_bool_kwarg @@ -4413,6 +4414,34 @@ def _clip_with_scalar(self, lower, upper, inplace=False): else: return result + def _clip_with_one_bound(self, threshold, method, axis, inplace): + + inplace = validate_bool_kwarg(inplace, 'inplace') + if axis is not None: + axis = self._get_axis_number(axis) + + if np.any(isnull(threshold)): + raise ValueError("Cannot use an NA value as a clip threshold") + + # method is self.le for upper bound and self.ge for lower bound + if is_scalar(threshold) and is_number(threshold): + if method.__name__ == 'le': + return self._clip_with_scalar(None, threshold, inplace=inplace) + return self._clip_with_scalar(threshold, None, inplace=inplace) + + subset = method(threshold, axis=axis) | isnull(self) + + # GH #15390 + # In order for where method to work, the threshold must + # be transformed to NDFrame from other array like structure. 
+ if (not isinstance(threshold, ABCSeries)) and is_list_like(threshold): + if isinstance(self, ABCSeries): + threshold = pd.Series(threshold, index=self.index) + else: + threshold = _align_method_FRAME(self, np.asarray(threshold), + axis) + return self.where(subset, threshold, axis=axis, inplace=inplace) + def clip(self, lower=None, upper=None, axis=None, inplace=False, *args, **kwargs): """ @@ -4515,16 +4544,8 @@ def clip_upper(self, threshold, axis=None, inplace=False): ------- clipped : same type as input """ - if np.any(isnull(threshold)): - raise ValueError("Cannot use an NA value as a clip threshold") - - if is_scalar(threshold) and is_number(threshold): - return self._clip_with_scalar(None, threshold, inplace=inplace) - - inplace = validate_bool_kwarg(inplace, 'inplace') - - subset = self.le(threshold, axis=axis) | isnull(self) - return self.where(subset, threshold, axis=axis, inplace=inplace) + return self._clip_with_one_bound(threshold, method=self.le, + axis=axis, inplace=inplace) def clip_lower(self, threshold, axis=None, inplace=False): """ @@ -4547,16 +4568,8 @@ def clip_lower(self, threshold, axis=None, inplace=False): ------- clipped : same type as input """ - if np.any(isnull(threshold)): - raise ValueError("Cannot use an NA value as a clip threshold") - - if is_scalar(threshold) and is_number(threshold): - return self._clip_with_scalar(threshold, None, inplace=inplace) - - inplace = validate_bool_kwarg(inplace, 'inplace') - - subset = self.ge(threshold, axis=axis) | isnull(self) - return self.where(subset, threshold, axis=axis, inplace=inplace) + return self._clip_with_one_bound(threshold, method=self.ge, + axis=axis, inplace=inplace) def groupby(self, by=None, axis=0, level=None, as_index=True, sort=True, group_keys=True, squeeze=False, **kwargs): diff --git a/pandas/tests/frame/test_analytics.py b/pandas/tests/frame/test_analytics.py index 943a93b27a78a..b09325bfa2ddc 100644 --- a/pandas/tests/frame/test_analytics.py +++ 
b/pandas/tests/frame/test_analytics.py @@ -1892,12 +1892,33 @@ def test_clip_against_series(self, inplace): tm.assert_series_equal(clipped_df.loc[mask, i], df.loc[mask, i]) - def test_clip_against_frame(self): + @pytest.mark.parametrize("inplace", [True, False]) + @pytest.mark.parametrize("lower", [[2, 3, 4], np.asarray([2, 3, 4])]) + @pytest.mark.parametrize("axis,res", [ + (0, [[2., 2., 3.], [4., 5., 6.], [7., 7., 7.]]), + (1, [[2., 3., 4.], [4., 5., 6.], [5., 6., 7.]]) + ]) + def test_clip_against_list_like(self, inplace, lower, axis, res): + # GH #15390 + original = self.simple.copy(deep=True) + + result = original.clip(lower=lower, upper=[5, 6, 7], + axis=axis, inplace=inplace) + + expected = pd.DataFrame(res, + columns=original.columns, + index=original.index) + if inplace: + result = original + tm.assert_frame_equal(result, expected, check_exact=True) + + @pytest.mark.parametrize("axis", [0, 1, None]) + def test_clip_against_frame(self, axis): df = DataFrame(np.random.randn(1000, 2)) lb = DataFrame(np.random.randn(1000, 2)) ub = lb + 1 - clipped_df = df.clip(lb, ub) + clipped_df = df.clip(lb, ub, axis=axis) lb_mask = df <= lb ub_mask = df >= ub diff --git a/pandas/tests/series/test_analytics.py b/pandas/tests/series/test_analytics.py index 18c6c9a6dd021..749af1c56a7f0 100644 --- a/pandas/tests/series/test_analytics.py +++ b/pandas/tests/series/test_analytics.py @@ -1015,6 +1015,18 @@ def test_clip_against_series(self): assert_series_equal(s.clip(lower, upper), Series([1.0, 2.0, 3.5])) assert_series_equal(s.clip(1.5, upper), Series([1.5, 1.5, 3.5])) + @pytest.mark.parametrize("inplace", [True, False]) + @pytest.mark.parametrize("upper", [[1, 2, 3], np.asarray([1, 2, 3])]) + def test_clip_against_list_like(self, inplace, upper): + # GH #15390 + original = pd.Series([5, 6, 7]) + result = original.clip(upper=upper, inplace=inplace) + expected = pd.Series([1, 2, 3]) + + if inplace: + result = original + tm.assert_series_equal(result, expected, check_exact=True) + 
def test_clip_with_datetimes(self): # GH 11838