From 91c2874085061fccbfffc59e36677c570c77ce1c Mon Sep 17 00:00:00 2001 From: yl2526 Date: Thu, 25 May 2017 15:34:42 -0400 Subject: [PATCH 01/10] fix conflict from clip inplace --- pandas/core/generic.py | 57 +++++++++++++++++---------- pandas/tests/frame/test_analytics.py | 19 +++++++++ pandas/tests/series/test_analytics.py | 12 ++++++ 3 files changed, 68 insertions(+), 20 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index db19d9354ec4d..904693259a7f6 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -4413,6 +4413,39 @@ def _clip_with_scalar(self, lower, upper, inplace=False): else: return result + def _clip_with_one_bound(self, threshold, method, axis, inplace): + + if np.any(isnull(threshold)): + raise ValueError("Cannot use an NA value as a clip threshold") + + # method is self.le for upper bound and self.ge for lower bound + if is_scalar(threshold) and is_number(threshold): + if method.__name__ == 'le': + return self._clip_with_scalar(None, threshold, inplace=inplace) + else: + return self._clip_with_scalar(threshold, None, inplace=inplace) + + inplace = validate_bool_kwarg(inplace, 'inplace') + + subset = method(threshold, axis=axis) | isnull(self) + + # GH #15390 + if is_scalar(threshold) or is_number(threshold): + return self.where(subset, threshold, axis=axis, inplace=inplace) + + # For arry_like threshold, convet it to Series with corret index + # `where` only takes + try: + if isinstance(subset, ABCSeries): + threshold = pd.Series(threshold, index=subset.index) + elif axis == 0: + threshold = pd.Series(threshold, index=subset.index) + else: + threshold = pd.Series(threshold, index=subset.columns) + finally: + return self.where(subset, threshold, axis=axis, inplace=inplace) + + def clip(self, lower=None, upper=None, axis=None, inplace=False, *args, **kwargs): """ @@ -4515,16 +4548,8 @@ def clip_upper(self, threshold, axis=None, inplace=False): ------- clipped : same type as input """ - if np.any(isnull(threshold)): - raise ValueError("Cannot use an NA value as a clip threshold") - - if is_scalar(threshold) and is_number(threshold): - return self._clip_with_scalar(None, threshold, inplace=inplace) - - inplace = validate_bool_kwarg(inplace, 'inplace') - - subset = self.le(threshold, axis=axis) | isnull(self) - return self.where(subset, threshold, axis=axis, inplace=inplace) + return self._clip_with_one_bound(threshold, method=self.le, + axis=axis, inplace=inplace) def clip_lower(self, threshold, axis=None, inplace=False): """ @@ -4547,16 +4572,8 @@ def clip_lower(self, threshold, axis=None, inplace=False): ------- clipped : same type as input """ - if np.any(isnull(threshold)): - raise ValueError("Cannot use an NA value as a clip threshold") - - if is_scalar(threshold) and is_number(threshold): - return self._clip_with_scalar(threshold, None, inplace=inplace) - - inplace = validate_bool_kwarg(inplace, 'inplace') - - subset = self.ge(threshold, axis=axis) | isnull(self) - return self.where(subset, threshold, axis=axis, inplace=inplace) + return self._clip_with_one_bound(threshold, method=self.ge, + axis=axis, inplace=inplace) def groupby(self, by=None, axis=0, level=None, as_index=True, sort=True, group_keys=True, squeeze=False, **kwargs): diff --git a/pandas/tests/frame/test_analytics.py b/pandas/tests/frame/test_analytics.py index 943a93b27a78a..37a1c33df97a8 100644 --- a/pandas/tests/frame/test_analytics.py +++ b/pandas/tests/frame/test_analytics.py @@ -1892,6 +1892,25 @@ def test_clip_against_series(self, inplace): tm.assert_series_equal(clipped_df.loc[mask, i], df.loc[mask, i]) + @pytest.mark.parametrize("inplace", [True, False]) + def test_clip_against_list(self, inplace): + # GH #15390 + original = self.simple + + result = original.clip(lower=[2, 3, 4], upper=[5, 6, 7], + axis=1, inplace=inplace) + + arr = np.array([[2., 3., 4.], + [4., 5., 6.], + [5., 6., 7.]]) + expected = pd.DataFrame(arr, + columns=original.columns, + index=original.index) + if inplace: + tm.assert_frame_equal(original, expected, check_exact=True) + else: + tm.assert_frame_equal(result, expected, check_exact=True) + def test_clip_against_frame(self): df = DataFrame(np.random.randn(1000, 2)) lb = DataFrame(np.random.randn(1000, 2)) diff --git a/pandas/tests/series/test_analytics.py b/pandas/tests/series/test_analytics.py index 18c6c9a6dd021..9a89744a9aaba 100644 --- a/pandas/tests/series/test_analytics.py +++ b/pandas/tests/series/test_analytics.py @@ -1015,6 +1015,18 @@ def test_clip_against_series(self): assert_series_equal(s.clip(lower, upper), Series([1.0, 2.0, 3.5])) assert_series_equal(s.clip(1.5, upper), Series([1.5, 1.5, 3.5])) + @pytest.mark.parametrize("inplace", [True, False]) + def test_clip_against_list(self, inplace): + # GH #15390 + original = pd.Series([5, 6, 7]) + result = original.clip(upper=[1, 2, 3], inplace=inplace) + expected = pd.Series([1, 2, 3]) + + if inplace: + tm.assert_series_equal(original, expected, check_exact=True) + else: + tm.assert_series_equal(result, expected, check_exact=True) + def test_clip_with_datetimes(self): # GH 11838 From ef2dc7628b64d1d3438126f2e32c67d3a74925e7 Mon Sep 17 00:00:00 2001 From: yl2526 Date: Thu, 25 May 2017 17:29:20 -0400 Subject: [PATCH 02/10] fix lint error --- pandas/core/generic.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 904693259a7f6..484eecb2ce904 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -4420,7 +4420,7 @@ def _clip_with_one_bound(self, threshold, method, axis, inplace): # method is self.le for upper bound and self.ge for lower bound if is_scalar(threshold) and is_number(threshold): - if method.__name__ == 'le': + if method.__name__ == 'le': return self._clip_with_scalar(None, threshold, inplace=inplace) else: return self._clip_with_scalar(threshold, None, inplace=inplace) @@ -4445,7 +4445,6 @@ def _clip_with_one_bound(self, threshold, method, axis, inplace): finally: return self.where(subset, threshold, axis=axis, inplace=inplace) - def clip(self, lower=None, upper=None, axis=None, inplace=False, *args, **kwargs): """ @@ -4549,7 +4548,7 @@ def clip_upper(self, threshold, axis=None, inplace=False): clipped : same type as input """ return self._clip_with_one_bound(threshold, method=self.le, - axis=axis, inplace=inplace) + axis=axis, inplace=inplace) def clip_lower(self, threshold, axis=None, inplace=False): """ @@ -4573,7 +4572,7 @@ def clip_lower(self, threshold, axis=None, inplace=False): clipped : same type as input """ return self._clip_with_one_bound(threshold, method=self.ge, - axis=axis, inplace=inplace) + axis=axis, inplace=inplace) def groupby(self, by=None, axis=0, level=None, as_index=True, sort=True, group_keys=True, squeeze=False, **kwargs): From e0e0d04fda225d37b74eda6cb95f0984c05a10e7 Mon Sep 17 00:00:00 2001 From: yl2526 Date: Thu, 25 May 2017 19:08:04 -0400 Subject: [PATCH 03/10] change inplace check position --- pandas/core/generic.py | 11 +++++------ pandas/tests/series/test_analytics.py | 2 +- 2 files changed, 6 insertions(+), 7 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 484eecb2ce904..a80998aff0e7c 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -4415,6 +4415,8 @@ def _clip_with_scalar(self, lower, upper, inplace=False): def _clip_with_one_bound(self, threshold, method, axis, inplace): + inplace = validate_bool_kwarg(inplace, 'inplace') + if np.any(isnull(threshold)): raise ValueError("Cannot use an NA value as a clip threshold") @@ -4422,19 +4424,16 @@ def _clip_with_one_bound(self, threshold, method, axis, inplace): if is_scalar(threshold) and is_number(threshold): if method.__name__ == 'le': return self._clip_with_scalar(None, threshold, inplace=inplace) - else: - return self._clip_with_scalar(threshold, None, inplace=inplace) - - inplace = validate_bool_kwarg(inplace, 'inplace') + return self._clip_with_scalar(threshold, None, inplace=inplace) subset = method(threshold, axis=axis) | isnull(self) # GH #15390 - if is_scalar(threshold) or is_number(threshold): + if is_scalar(threshold): return self.where(subset, threshold, axis=axis, inplace=inplace) # For arry_like threshold, convet it to Series with corret index - # `where` only takes + # `where` takes scalar, NDFrame, or callable for argument "other" try: if isinstance(subset, ABCSeries): threshold = pd.Series(threshold, index=subset.index) diff --git a/pandas/tests/series/test_analytics.py b/pandas/tests/series/test_analytics.py index 9a89744a9aaba..f1cb7d8454ba7 100644 --- a/pandas/tests/series/test_analytics.py +++ b/pandas/tests/series/test_analytics.py @@ -1021,7 +1021,7 @@ def test_clip_against_list(self, inplace): original = pd.Series([5, 6, 7]) result = original.clip(upper=[1, 2, 3], inplace=inplace) expected = pd.Series([1, 2, 3]) - + if inplace: tm.assert_series_equal(original, expected, check_exact=True) else: From acc4390640621fa3ad0c39b574f69260a5b40ee0 Mon Sep 17 00:00:00 2001 From: yl2526 Date: Thu, 1 Jun 2017 23:22:21 -0400 Subject: [PATCH 04/10] add to release note --- doc/source/whatsnew/v0.21.0.txt | 2 ++ 1 file changed, 2 insertions(+) diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt index 5515d093f39e4..be6bf299184f8 100644 --- a/doc/source/whatsnew/v0.21.0.txt +++ b/doc/source/whatsnew/v0.21.0.txt @@ -143,3 +143,5 @@ Categorical Other ^^^^^ + +- Bug in ``.clip()`` now takes list or numpy-array; previously this raised ``ValueError`` (:issue:`15390`) From 7b52f5fefe4e2a6ab7baf16e515ee849147f4a3e Mon Sep 17 00:00:00 2001 From: yl2526 Date: Thu, 1 Jun 2017 23:26:48 -0400 Subject: [PATCH 05/10] change tests --- pandas/tests/frame/test_analytics.py | 7 +++---- pandas/tests/series/test_analytics.py | 5 ++--- 2 files changed, 5 insertions(+), 7 deletions(-) diff --git a/pandas/tests/frame/test_analytics.py b/pandas/tests/frame/test_analytics.py index 37a1c33df97a8..3e35a95b38faa 100644 --- a/pandas/tests/frame/test_analytics.py +++ b/pandas/tests/frame/test_analytics.py @@ -1895,7 +1895,7 @@ def test_clip_against_series(self, inplace): @pytest.mark.parametrize("inplace", [True, False]) def test_clip_against_list(self, inplace): # GH #15390 - original = self.simple + original = self.simple.copy(deep=True) result = original.clip(lower=[2, 3, 4], upper=[5, 6, 7], axis=1, inplace=inplace) @@ -1907,9 +1907,8 @@ def test_clip_against_list(self, inplace): columns=original.columns, index=original.index) if inplace: - tm.assert_frame_equal(original, expected, check_exact=True) - else: - tm.assert_frame_equal(result, expected, check_exact=True) + result = original + tm.assert_frame_equal(result, expected, check_exact=True) def test_clip_against_frame(self): df = DataFrame(np.random.randn(1000, 2)) diff --git a/pandas/tests/series/test_analytics.py b/pandas/tests/series/test_analytics.py index f1cb7d8454ba7..a073feaff2430 100644 --- a/pandas/tests/series/test_analytics.py +++ b/pandas/tests/series/test_analytics.py @@ -1023,9 +1023,8 @@ def test_clip_against_list(self, inplace): expected = pd.Series([1, 2, 3]) if inplace: - tm.assert_series_equal(original, expected, check_exact=True) - else: - tm.assert_series_equal(result, expected, check_exact=True) + result = original + tm.assert_series_equal(result, expected, check_exact=True) def test_clip_with_datetimes(self): From e54ca22436a4b87b481df4e8a2913b50f901ab2e Mon Sep 17 00:00:00 2001 From: yl2526 Date: Thu, 1 Jun 2017 23:34:49 -0400 Subject: [PATCH 06/10] use np.asarray for threshold --- pandas/core/generic.py | 17 +++-------------- 1 file changed, 3 insertions(+), 14 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index a80998aff0e7c..89c9af172b32d 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -4429,20 +4429,9 @@ def _clip_with_one_bound(self, threshold, method, axis, inplace): subset = method(threshold, axis=axis) | isnull(self) # GH #15390 - if is_scalar(threshold): - return self.where(subset, threshold, axis=axis, inplace=inplace) - - # For arry_like threshold, convet it to Series with corret index - # `where` takes scalar, NDFrame, or callable for argument "other" - try: - if isinstance(subset, ABCSeries): - threshold = pd.Series(threshold, index=subset.index) - elif axis == 0: - threshold = pd.Series(threshold, index=subset.index) - else: - threshold = pd.Series(threshold, index=subset.columns) - finally: - return self.where(subset, threshold, axis=axis, inplace=inplace) + if (not isinstance(threshold, ABCSeries)) and is_list_like(threshold): + threshold = np.asarray(threshold) + return self.where(subset, threshold, axis=axis, inplace=inplace) def clip(self, lower=None, upper=None, axis=None, inplace=False, *args, **kwargs): From f66a108f55add3f9c50a418004dc4ac4c6a18523 Mon Sep 17 00:00:00 2001 From: yl2526 Date: Tue, 20 Jun 2017 22:31:22 -0400 Subject: [PATCH 07/10] fix clip by casting to Series --- pandas/core/generic.py | 5 ++++- pandas/tests/frame/test_analytics.py | 16 +++++++++------- pandas/tests/series/test_analytics.py | 5 +++-- 3 files changed, 16 insertions(+), 10 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 89c9af172b32d..1917fd354fdfc 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -4430,7 +4430,10 @@ def _clip_with_one_bound(self, threshold, method, axis, inplace): # GH #15390 if (not isinstance(threshold, ABCSeries)) and is_list_like(threshold): - threshold = np.asarray(threshold) + if isinstance(self, ABCSeries) or axis == 0: + threshold = pd.Series(threshold, index=self.index) + elif axis == 1: + threshold = pd.Series(threshold, index=self.columns) return self.where(subset, threshold, axis=axis, inplace=inplace) def clip(self, lower=None, upper=None, axis=None, inplace=False, diff --git a/pandas/tests/frame/test_analytics.py b/pandas/tests/frame/test_analytics.py index 3e35a95b38faa..149374c780198 100644 --- a/pandas/tests/frame/test_analytics.py +++ b/pandas/tests/frame/test_analytics.py @@ -1893,17 +1893,19 @@ def test_clip_against_series(self, inplace): tm.assert_series_equal(clipped_df.loc[mask, i], df.loc[mask, i]) @pytest.mark.parametrize("inplace", [True, False]) - def test_clip_against_list(self, inplace): + @pytest.mark.parametrize("lower", [[2, 3, 4], np.asarray([2, 3, 4])]) + @pytest.mark.parametrize("axis,res", [ + (0, [[2., 2., 3.], [4., 5., 6.], [7., 7., 7.]]), + (1, [[2., 3., 4.], [4., 5., 6.], [5., 6., 7.]]) + ]) + def test_clip_against_list_like(self, inplace, lower, axis, res): # GH #15390 original = self.simple.copy(deep=True) - result = original.clip(lower=[2, 3, 4], upper=[5, 6, 7], - axis=1, inplace=inplace) + result = original.clip(lower=lower, upper=[5, 6, 7], + axis=axis, inplace=inplace) - arr = np.array([[2., 3., 4.], - [4., 5., 6.], - [5., 6., 7.]]) - expected = pd.DataFrame(arr, + expected = pd.DataFrame(res, columns=original.columns, index=original.index) if inplace: diff --git a/pandas/tests/series/test_analytics.py b/pandas/tests/series/test_analytics.py index a073feaff2430..749af1c56a7f0 100644 --- a/pandas/tests/series/test_analytics.py +++ b/pandas/tests/series/test_analytics.py @@ -1016,10 +1016,11 @@ def test_clip_against_series(self): assert_series_equal(s.clip(1.5, upper), Series([1.5, 1.5, 3.5])) @pytest.mark.parametrize("inplace", [True, False]) - def test_clip_against_list(self, inplace): + @pytest.mark.parametrize("upper", [[1, 2, 3], np.asarray([1, 2, 3])]) + def test_clip_against_list_like(self, inplace, upper): # GH #15390 original = pd.Series([5, 6, 7]) - result = original.clip(upper=[1, 2, 3], inplace=inplace) + result = original.clip(upper=upper, inplace=inplace) expected = pd.Series([1, 2, 3]) if inplace: From c859bb7ddd5165e307d71bdd93baf5d1099362cf Mon Sep 17 00:00:00 2001 From: yl2526 Date: Wed, 21 Jun 2017 22:37:33 -0400 Subject: [PATCH 08/10] fix transform to series logic --- pandas/core/generic.py | 12 +++++++++--- pandas/tests/frame/test_analytics.py | 5 +++-- 2 files changed, 12 insertions(+), 5 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 1917fd354fdfc..7d1a8adf381fe 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -52,6 +52,7 @@ from pandas.compat.numpy import function as nv from pandas.compat import (map, zip, lzip, lrange, string_types, isidentifier, set_function_name, cPickle as pkl) +from pandas.core.ops import _align_method_FRAME import pandas.core.nanops as nanops from pandas.util._decorators import Appender, Substitution, deprecate_kwarg from pandas.util._validators import validate_bool_kwarg @@ -4416,6 +4417,8 @@ def _clip_with_scalar(self, lower, upper, inplace=False): def _clip_with_one_bound(self, threshold, method, axis, inplace): inplace = validate_bool_kwarg(inplace, 'inplace') + if axis is not None: + axis = self._get_axis_number(axis) if np.any(isnull(threshold)): raise ValueError("Cannot use an NA value as a clip threshold") @@ -4429,11 +4432,14 @@ def _clip_with_one_bound(self, threshold, method, axis, inplace): subset = method(threshold, axis=axis) | isnull(self) # GH #15390 + # In order for where method to work, the threshold must + # be transformed to NDFrame from other array like structure. if (not isinstance(threshold, ABCSeries)) and is_list_like(threshold): - if isinstance(self, ABCSeries) or axis == 0: + if isinstance(self, ABCSeries): threshold = pd.Series(threshold, index=self.index) - elif axis == 1: - threshold = pd.Series(threshold, index=self.columns) + else: + threshold = _align_method_FRAME(self, np.asarray(threshold), + axis) return self.where(subset, threshold, axis=axis, inplace=inplace) def clip(self, lower=None, upper=None, axis=None, inplace=False, diff --git a/pandas/tests/frame/test_analytics.py b/pandas/tests/frame/test_analytics.py index 149374c780198..b09325bfa2ddc 100644 --- a/pandas/tests/frame/test_analytics.py +++ b/pandas/tests/frame/test_analytics.py @@ -1912,12 +1912,13 @@ def test_clip_against_list_like(self, inplace, lower, axis, res): result = original tm.assert_frame_equal(result, expected, check_exact=True) - def test_clip_against_frame(self): + @pytest.mark.parametrize("axis", [0, 1, None]) + def test_clip_against_frame(self, axis): df = DataFrame(np.random.randn(1000, 2)) lb = DataFrame(np.random.randn(1000, 2)) ub = lb + 1 - clipped_df = df.clip(lb, ub) + clipped_df = df.clip(lb, ub, axis=axis) lb_mask = df <= lb ub_mask = df >= ub From c28657678b42fa89ab58a1064fca3b1d6c861825 Mon Sep 17 00:00:00 2001 From: yl2526 Date: Wed, 21 Jun 2017 22:41:02 -0400 Subject: [PATCH 09/10] change whatsnew --- doc/source/whatsnew/v0.21.0.txt | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt index be6bf299184f8..f67adcc013c50 100644 --- a/doc/source/whatsnew/v0.21.0.txt +++ b/doc/source/whatsnew/v0.21.0.txt @@ -135,6 +135,7 @@ Reshaping Numeric ^^^^^^^ +- Bug in ``.clip()`` When list-like is passed; previously this raised ``ValueError`` (:issue:`15390`) Categorical @@ -143,5 +144,3 @@ Categorical Other ^^^^^ - -- Bug in ``.clip()`` now takes list or numpy-array; previously this raised ``ValueError`` (:issue:`15390`) From 85429ef0bbc1d1450c85c23c8b42541836a7d27a Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Sat, 1 Jul 2017 11:40:45 +0300 Subject: [PATCH 10/10] edit whatsnew --- doc/source/whatsnew/v0.21.0.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt index f67adcc013c50..ce0d40d327c15 100644 --- a/doc/source/whatsnew/v0.21.0.txt +++ b/doc/source/whatsnew/v0.21.0.txt @@ -135,7 +135,7 @@ Reshaping Numeric ^^^^^^^ -- Bug in ``.clip()`` When list-like is passed; previously this raised ``ValueError`` (:issue:`15390`) +- Bug in ``.clip()`` with ``axis=1`` and a list-like for ``threshold`` is passed; previously this raised ``ValueError`` (:issue:`15390`) Categorical