Skip to content

Commit

Permalink
BUG: clip dataframe column-wise #15390 (#16504)
Browse files Browse the repository at this point in the history
  • Loading branch information
yl2526 authored and jreback committed Jul 1, 2017
1 parent 9462379 commit e5fd3e0
Show file tree
Hide file tree
Showing 4 changed files with 69 additions and 22 deletions.
1 change: 1 addition & 0 deletions doc/source/whatsnew/v0.21.0.txt
Original file line number Diff line number Diff line change
Expand Up @@ -135,6 +135,7 @@ Reshaping

Numeric
^^^^^^^
- Bug in ``.clip()`` with ``axis=1`` and a list-like for ``threshold`` is passed; previously this raised ``ValueError`` (:issue:`15390`)


Categorical
Expand Down
53 changes: 33 additions & 20 deletions pandas/core/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,7 @@
from pandas.compat.numpy import function as nv
from pandas.compat import (map, zip, lzip, lrange, string_types,
isidentifier, set_function_name, cPickle as pkl)
from pandas.core.ops import _align_method_FRAME
import pandas.core.nanops as nanops
from pandas.util._decorators import Appender, Substitution, deprecate_kwarg
from pandas.util._validators import validate_bool_kwarg
Expand Down Expand Up @@ -4413,6 +4414,34 @@ def _clip_with_scalar(self, lower, upper, inplace=False):
else:
return result

def _clip_with_one_bound(self, threshold, method, axis, inplace):

inplace = validate_bool_kwarg(inplace, 'inplace')
if axis is not None:
axis = self._get_axis_number(axis)

if np.any(isnull(threshold)):
raise ValueError("Cannot use an NA value as a clip threshold")

# method is self.le for upper bound and self.ge for lower bound
if is_scalar(threshold) and is_number(threshold):
if method.__name__ == 'le':
return self._clip_with_scalar(None, threshold, inplace=inplace)
return self._clip_with_scalar(threshold, None, inplace=inplace)

subset = method(threshold, axis=axis) | isnull(self)

# GH #15390
# In order for where method to work, the threshold must
# be transformed to NDFrame from other array like structure.
if (not isinstance(threshold, ABCSeries)) and is_list_like(threshold):
if isinstance(self, ABCSeries):
threshold = pd.Series(threshold, index=self.index)
else:
threshold = _align_method_FRAME(self, np.asarray(threshold),
axis)
return self.where(subset, threshold, axis=axis, inplace=inplace)

def clip(self, lower=None, upper=None, axis=None, inplace=False,
*args, **kwargs):
"""
Expand Down Expand Up @@ -4515,16 +4544,8 @@ def clip_upper(self, threshold, axis=None, inplace=False):
-------
clipped : same type as input
"""
if np.any(isnull(threshold)):
raise ValueError("Cannot use an NA value as a clip threshold")

if is_scalar(threshold) and is_number(threshold):
return self._clip_with_scalar(None, threshold, inplace=inplace)

inplace = validate_bool_kwarg(inplace, 'inplace')

subset = self.le(threshold, axis=axis) | isnull(self)
return self.where(subset, threshold, axis=axis, inplace=inplace)
return self._clip_with_one_bound(threshold, method=self.le,
axis=axis, inplace=inplace)

def clip_lower(self, threshold, axis=None, inplace=False):
"""
Expand All @@ -4547,16 +4568,8 @@ def clip_lower(self, threshold, axis=None, inplace=False):
-------
clipped : same type as input
"""
if np.any(isnull(threshold)):
raise ValueError("Cannot use an NA value as a clip threshold")

if is_scalar(threshold) and is_number(threshold):
return self._clip_with_scalar(threshold, None, inplace=inplace)

inplace = validate_bool_kwarg(inplace, 'inplace')

subset = self.ge(threshold, axis=axis) | isnull(self)
return self.where(subset, threshold, axis=axis, inplace=inplace)
return self._clip_with_one_bound(threshold, method=self.ge,
axis=axis, inplace=inplace)

def groupby(self, by=None, axis=0, level=None, as_index=True, sort=True,
group_keys=True, squeeze=False, **kwargs):
Expand Down
25 changes: 23 additions & 2 deletions pandas/tests/frame/test_analytics.py
Original file line number Diff line number Diff line change
Expand Up @@ -1892,12 +1892,33 @@ def test_clip_against_series(self, inplace):

tm.assert_series_equal(clipped_df.loc[mask, i], df.loc[mask, i])

def test_clip_against_frame(self):
@pytest.mark.parametrize("inplace", [True, False])
@pytest.mark.parametrize("lower", [[2, 3, 4], np.asarray([2, 3, 4])])
@pytest.mark.parametrize("axis,res", [
(0, [[2., 2., 3.], [4., 5., 6.], [7., 7., 7.]]),
(1, [[2., 3., 4.], [4., 5., 6.], [5., 6., 7.]])
])
def test_clip_against_list_like(self, inplace, lower, axis, res):
# GH #15390
original = self.simple.copy(deep=True)

result = original.clip(lower=lower, upper=[5, 6, 7],
axis=axis, inplace=inplace)

expected = pd.DataFrame(res,
columns=original.columns,
index=original.index)
if inplace:
result = original
tm.assert_frame_equal(result, expected, check_exact=True)

@pytest.mark.parametrize("axis", [0, 1, None])
def test_clip_against_frame(self, axis):
df = DataFrame(np.random.randn(1000, 2))
lb = DataFrame(np.random.randn(1000, 2))
ub = lb + 1

clipped_df = df.clip(lb, ub)
clipped_df = df.clip(lb, ub, axis=axis)

lb_mask = df <= lb
ub_mask = df >= ub
Expand Down
12 changes: 12 additions & 0 deletions pandas/tests/series/test_analytics.py
Original file line number Diff line number Diff line change
Expand Up @@ -1015,6 +1015,18 @@ def test_clip_against_series(self):
assert_series_equal(s.clip(lower, upper), Series([1.0, 2.0, 3.5]))
assert_series_equal(s.clip(1.5, upper), Series([1.5, 1.5, 3.5]))

@pytest.mark.parametrize("inplace", [True, False])
@pytest.mark.parametrize("upper", [[1, 2, 3], np.asarray([1, 2, 3])])
def test_clip_against_list_like(self, inplace, upper):
# GH #15390
original = pd.Series([5, 6, 7])
result = original.clip(upper=upper, inplace=inplace)
expected = pd.Series([1, 2, 3])

if inplace:
result = original
tm.assert_series_equal(result, expected, check_exact=True)

def test_clip_with_datetimes(self):

# GH 11838
Expand Down

0 comments on commit e5fd3e0

Please sign in to comment.