Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

BUG: clip dataframe column-wise #15390 #16504

Merged
merged 10 commits into from
Jul 1, 2017
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions doc/source/whatsnew/v0.21.0.txt
Original file line number Diff line number Diff line change
Expand Up @@ -135,6 +135,7 @@ Reshaping

Numeric
^^^^^^^
- Bug in ``.clip()`` with ``axis=1`` and a list-like for ``threshold`` is passed; previously this raised ``ValueError`` (:issue:`15390`)


Categorical
Expand Down
53 changes: 33 additions & 20 deletions pandas/core/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,7 @@
from pandas.compat.numpy import function as nv
from pandas.compat import (map, zip, lzip, lrange, string_types,
isidentifier, set_function_name, cPickle as pkl)
from pandas.core.ops import _align_method_FRAME
import pandas.core.nanops as nanops
from pandas.util._decorators import Appender, Substitution, deprecate_kwarg
from pandas.util._validators import validate_bool_kwarg
Expand Down Expand Up @@ -4413,6 +4414,34 @@ def _clip_with_scalar(self, lower, upper, inplace=False):
else:
return result

def _clip_with_one_bound(self, threshold, method, axis, inplace):

inplace = validate_bool_kwarg(inplace, 'inplace')
if axis is not None:
axis = self._get_axis_number(axis)

if np.any(isnull(threshold)):
raise ValueError("Cannot use an NA value as a clip threshold")

# method is self.le for upper bound and self.ge for lower bound
if is_scalar(threshold) and is_number(threshold):
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

woa. you are adding a LOT of code here. where exactly is the issue?

Copy link
Contributor Author

@yl2526 yl2526 May 25, 2017

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I fell like there are multiple lines of common code of clip_upper and clip_lower and this fix will make even more. Thus, I moved them form clip_upper and clip_lower to a new method _clip_with_one_bound. This is my first time to contribute code to a open-source project and I am not sure if I should do this. If I shouldn't, let me know and I should fix it. Thanks.

The issue is that if you pass a list to clip's lower or upper argument, an error will be raised, because it will pass lower and upper to the argument other of where and where's argument other doesn't take like or numpy array.

if method.__name__ == 'le':
return self._clip_with_scalar(None, threshold, inplace=inplace)
return self._clip_with_scalar(threshold, None, inplace=inplace)

subset = method(threshold, axis=axis) | isnull(self)

# GH #15390
# In order for where method to work, the threshold must
# be transformed to NDFrame from other array like structure.
if (not isinstance(threshold, ABCSeries)) and is_list_like(threshold):
if isinstance(self, ABCSeries):
threshold = pd.Series(threshold, index=self.index)
else:
threshold = _align_method_FRAME(self, np.asarray(threshold),
axis)
return self.where(subset, threshold, axis=axis, inplace=inplace)

def clip(self, lower=None, upper=None, axis=None, inplace=False,
*args, **kwargs):
"""
Expand Down Expand Up @@ -4515,16 +4544,8 @@ def clip_upper(self, threshold, axis=None, inplace=False):
-------
clipped : same type as input
"""
if np.any(isnull(threshold)):
raise ValueError("Cannot use an NA value as a clip threshold")

if is_scalar(threshold) and is_number(threshold):
return self._clip_with_scalar(None, threshold, inplace=inplace)

inplace = validate_bool_kwarg(inplace, 'inplace')

subset = self.le(threshold, axis=axis) | isnull(self)
return self.where(subset, threshold, axis=axis, inplace=inplace)
return self._clip_with_one_bound(threshold, method=self.le,
axis=axis, inplace=inplace)

def clip_lower(self, threshold, axis=None, inplace=False):
"""
Expand All @@ -4547,16 +4568,8 @@ def clip_lower(self, threshold, axis=None, inplace=False):
-------
clipped : same type as input
"""
if np.any(isnull(threshold)):
raise ValueError("Cannot use an NA value as a clip threshold")

if is_scalar(threshold) and is_number(threshold):
return self._clip_with_scalar(threshold, None, inplace=inplace)

inplace = validate_bool_kwarg(inplace, 'inplace')

subset = self.ge(threshold, axis=axis) | isnull(self)
return self.where(subset, threshold, axis=axis, inplace=inplace)
return self._clip_with_one_bound(threshold, method=self.ge,
axis=axis, inplace=inplace)

def groupby(self, by=None, axis=0, level=None, as_index=True, sort=True,
group_keys=True, squeeze=False, **kwargs):
Expand Down
25 changes: 23 additions & 2 deletions pandas/tests/frame/test_analytics.py
Original file line number Diff line number Diff line change
Expand Up @@ -1892,12 +1892,33 @@ def test_clip_against_series(self, inplace):

tm.assert_series_equal(clipped_df.loc[mask, i], df.loc[mask, i])

def test_clip_against_frame(self):
@pytest.mark.parametrize("inplace", [True, False])
@pytest.mark.parametrize("lower", [[2, 3, 4], np.asarray([2, 3, 4])])
@pytest.mark.parametrize("axis,res", [
(0, [[2., 2., 3.], [4., 5., 6.], [7., 7., 7.]]),
(1, [[2., 3., 4.], [4., 5., 6.], [5., 6., 7.]])
])
def test_clip_against_list_like(self, inplace, lower, axis, res):
# GH #15390
original = self.simple.copy(deep=True)

result = original.clip(lower=lower, upper=[5, 6, 7],
axis=axis, inplace=inplace)

expected = pd.DataFrame(res,
columns=original.columns,
index=original.index)
if inplace:
result = original
tm.assert_frame_equal(result, expected, check_exact=True)

@pytest.mark.parametrize("axis", [0, 1, None])
def test_clip_against_frame(self, axis):
df = DataFrame(np.random.randn(1000, 2))
lb = DataFrame(np.random.randn(1000, 2))
ub = lb + 1

clipped_df = df.clip(lb, ub)
clipped_df = df.clip(lb, ub, axis=axis)

lb_mask = df <= lb
ub_mask = df >= ub
Expand Down
12 changes: 12 additions & 0 deletions pandas/tests/series/test_analytics.py
Original file line number Diff line number Diff line change
Expand Up @@ -1015,6 +1015,18 @@ def test_clip_against_series(self):
assert_series_equal(s.clip(lower, upper), Series([1.0, 2.0, 3.5]))
assert_series_equal(s.clip(1.5, upper), Series([1.5, 1.5, 3.5]))

@pytest.mark.parametrize("inplace", [True, False])
@pytest.mark.parametrize("upper", [[1, 2, 3], np.asarray([1, 2, 3])])
def test_clip_against_list_like(self, inplace, upper):
# GH #15390
original = pd.Series([5, 6, 7])
result = original.clip(upper=upper, inplace=inplace)
expected = pd.Series([1, 2, 3])

if inplace:
result = original
tm.assert_series_equal(result, expected, check_exact=True)

def test_clip_with_datetimes(self):

# GH 11838
Expand Down