Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Frame ops prelims - de-duplicate, remove unused kwargs #19522

Merged
merged 8 commits into from
Feb 7, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions doc/source/whatsnew/v0.23.0.txt
Original file line number Diff line number Diff line change
Expand Up @@ -581,6 +581,7 @@ Numeric
- Bug in :class:`Index` multiplication and division methods where operating with a ``Series`` would return an ``Index`` object instead of a ``Series`` object (:issue:`19042`)
- Bug in the :class:`DataFrame` constructor in which data containing very large positive or very large negative numbers was causing ``OverflowError`` (:issue:`18584`)
- Bug in :class:`Index` constructor with ``dtype='uint64'`` where int-like floats were not coerced to :class:`UInt64Index` (:issue:`18400`)
- Bug in :class:`DataFrame` flex arithmetic (e.g. ``df.add(other, fill_value=foo)``) where passing a ``fill_value`` other than ``None`` failed to raise ``NotImplementedError`` in corner cases where either the frame or ``other`` has length zero (:issue:`19522`)


Indexing
Expand Down
49 changes: 18 additions & 31 deletions pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -3915,8 +3915,7 @@ def reorder_levels(self, order, axis=0):
# ----------------------------------------------------------------------
# Arithmetic / combination related

def _combine_frame(self, other, func, fill_value=None, level=None,
try_cast=True):
def _combine_frame(self, other, func, fill_value=None, level=None):
this, other = self.align(other, join='outer', level=level, copy=False)
new_index, new_columns = this.index, this.columns

Expand Down Expand Up @@ -3968,52 +3967,40 @@ def f(i):

def _combine_series(self, other, func, fill_value=None, axis=None,
level=None, try_cast=True):
if fill_value is not None:
raise NotImplementedError("fill_value {fill} not supported."
.format(fill=fill_value))

if axis is not None:
axis = self._get_axis_name(axis)
if axis == 'index':
return self._combine_match_index(other, func, level=level,
fill_value=fill_value,
try_cast=try_cast)
return self._combine_match_index(other, func, level=level)
else:
return self._combine_match_columns(other, func, level=level,
fill_value=fill_value,
try_cast=try_cast)
return self._combine_series_infer(other, func, level=level,
fill_value=fill_value,
try_cast=try_cast)

def _combine_series_infer(self, other, func, level=None,
fill_value=None, try_cast=True):
if len(other) == 0:
return self * np.nan
else:
if not len(other):
return self * np.nan

if len(self) == 0:
# Ambiguous case, use _series so works with DataFrame
return self._constructor(data=self._series, index=self.index,
columns=self.columns)
if not len(self):
# Ambiguous case, use _series so works with DataFrame
return self._constructor(data=self._series, index=self.index,
columns=self.columns)

return self._combine_match_columns(other, func, level=level,
fill_value=fill_value,
try_cast=try_cast)
# default axis is columns
return self._combine_match_columns(other, func, level=level,
try_cast=try_cast)

def _combine_match_index(self, other, func, level=None,
fill_value=None, try_cast=True):
def _combine_match_index(self, other, func, level=None):
left, right = self.align(other, join='outer', axis=0, level=level,
copy=False)
if fill_value is not None:
raise NotImplementedError("fill_value %r not supported." %
fill_value)
return self._constructor(func(left.values.T, right.values).T,
index=left.index, columns=self.columns,
copy=False)

def _combine_match_columns(self, other, func, level=None,
fill_value=None, try_cast=True):
def _combine_match_columns(self, other, func, level=None, try_cast=True):
left, right = self.align(other, join='outer', axis=1, level=level,
copy=False)
if fill_value is not None:
raise NotImplementedError("fill_value %r not supported" %
fill_value)

new_data = left._data.eval(func=func, other=right,
axes=[left.columns, self.index],
Expand Down
47 changes: 20 additions & 27 deletions pandas/core/indexes/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,26 @@ def _try_get_item(x):
return x


def _make_invalid_op(name):
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Consider moving this function to core/ops (and parameterizing it so that it can be used for both Index and Series).

"""
Return a binary method that always raises a TypeError.

Parameters
----------
name : str

Returns
-------
invalid_op : function
"""
def invalid_op(self, other=None):
raise TypeError("cannot perform {name} with this index type: "
"{typ}".format(name=name, typ=type(self)))

invalid_op.__name__ = name
return invalid_op


class InvalidIndexError(Exception):
pass

Expand Down Expand Up @@ -3916,30 +3936,12 @@ def _evaluate_compare(self, other):
@classmethod
def _add_numeric_methods_add_sub_disabled(cls):
""" add in the numeric add/sub methods to disable """

def _make_invalid_op(name):
def invalid_op(self, other=None):
raise TypeError("cannot perform {name} with this index type: "
"{typ}".format(name=name, typ=type(self)))

invalid_op.__name__ = name
return invalid_op

cls.__add__ = cls.__radd__ = __iadd__ = _make_invalid_op('__add__') # noqa
cls.__sub__ = __isub__ = _make_invalid_op('__sub__') # noqa

@classmethod
def _add_numeric_methods_disabled(cls):
""" add in numeric methods to disable other than add/sub """

def _make_invalid_op(name):
def invalid_op(self, other=None):
raise TypeError("cannot perform {name} with this index type: "
"{typ}".format(name=name, typ=type(self)))

invalid_op.__name__ = name
return invalid_op

cls.__pow__ = cls.__rpow__ = _make_invalid_op('__pow__')
cls.__mul__ = cls.__rmul__ = _make_invalid_op('__mul__')
cls.__floordiv__ = cls.__rfloordiv__ = _make_invalid_op('__floordiv__')
Expand Down Expand Up @@ -4147,15 +4149,6 @@ def logical_func(self, *args, **kwargs):
@classmethod
def _add_logical_methods_disabled(cls):
""" add in logical methods to disable """

def _make_invalid_op(name):
def invalid_op(self, other=None):
raise TypeError("cannot perform {name} with this index type: "
"{typ}".format(name=name, typ=type(self)))

invalid_op.__name__ = name
return invalid_op

cls.all = _make_invalid_op('all')
cls.any = _make_invalid_op('any')

Expand Down
8 changes: 5 additions & 3 deletions pandas/core/ops.py
Original file line number Diff line number Diff line change
Expand Up @@ -1106,12 +1106,13 @@ def f(self, other, axis=default_axis, level=None, fill_value=None):
if isinstance(other, ABCDataFrame): # Another DataFrame
return self._combine_frame(other, na_op, fill_value, level)
elif isinstance(other, ABCSeries):
return self._combine_series(other, na_op, fill_value, axis, level)
return self._combine_series(other, na_op, fill_value, axis, level,
try_cast=True)
else:
if fill_value is not None:
self = self.fillna(fill_value)

return self._combine_const(other, na_op)
return self._combine_const(other, na_op, try_cast=True)

f.__name__ = name

Expand Down Expand Up @@ -1172,7 +1173,8 @@ def f(self, other):
if isinstance(other, ABCDataFrame): # Another DataFrame
return self._compare_frame(other, func, str_rep)
elif isinstance(other, ABCSeries):
return self._combine_series_infer(other, func, try_cast=False)
return self._combine_series(other, func,
axis=None, try_cast=False)
else:

# straight boolean comparisons we want to allow all columns
Expand Down
14 changes: 4 additions & 10 deletions pandas/core/sparse/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -540,8 +540,7 @@ def xs(self, key, axis=0, copy=False):
# ----------------------------------------------------------------------
# Arithmetic-related methods

def _combine_frame(self, other, func, fill_value=None, level=None,
try_cast=True):
def _combine_frame(self, other, func, fill_value=None, level=None):
this, other = self.align(other, join='outer', level=level, copy=False)
new_index, new_columns = this.index, this.columns

Expand Down Expand Up @@ -584,12 +583,9 @@ def _combine_frame(self, other, func, fill_value=None, level=None,
default_fill_value=new_fill_value
).__finalize__(self)

def _combine_match_index(self, other, func, level=None, fill_value=None,
try_cast=True):
def _combine_match_index(self, other, func, level=None):
new_data = {}

if fill_value is not None:
raise NotImplementedError("'fill_value' argument is not supported")
if level is not None:
raise NotImplementedError("'level' argument is not supported")

Expand All @@ -605,6 +601,7 @@ def _combine_match_index(self, other, func, level=None, fill_value=None,
new_data[col] = func(series.values, other.values)

# fill_value is a function of our operator
fill_value = None
if isna(other.fill_value) or isna(self.default_fill_value):
fill_value = np.nan
else:
Expand All @@ -615,15 +612,12 @@ def _combine_match_index(self, other, func, level=None, fill_value=None,
new_data, index=new_index, columns=self.columns,
default_fill_value=fill_value).__finalize__(self)

def _combine_match_columns(self, other, func, level=None, fill_value=None,
try_cast=True):
def _combine_match_columns(self, other, func, level=None, try_cast=True):
# patched version of DataFrame._combine_match_columns to account for
# NumPy circumventing __rsub__ with float64 types, e.g.: 3.0 - series,
# where 3.0 is numpy.float64 and series is a SparseSeries. Still
# possible for this to happen, which is bothersome

if fill_value is not None:
raise NotImplementedError("'fill_value' argument is not supported")
if level is not None:
raise NotImplementedError("'level' argument is not supported")

Expand Down
13 changes: 13 additions & 0 deletions pandas/tests/frame/test_operators.py
Original file line number Diff line number Diff line change
Expand Up @@ -451,6 +451,19 @@ def test_arith_flex_frame(self):
with tm.assert_raises_regex(NotImplementedError, 'fill_value'):
self.frame.add(self.frame.iloc[0], axis='index', fill_value=3)

def test_arith_flex_zero_len_raises(self):
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

shouldn't these be in test_arithmetic?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Eventually, yes. For now, the more important grouping is that this test belongs with the test above it, which I plan to split/parametrize before moving to test_arithmetic.

# GH#19522 passing fill_value to frame flex arith methods should
# raise even in the zero-length special cases
ser_len0 = pd.Series([])
df_len0 = pd.DataFrame([], columns=['A', 'B'])
df = pd.DataFrame([[1, 2], [3, 4]], columns=['A', 'B'])

with tm.assert_raises_regex(NotImplementedError, 'fill_value'):
df.add(ser_len0, fill_value='E')

with tm.assert_raises_regex(NotImplementedError, 'fill_value'):
df_len0.sub(df['A'], axis=None, fill_value=3)

def test_binary_ops_align(self):

# test aligning binary ops
Expand Down