Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Depr] raise_on_error kwarg with errors kwarg in astype#14878 #14967

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
18 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion doc/source/whatsnew/v0.20.0.txt
Original file line number Diff line number Diff line change
Expand Up @@ -247,7 +247,7 @@ Deprecations
- ``Index.repeat()`` and ``MultiIndex.repeat()`` have deprecated the ``n`` parameter in favor of ``repeats`` (:issue:`12662`)
- ``Categorical.searchsorted()`` and ``Series.searchsorted()`` have deprecated the ``v`` parameter in favor of ``value`` (:issue:`12662`)
- ``TimedeltaIndex.searchsorted()``, ``DatetimeIndex.searchsorted()``, and ``PeriodIndex.searchsorted()`` have deprecated the ``key`` parameter in favor of ``value`` (:issue:`12662`)

- ``DataFrame.astype()`` has deprecated the ``raise_on_error`` parameter in favor of ``errors`` (:issue:`14878`)



Expand Down
20 changes: 15 additions & 5 deletions pandas/core/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -3061,7 +3061,9 @@ def blocks(self):
"""Internal property, property synonym for as_blocks()"""
return self.as_blocks()

def astype(self, dtype, copy=True, raise_on_error=True, **kwargs):
@deprecate_kwarg(old_arg_name='raise_on_error', new_arg_name='errors',
mapping={True: 'raise', False: 'ignore'})
def astype(self, dtype, copy=True, errors='raise', **kwargs):
"""
Cast object to input numpy.dtype
Return a copy when copy = True (be really careful with this!)
Expand All @@ -3073,7 +3075,15 @@ def astype(self, dtype, copy=True, raise_on_error=True, **kwargs):
the same type. Alternatively, use {col: dtype, ...}, where col is a
column label and dtype is a numpy.dtype or Python type to cast one
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

leave this in here and just say DEPRECATED.

@jorisvandenbossche IIRC that is our convention?

add a versionadded tag 0.20.0 for errors

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Will do.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

yes, can leave it (but put it at the end)

or more of the DataFrame's columns to column-specific types.
raise_on_error : raise on invalid input
errors : {'raise', 'ignore'}, default 'raise'.
Control raising of exceptions on invalid data for provided dtype.

- ``raise`` : allow exceptions to be raised
- ``ignore`` : suppress exceptions. On error return original object

.. versionadded:: 0.20.0

raise_on_error : DEPRECATED use ``errors`` instead
kwargs : keyword arguments to pass on to the constructor
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think you need a blank line before kwargs (to make the sub-list work)

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I've just checked and the sublist is rendered fine with, or without a line between ignore
and kwargs. I can add an extra line if that is the convention,


Returns
Expand All @@ -3086,7 +3096,7 @@ def astype(self, dtype, copy=True, raise_on_error=True, **kwargs):
raise KeyError('Only the Series name can be used for '
'the key in Series dtype mappings.')
new_type = list(dtype.values())[0]
return self.astype(new_type, copy, raise_on_error, **kwargs)
return self.astype(new_type, copy, errors, **kwargs)
elif self.ndim > 2:
raise NotImplementedError(
'astype() only accepts a dtype arg of type dict when '
Expand All @@ -3107,8 +3117,8 @@ def astype(self, dtype, copy=True, raise_on_error=True, **kwargs):
return concat(results, axis=1, copy=False)

# else, only a single dtype is given
new_data = self._data.astype(dtype=dtype, copy=copy,
raise_on_error=raise_on_error, **kwargs)
new_data = self._data.astype(dtype=dtype, copy=copy, errors=errors,
**kwargs)
return self._constructor(new_data).__finalize__(self)

def copy(self, deep=True):
Expand Down
20 changes: 13 additions & 7 deletions pandas/core/internals.py
Original file line number Diff line number Diff line change
Expand Up @@ -455,17 +455,23 @@ def downcast(self, dtypes=None, mgr=None):

return blocks

def astype(self, dtype, copy=False, raise_on_error=True, values=None,
**kwargs):
return self._astype(dtype, copy=copy, raise_on_error=raise_on_error,
values=values, **kwargs)
def astype(self, dtype, copy=False, errors='raise', values=None, **kwargs):
return self._astype(dtype, copy=copy, errors=errors, values=values,
**kwargs)

def _astype(self, dtype, copy=False, raise_on_error=True, values=None,
def _astype(self, dtype, copy=False, errors='raise', values=None,
klass=None, mgr=None, **kwargs):
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

can you check the errors in ['raise', 'ignore'] at the beginning of the function and raise a ValueError otherwise (and add a test for this)

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sure, there are two 'public' astype(...) methods:

  • NDFrame.astype(...) in pandas/core/generic.py
  • Block.astype(...) in pandas/core/internals.py

In addition there is a 'protected' Block._astype(...) method in
pandas/core/internals.py. Should I only put the checks in the 'public'
methods?

Bearing in mind that the raise_on_error kwarg is going to be deprecated for
DataFrame.where() and replaced with the errors kwarg it would make sense to
put the code that checks the validity of the arguments in one place. Do we have
any existing code where we put such validity checking functions/methods?

I notice that both NDFrame & Block inherit from PandasObject but, I'm not
sure that this is the correct thing to do. Should I put a function in
pandas/core/base.py?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Block is completely internal
u can put the check there

we have centralized checks but no need in this case

out in _astype as that's where it's actually used
parameter validation is best down where it's actually

"""
Coerce to the new type (if copy=True, return a new copy)
raise on an except if raise == True
"""
errors_legal_values = ('raise', 'ignore')

if errors not in errors_legal_values:
invalid_arg = ("Expected value of kwarg 'errors' to be one of {}. "
"Supplied value is '{}'".format(
list(errors_legal_values), errors))
raise ValueError(invalid_arg)

# may need to convert to categorical
# this is only called for non-categoricals
Expand Down Expand Up @@ -507,7 +513,7 @@ def _astype(self, dtype, copy=False, raise_on_error=True, values=None,
newb = make_block(values, placement=self.mgr_locs, dtype=dtype,
klass=klass)
except:
if raise_on_error is True:
if errors == 'raise':
raise
newb = self.copy() if copy else self

Expand Down Expand Up @@ -2147,7 +2153,7 @@ def take_nd(self, indexer, axis=0, new_mgr_locs=None, fill_tuple=None):

return self.make_block_same_class(new_values, new_mgr_locs)

def _astype(self, dtype, copy=False, raise_on_error=True, values=None,
def _astype(self, dtype, copy=False, errors='raise', values=None,
klass=None, mgr=None):
"""
Coerce to the new type (if copy=True, return a new copy)
Expand Down
17 changes: 15 additions & 2 deletions pandas/tests/frame/test_dtypes.py
Original file line number Diff line number Diff line change
Expand Up @@ -357,15 +357,15 @@ def test_astype_with_exclude_string(self):
df = self.frame.copy()
expected = self.frame.astype(int)
df['string'] = 'foo'
casted = df.astype(int, raise_on_error=False)
casted = df.astype(int, errors='ignore')

expected['string'] = 'foo'
assert_frame_equal(casted, expected)

df = self.frame.copy()
expected = self.frame.astype(np.int32)
df['string'] = 'foo'
casted = df.astype(np.int32, raise_on_error=False)
casted = df.astype(np.int32, errors='ignore')

expected['string'] = 'foo'
assert_frame_equal(casted, expected)
Expand Down Expand Up @@ -523,6 +523,19 @@ def test_timedeltas(self):
result = df.get_dtype_counts().sort_values()
assert_series_equal(result, expected)

def test_arg_for_errors_in_astype(self):
# issue #14878

df = DataFrame([1, 2, 3])

with self.assertRaises(ValueError):
df.astype(np.float64, errors=True)

with tm.assert_produces_warning(FutureWarning):
df.astype(np.int8, raise_on_error=False)

df.astype(np.int8, errors='ignore')


class TestDataFrameDatetimeWithTZ(tm.TestCase, TestData):

Expand Down
13 changes: 13 additions & 0 deletions pandas/tests/series/test_dtypes.py
Original file line number Diff line number Diff line change
Expand Up @@ -168,3 +168,16 @@ def test_complexx(self):
b.real = np.arange(5) + 5
tm.assert_numpy_array_equal(a + 5, b.real)
tm.assert_numpy_array_equal(4 * a, b.imag)

def test_arg_for_errors_in_astype(self):
# issue #14878

sr = Series([1, 2, 3])

with self.assertRaises(ValueError):
sr.astype(np.float64, errors=False)

with tm.assert_produces_warning(FutureWarning):
sr.astype(np.int8, raise_on_error=True)

sr.astype(np.int8, errors='raise')
2 changes: 1 addition & 1 deletion pandas/tests/test_internals.py
Original file line number Diff line number Diff line change
Expand Up @@ -553,7 +553,7 @@ def test_astype(self):
'e: f4; f: f2; g: f8')
for t in ['float16', 'float32', 'float64', 'int32', 'int64']:
t = np.dtype(t)
tmgr = mgr.astype(t, raise_on_error=False)
tmgr = mgr.astype(t, errors='ignore')
self.assertEqual(tmgr.get('c').dtype.type, t)
self.assertEqual(tmgr.get('e').dtype.type, t)
self.assertEqual(tmgr.get('f').dtype.type, t)
Expand Down