diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index e7b2fc5a6505d..4027edd6eb9eb 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -247,7 +247,7 @@ Deprecations - ``Index.repeat()`` and ``MultiIndex.repeat()`` have deprecated the ``n`` parameter in favor of ``repeats`` (:issue:`12662`) - ``Categorical.searchsorted()`` and ``Series.searchsorted()`` have deprecated the ``v`` parameter in favor of ``value`` (:issue:`12662`) - ``TimedeltaIndex.searchsorted()``, ``DatetimeIndex.searchsorted()``, and ``PeriodIndex.searchsorted()`` have deprecated the ``key`` parameter in favor of ``value`` (:issue:`12662`) - +- ``DataFrame.astype()`` and ``Series.astype()`` have deprecated the ``raise_on_error`` parameter in favor of ``errors`` (:issue:`14878`) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 3678168890444..cd4b95ad48e0d 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -3061,7 +3061,9 @@ def blocks(self): """Internal property, property synonym for as_blocks()""" return self.as_blocks() - def astype(self, dtype, copy=True, raise_on_error=True, **kwargs): + @deprecate_kwarg(old_arg_name='raise_on_error', new_arg_name='errors', + mapping={True: 'raise', False: 'ignore'}) + def astype(self, dtype, copy=True, errors='raise', **kwargs): """ Cast object to input numpy.dtype Return a copy when copy = True (be really careful with this!) @@ -3073,7 +3075,15 @@ def astype(self, dtype, copy=True, raise_on_error=True, **kwargs): the same type. Alternatively, use {col: dtype, ...}, where col is a column label and dtype is a numpy.dtype or Python type to cast one or more of the DataFrame's columns to column-specific types. - raise_on_error : raise on invalid input + errors : {'raise', 'ignore'}, default 'raise' + Control raising of exceptions on invalid data for provided dtype. + + - ``raise`` : allow exceptions to be raised + - ``ignore`` : suppress exceptions. On error return original object + + .. 
versionadded:: 0.20.0 + + raise_on_error : DEPRECATED use ``errors`` instead kwargs : keyword arguments to pass on to the constructor Returns @@ -3086,7 +3096,7 @@ def astype(self, dtype, copy=True, raise_on_error=True, **kwargs): raise KeyError('Only the Series name can be used for ' 'the key in Series dtype mappings.') new_type = list(dtype.values())[0] - return self.astype(new_type, copy, raise_on_error, **kwargs) + return self.astype(new_type, copy, errors, **kwargs) elif self.ndim > 2: raise NotImplementedError( 'astype() only accepts a dtype arg of type dict when ' @@ -3107,8 +3117,8 @@ def astype(self, dtype, copy=True, raise_on_error=True, **kwargs): return concat(results, axis=1, copy=False) # else, only a single dtype is given - new_data = self._data.astype(dtype=dtype, copy=copy, - raise_on_error=raise_on_error, **kwargs) + new_data = self._data.astype(dtype=dtype, copy=copy, errors=errors, + **kwargs) return self._constructor(new_data).__finalize__(self) def copy(self, deep=True): diff --git a/pandas/core/internals.py b/pandas/core/internals.py index 05ac3356c1770..aa865ae430d4a 100644 --- a/pandas/core/internals.py +++ b/pandas/core/internals.py @@ -455,17 +455,23 @@ def downcast(self, dtypes=None, mgr=None): return blocks - def astype(self, dtype, copy=False, raise_on_error=True, values=None, - **kwargs): - return self._astype(dtype, copy=copy, raise_on_error=raise_on_error, - values=values, **kwargs) + def astype(self, dtype, copy=False, errors='raise', values=None, **kwargs): + return self._astype(dtype, copy=copy, errors=errors, values=values, + **kwargs) - def _astype(self, dtype, copy=False, raise_on_error=True, values=None, + def _astype(self, dtype, copy=False, errors='raise', values=None, klass=None, mgr=None, **kwargs): """ Coerce to the new type (if copy=True, return a new copy) raise on an except if raise == True """ + errors_legal_values = ('raise', 'ignore') + + if errors not in errors_legal_values: + invalid_arg = ("Expected value of 
kwarg 'errors' to be one of {}. " + "Supplied value is '{}'".format( + list(errors_legal_values), errors)) + raise ValueError(invalid_arg) # may need to convert to categorical # this is only called for non-categoricals @@ -507,7 +513,7 @@ def _astype(self, dtype, copy=False, raise_on_error=True, values=None, newb = make_block(values, placement=self.mgr_locs, dtype=dtype, klass=klass) except: - if raise_on_error is True: + if errors == 'raise': raise newb = self.copy() if copy else self @@ -2147,7 +2153,7 @@ def take_nd(self, indexer, axis=0, new_mgr_locs=None, fill_tuple=None): return self.make_block_same_class(new_values, new_mgr_locs) - def _astype(self, dtype, copy=False, raise_on_error=True, values=None, + def _astype(self, dtype, copy=False, errors='raise', values=None, klass=None, mgr=None): """ Coerce to the new type (if copy=True, return a new copy) diff --git a/pandas/tests/frame/test_dtypes.py b/pandas/tests/frame/test_dtypes.py index 43a108e9acc80..95c5e7ea6e9fc 100644 --- a/pandas/tests/frame/test_dtypes.py +++ b/pandas/tests/frame/test_dtypes.py @@ -357,7 +357,7 @@ def test_astype_with_exclude_string(self): df = self.frame.copy() expected = self.frame.astype(int) df['string'] = 'foo' - casted = df.astype(int, raise_on_error=False) + casted = df.astype(int, errors='ignore') expected['string'] = 'foo' assert_frame_equal(casted, expected) @@ -365,7 +365,7 @@ def test_astype_with_exclude_string(self): df = self.frame.copy() expected = self.frame.astype(np.int32) df['string'] = 'foo' - casted = df.astype(np.int32, raise_on_error=False) + casted = df.astype(np.int32, errors='ignore') expected['string'] = 'foo' assert_frame_equal(casted, expected) @@ -523,6 +523,19 @@ def test_timedeltas(self): result = df.get_dtype_counts().sort_values() assert_series_equal(result, expected) + def test_arg_for_errors_in_astype(self): + # issue #14878 + + df = DataFrame([1, 2, 3]) + + with self.assertRaises(ValueError): + df.astype(np.float64, errors=True) + + with 
tm.assert_produces_warning(FutureWarning): + df.astype(np.int8, raise_on_error=False) + + df.astype(np.int8, errors='ignore') + class TestDataFrameDatetimeWithTZ(tm.TestCase, TestData): diff --git a/pandas/tests/series/test_dtypes.py b/pandas/tests/series/test_dtypes.py index 3eafbaf912797..bf9c64276b693 100644 --- a/pandas/tests/series/test_dtypes.py +++ b/pandas/tests/series/test_dtypes.py @@ -168,3 +168,16 @@ def test_complexx(self): b.real = np.arange(5) + 5 tm.assert_numpy_array_equal(a + 5, b.real) tm.assert_numpy_array_equal(4 * a, b.imag) + + def test_arg_for_errors_in_astype(self): + # issue #14878 + + sr = Series([1, 2, 3]) + + with self.assertRaises(ValueError): + sr.astype(np.float64, errors=False) + + with tm.assert_produces_warning(FutureWarning): + sr.astype(np.int8, raise_on_error=True) + + sr.astype(np.int8, errors='raise') diff --git a/pandas/tests/test_internals.py b/pandas/tests/test_internals.py index db1c8da4cae73..32e8f44e6f258 100644 --- a/pandas/tests/test_internals.py +++ b/pandas/tests/test_internals.py @@ -553,7 +553,7 @@ def test_astype(self): 'e: f4; f: f2; g: f8') for t in ['float16', 'float32', 'float64', 'int32', 'int64']: t = np.dtype(t) - tmgr = mgr.astype(t, raise_on_error=False) + tmgr = mgr.astype(t, errors='ignore') self.assertEqual(tmgr.get('c').dtype.type, t) self.assertEqual(tmgr.get('e').dtype.type, t) self.assertEqual(tmgr.get('f').dtype.type, t)