From b7ba887f6433e38766b9c927b38b5a48845cd3d3 Mon Sep 17 00:00:00 2001 From: Patrick Hoefler Date: Wed, 22 Feb 2023 22:59:42 +0000 Subject: [PATCH 01/10] ENH: Add downcast as method to df and Series --- pandas/core/frame.py | 4 +++ pandas/core/generic.py | 15 ++++++++ pandas/core/internals/blocks.py | 4 +++ pandas/core/internals/managers.py | 3 ++ pandas/core/series.py | 4 +++ pandas/tests/frame/methods/test_downcast.py | 40 +++++++++++++++++++++ 6 files changed, 70 insertions(+) create mode 100644 pandas/tests/frame/methods/test_downcast.py diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 8c10e1fdd2d43..3fbf7d036dbc4 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -66,6 +66,7 @@ CorrelationMethod, DropKeep, Dtype, + DtypeArg, DtypeObj, FilePath, FillnaOptions, @@ -5475,6 +5476,9 @@ def fillna( downcast=downcast, ) + def downcast(self, dtype: Literal["infer"] | DtypeArg) -> DataFrame: + return super().downcast(dtype) + def pop(self, item: Hashable) -> Series: """ Return item and drop from frame. Raise KeyError if not found. diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 8e7d31f3f85e1..96eab58ee9f1c 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -6963,6 +6963,21 @@ def fillna( else: return result.__finalize__(self, method="fillna") + def downcast(self: NDFrameT, dtype: Literal["infer"] | DtypeArg) -> NDFrameT: + if self.ndim == 1: + new_data = self._mgr.downcast(dtype) + return self._constructor(new_data).__finalize__(self, method="downcast") + if isinstance(dtype, dict): + # Don't make an actual copy since setitem does not write into array + result = self.copy(deep=False) + for key, val in dtype.items(): + if key in result.columns: + result[key] = result[key].downcast(dtype=val) + else: + new_data = self._mgr.downcast(dtype) + result = self._constructor(new_data) + return result.__finalize__(self, method="downcast") + @overload def ffill( self: NDFrameT, diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index 1aba48371b430..3d0f3460c8a90 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -421,6 +421,10 @@ def coerce_to_target_dtype(self, other) -> Block: return self.astype(new_dtype, copy=False) + @final + def downcast(self, dtype: DtypeObj, using_cow: bool = False) -> list[Block]: + return self._maybe_downcast([self], downcast=dtype, using_cow=using_cow) + @final def _maybe_downcast( self, blocks: list[Block], downcast=None, using_cow: bool = False diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py index cb32b3bbc6cc7..2cde77ea25c5b 100644 --- a/pandas/core/internals/managers.py +++ b/pandas/core/internals/managers.py @@ -429,6 +429,9 @@ def fillna(self: T, value, limit, inplace: bool, downcast) -> T: using_cow=using_copy_on_write(), ) + def downcast(self: T, dtype) -> T: + return self.apply("downcast", dtype=dtype, using_cow=using_copy_on_write()) + def astype(self: T, dtype, copy: bool | None = False, errors: str = "raise") -> T: if copy is None: if using_copy_on_write(): diff --git a/pandas/core/series.py b/pandas/core/series.py index 894c33dd618c6..dc48e747ae9fd 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -5069,6 +5069,10 @@ def fillna( downcast=downcast, ) + @doc(NDFrame.downcast, **_shared_doc_kwargs) + def downcast(self, dtype: Literal["infer"] | Dtype) -> Series: + return super().downcast(dtype) + def pop(self, item: Hashable) -> Any: """ Return item and drops from series. Raise KeyError if not found. diff --git a/pandas/tests/frame/methods/test_downcast.py b/pandas/tests/frame/methods/test_downcast.py new file mode 100644 index 0000000000000..49c416483e512 --- /dev/null +++ b/pandas/tests/frame/methods/test_downcast.py @@ -0,0 +1,40 @@ +from pandas import ( + DataFrame, + Series, +) +import pandas._testing as tm + + +class TestDowncast: + def test_downcast(self): + df = DataFrame({"a": [1.0, 2.0], "b": 1.5, "c": 2}) + result = df.downcast("int8") + expected = DataFrame( + { + "a": Series([1, 2], dtype="int8"), + "b": 1.5, + "c": Series([2, 2], dtype="int8"), + } + ) + tm.assert_frame_equal(result, expected) + + def test_downcast_dict(self): + df = DataFrame({"a": [1.0, 2.0], "b": 1.5, "c": 2, "d": 1.0}) + df_orig = df.copy() + result = df.downcast({"a": "int8", "b": "int64", "c": "int32"}) + expected = DataFrame( + { + "a": Series([1, 2], dtype="int8"), + "b": 1.5, + "c": Series([2, 2], dtype="int32"), + "d": 1.0, + } + ) + tm.assert_frame_equal(result, expected) + tm.assert_frame_equal(df, df_orig) + + def test_downcast_infer(self): + df = DataFrame({"a": [1.0, 2.0], "b": 1.5, "c": 2.0}) + result = df.downcast("infer") + expected = DataFrame({"a": [1, 2], "b": 1.5, "c": 2}) + tm.assert_frame_equal(result, expected) From bcad0a60531fc4c9793a8ace7d2c3f06881a07b7 Mon Sep 17 00:00:00 2001 From: Patrick Hoefler Date: Sun, 26 Feb 2023 01:00:55 +0100 Subject: [PATCH 02/10] CoW implementation --- pandas/core/apply.py | 2 +- pandas/core/frame.py | 1 + pandas/core/generic.py | 65 ++++++++++++++++++++++++- pandas/core/internals/blocks.py | 6 ++- pandas/tests/copy_view/test_downcast.py | 44 +++++++++++++++++ 5 files changed, 114 insertions(+), 4 deletions(-) create mode 100644 pandas/tests/copy_view/test_downcast.py diff --git a/pandas/core/apply.py b/pandas/core/apply.py index da049218d5187..5fa860e1c389f 100644 --- a/pandas/core/apply.py +++ b/pandas/core/apply.py @@ -261,7 +261,7 @@ def transform_dict_like(self, func): results: dict[Hashable, DataFrame | Series] = {} for name, how in func.items(): colg = obj._gotitem(name, ndim=1) - results[name] = colg.transform(how, 0, *args, **kwargs) + results[name] = colg.transform(how, 0, *args, **kwargs).copy(deep=False) return concat(results, axis=1) def transform_str_or_callable(self, func) -> DataFrame | Series: diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 3fbf7d036dbc4..96e9d3318b77d 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -5476,6 +5476,7 @@ def fillna( downcast=downcast, ) + @doc(NDFrame.downcast, **_shared_doc_kwargs) def downcast(self, dtype: Literal["infer"] | DtypeArg) -> DataFrame: return super().downcast(dtype) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 96eab58ee9f1c..84c95d2e7ae8a 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -6964,15 +6964,76 @@ def fillna( return result.__finalize__(self, method="fillna") def downcast(self: NDFrameT, dtype: Literal["infer"] | DtypeArg) -> NDFrameT: + """Downcasts the columns to an appropriate dtype. + + Possibly casts floats to integers or integers to a smaller dtype from + the same type, e.g. int64 -> int32. + + Parameters + ---------- + dtype: "infer", dtype or dict with column -> dtype + Dtype to cast to or "infer" if the dtype should be + inferred. + + Returns + ------- + {klass} + {klass} with the same shape and converted columns. + + Notes + ----- + The downcasting logic protects against overflows and truncating floats. + If the values don't fit into the specified dtype, the column is ignored. + + Examples + -------- + >>> df = pd.DataFrame({"foo": [1.0, 2.0], "bar": [1.5, 2.5], "baz": [3.0, 4.0]}) + >>> df + foo bar baz + 0 1.0 1.5 3.0 + 1 2.0 2.5 4.0 + + >>> result = df.downcast("int8") + >>> result + foo bar baz + 0 1 1.5 3 + 1 2 2.5 4 + >>> result.dtypes + foo int8 + bar float64 + baz int8 + dtype: object + + >>> df.downcast({"foo": "int64"}) + foo bar baz + 0 1 1.5 3.0 + 1 2 2.5 4.0 + + >>> result = df.downcast("infer") + >>> result + foo bar baz + 0 1 1.5 3 + 1 2 2.5 4 + + >>> result.dtypes + foo int64 + bar float64 + baz int64 + dtype: object + """ if self.ndim == 1: new_data = self._mgr.downcast(dtype) return self._constructor(new_data).__finalize__(self, method="downcast") if isinstance(dtype, dict): # Don't make an actual copy since setitem does not write into array - result = self.copy(deep=False) + if using_copy_on_write(): + result = self.copy(deep=False) + else: + result = self.copy(deep=True) for key, val in dtype.items(): if key in result.columns: - result[key] = result[key].downcast(dtype=val) + res = result[key].downcast(dtype=val) + result[key] = res else: new_data = self._mgr.downcast(dtype) result = self._constructor(new_data) diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index 3d0f3460c8a90..eef27cfc6cfba 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -423,7 +423,11 @@ def coerce_to_target_dtype(self, other) -> Block: @final def downcast(self, dtype: DtypeObj, using_cow: bool = False) -> list[Block]: - return self._maybe_downcast([self], downcast=dtype, using_cow=using_cow) + if not using_cow: + blk = self.copy() + else: + blk = self + return self._maybe_downcast([blk], downcast=dtype, using_cow=using_cow) @final def _maybe_downcast( diff --git a/pandas/tests/copy_view/test_downcast.py b/pandas/tests/copy_view/test_downcast.py new file mode 100644 index 0000000000000..6a2c12af089d7 --- /dev/null +++ b/pandas/tests/copy_view/test_downcast.py @@ -0,0 +1,44 @@ +import numpy as np + +from pandas import DataFrame +import pandas._testing as tm +from pandas.tests.copy_view.util import get_array + + +class TestDowncast: + def test_downcast(self, using_copy_on_write): + df = DataFrame({"a": [1.0, 2.0], "b": 1.5}) + df_orig = df.copy() + result = df.downcast("int8") + + assert not np.shares_memory(get_array(df, "a"), get_array(result, "a")) + if using_copy_on_write: + assert np.shares_memory(get_array(df, "b"), get_array(result, "b")) + else: + assert not np.shares_memory(get_array(df, "b"), get_array(result, "b")) + + result.iloc[0, 1] = 100.5 + tm.assert_frame_equal(df, df_orig) + + def test_downcast_dict(self, using_copy_on_write): + df = DataFrame({"a": [1.0, 2.0], "b": 1.5, "c": 2.0}) + df_orig = df.copy() + result = df.downcast({"a": "int8", "b": "int64"}) + + assert not np.shares_memory(get_array(df, "a"), get_array(result, "a")) + + if using_copy_on_write: + assert np.shares_memory(get_array(df, "b"), get_array(result, "b")) + assert np.shares_memory(get_array(df, "c"), get_array(result, "c")) + else: + assert not np.shares_memory(get_array(df, "b"), get_array(result, "b")) + assert not np.shares_memory(get_array(df, "c"), get_array(result, "c")) + + result.iloc[0, 1] = 100.5 + tm.assert_frame_equal(df, df_orig) + + def test_downcast_infer(self): + df = DataFrame({"a": [1.0, 2.0], "b": 1.5, "c": 2.0}) + result = df.downcast("infer") + expected = DataFrame({"a": [1, 2], "b": 1.5, "c": 2}) + tm.assert_frame_equal(result, expected) From f6d7ef01f0b4b45ef879649aa1ab87b9044ba967 Mon Sep 17 00:00:00 2001 From: Patrick Hoefler Date: Sun, 26 Feb 2023 01:08:55 +0100 Subject: [PATCH 03/10] Add whatsnew --- doc/source/whatsnew/v2.1.0.rst | 2 +- pandas/core/generic.py | 3 +++ pandas/tests/copy_view/test_downcast.py | 6 ------ 3 files changed, 4 insertions(+), 7 deletions(-) diff --git a/doc/source/whatsnew/v2.1.0.rst b/doc/source/whatsnew/v2.1.0.rst index 45b5c16415f9d..93eee8f12118c 100644 --- a/doc/source/whatsnew/v2.1.0.rst +++ b/doc/source/whatsnew/v2.1.0.rst @@ -28,7 +28,7 @@ enhancement2 Other enhancements ^^^^^^^^^^^^^^^^^^ -- +- Added :meth:`DataFrame.downcast` and :meth:`Series.downcast` (:issue:`0`) - .. --------------------------------------------------------------------------- diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 4fb4a841a6845..b4778ce72bb7e 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -7037,6 +7037,9 @@ def downcast(self: NDFrameT, dtype: Literal["infer"] | DtypeArg) -> NDFrameT: for key, val in dtype.items(): if key in result.columns: res = result[key].downcast(dtype=val) + if res.dtype == result[key].dtype: + # no-op and __setitem__ copies values right now + continue result[key] = res else: new_data = self._mgr.downcast(dtype) diff --git a/pandas/tests/copy_view/test_downcast.py b/pandas/tests/copy_view/test_downcast.py index 6a2c12af089d7..3d2ba326ed179 100644 --- a/pandas/tests/copy_view/test_downcast.py +++ b/pandas/tests/copy_view/test_downcast.py @@ -36,9 +36,3 @@ def test_downcast_dict(self, using_copy_on_write): result.iloc[0, 1] = 100.5 tm.assert_frame_equal(df, df_orig) - - def test_downcast_infer(self): - df = DataFrame({"a": [1.0, 2.0], "b": 1.5, "c": 2.0}) - result = df.downcast("infer") - expected = DataFrame({"a": [1, 2], "b": 1.5, "c": 2}) - tm.assert_frame_equal(result, expected) From bacc5a17b3a2aad11883bdb02b1bd1243a6c1e01 Mon Sep 17 00:00:00 2001 From: Patrick Hoefler Date: Sun, 26 Feb 2023 01:11:19 +0100 Subject: [PATCH 04/10] Revert --- pandas/core/apply.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/apply.py b/pandas/core/apply.py index 5fa860e1c389f..da049218d5187 100644 --- a/pandas/core/apply.py +++ b/pandas/core/apply.py @@ -261,7 +261,7 @@ def transform_dict_like(self, func): results: dict[Hashable, DataFrame | Series] = {} for name, how in func.items(): colg = obj._gotitem(name, ndim=1) - results[name] = colg.transform(how, 0, *args, **kwargs).copy(deep=False) + results[name] = colg.transform(how, 0, *args, **kwargs) return concat(results, axis=1) def transform_str_or_callable(self, func) -> DataFrame | Series: From c44949def986c922cb3c3c700eedcc7514e374d1 Mon Sep 17 00:00:00 2001 From: Patrick Hoefler Date: Sun, 26 Feb 2023 01:12:06 +0100 Subject: [PATCH 05/10] Revert --- pandas/core/generic.py | 1 - 1 file changed, 1 deletion(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index b4778ce72bb7e..bb00b822a48a7 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -7029,7 +7029,6 @@ def downcast(self: NDFrameT, dtype: Literal["infer"] | DtypeArg) -> NDFrameT: new_data = self._mgr.downcast(dtype) return self._constructor(new_data).__finalize__(self, method="downcast") if isinstance(dtype, dict): - # Don't make an actual copy since setitem does not write into array if using_copy_on_write(): result = self.copy(deep=False) else: From b9850b7f5a1b766aff52ade9442bdab31b8543b0 Mon Sep 17 00:00:00 2001 From: Patrick Hoefler Date: Sun, 26 Feb 2023 01:13:39 +0100 Subject: [PATCH 06/10] Fix copy --- pandas/core/generic.py | 12 ++++++------ pandas/core/internals/blocks.py | 6 +----- 2 files changed, 7 insertions(+), 11 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index bb00b822a48a7..8c47251e345d9 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -7025,14 +7025,14 @@ def downcast(self: NDFrameT, dtype: Literal["infer"] | DtypeArg) -> NDFrameT: baz int64 dtype: object """ + if using_copy_on_write(): + result = self.copy(deep=False) + else: + result = self.copy(deep=True) if self.ndim == 1: - new_data = self._mgr.downcast(dtype) + new_data = result._mgr.downcast(dtype) return self._constructor(new_data).__finalize__(self, method="downcast") if isinstance(dtype, dict): - if using_copy_on_write(): - result = self.copy(deep=False) - else: - result = self.copy(deep=True) for key, val in dtype.items(): if key in result.columns: res = result[key].downcast(dtype=val) @@ -7041,7 +7041,7 @@ def downcast(self: NDFrameT, dtype: Literal["infer"] | DtypeArg) -> NDFrameT: continue result[key] = res else: - new_data = self._mgr.downcast(dtype) + new_data = result._mgr.downcast(dtype) result = self._constructor(new_data) return result.__finalize__(self, method="downcast") diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index 97209db1036cd..51c8d97b36c9f 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -423,11 +423,7 @@ def coerce_to_target_dtype(self, other) -> Block: @final def downcast(self, dtype: DtypeObj, using_cow: bool = False) -> list[Block]: - if not using_cow: - blk = self.copy() - else: - blk = self - return self._maybe_downcast([blk], downcast=dtype, using_cow=using_cow) + return self._maybe_downcast([self], downcast=dtype, using_cow=using_cow) @final def _maybe_downcast( From e67bd06d0e6e9869fcd6e62581d40d00efa5fa9f Mon Sep 17 00:00:00 2001 From: Patrick Hoefler Date: Sun, 26 Feb 2023 01:18:31 +0100 Subject: [PATCH 07/10] Add gh ref --- doc/source/whatsnew/v2.1.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v2.1.0.rst b/doc/source/whatsnew/v2.1.0.rst index 93eee8f12118c..49803df7824a8 100644 --- a/doc/source/whatsnew/v2.1.0.rst +++ b/doc/source/whatsnew/v2.1.0.rst @@ -28,7 +28,7 @@ enhancement2 Other enhancements ^^^^^^^^^^^^^^^^^^ -- Added :meth:`DataFrame.downcast` and :meth:`Series.downcast` (:issue:`0`) +- Added :meth:`DataFrame.downcast` and :meth:`Series.downcast` (:issue:`51641`) - .. --------------------------------------------------------------------------- From 19ccd6619b75db0dc606899bd0c28e628d676c43 Mon Sep 17 00:00:00 2001 From: Patrick Hoefler Date: Sun, 26 Feb 2023 01:38:14 +0100 Subject: [PATCH 08/10] Fix mypy --- pandas/core/internals/array_manager.py | 3 +++ pandas/core/series.py | 3 ++- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/pandas/core/internals/array_manager.py b/pandas/core/internals/array_manager.py index 87e549ae023ef..03f8826d17807 100644 --- a/pandas/core/internals/array_manager.py +++ b/pandas/core/internals/array_manager.py @@ -368,6 +368,9 @@ def fillna(self: T, value, limit, inplace: bool, downcast) -> T: "fillna", value=value, limit=limit, inplace=inplace, downcast=downcast ) + def downcast(self: T, dtype) -> T: + return self.apply_with_block("downcast", dtype=dtype) + def astype(self: T, dtype, copy: bool | None = False, errors: str = "raise") -> T: if copy is None: copy = True diff --git a/pandas/core/series.py b/pandas/core/series.py index 362f08f23884d..4c8af4ff11428 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -49,6 +49,7 @@ CorrelationMethod, DropKeep, Dtype, + DtypeArg, DtypeObj, FilePath, FillnaOptions, @@ -5075,7 +5076,7 @@ def fillna( ) @doc(NDFrame.downcast, **_shared_doc_kwargs) - def downcast(self, dtype: Literal["infer"] | Dtype) -> Series: + def downcast(self, dtype: Literal["infer"] | DtypeArg) -> Series: return super().downcast(dtype) def pop(self, item: Hashable) -> Any: From 49fca8d074d3435bfa2e85b81833a0bf55640ec2 Mon Sep 17 00:00:00 2001 From: Patrick Hoefler Date: Mon, 27 Feb 2023 01:37:48 +0100 Subject: [PATCH 09/10] Remove dtype arg --- pandas/core/frame.py | 5 +-- pandas/core/generic.py | 45 +++------------------ pandas/core/series.py | 5 +-- pandas/tests/copy_view/test_downcast.py | 19 +-------- pandas/tests/frame/methods/test_downcast.py | 38 ++--------------- 5 files changed, 14 insertions(+), 98 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index aaadb88ba3f96..562b9d6269ea9 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -66,7 +66,6 @@ CorrelationMethod, DropKeep, Dtype, - DtypeArg, DtypeObj, FilePath, FillnaOptions, @@ -5481,8 +5480,8 @@ def fillna( ) @doc(NDFrame.downcast, **_shared_doc_kwargs) - def downcast(self, dtype: Literal["infer"] | DtypeArg) -> DataFrame: - return super().downcast(dtype) + def downcast(self) -> DataFrame: + return super().downcast() def pop(self, item: Hashable) -> Series: """ diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 8c47251e345d9..bfc980d01b5fa 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -6967,17 +6967,10 @@ def fillna( else: return result.__finalize__(self, method="fillna") - def downcast(self: NDFrameT, dtype: Literal["infer"] | DtypeArg) -> NDFrameT: + def downcast(self: NDFrameT) -> NDFrameT: """Downcasts the columns to an appropriate dtype. - Possibly casts floats to integers or integers to a smaller dtype from - the same type, e.g. int64 -> int32. - - Parameters - ---------- - dtype: "infer", dtype or dict with column -> dtype - Dtype to cast to or "infer" if the dtype should be - inferred. + Possibly casts floats to integers. The dtype is inferred. Returns ------- @@ -6986,7 +6979,7 @@ def downcast(self: NDFrameT, dtype: Literal["infer"] | DtypeArg) -> NDFrameT: Notes ----- - The downcasting logic protects against overflows and truncating floats. + The downcasting logic protects against truncating floats. If the values don't fit into the specified dtype, the column is ignored. Examples @@ -6997,22 +6990,6 @@ def downcast(self: NDFrameT, dtype: Literal["infer"] | DtypeArg) -> NDFrameT: 0 1.0 1.5 3.0 1 2.0 2.5 4.0 - >>> result = df.downcast("int8") - >>> result - foo bar baz - 0 1 1.5 3 - 1 2 2.5 4 - >>> result.dtypes - foo int8 - bar float64 - baz int8 - dtype: object - - >>> df.downcast({"foo": "int64"}) - foo bar baz - 0 1 1.5 3.0 - 1 2 2.5 4.0 - >>> result = df.downcast("infer") >>> result foo bar baz @@ -7029,20 +7006,8 @@ def downcast(self: NDFrameT, dtype: Literal["infer"] | DtypeArg) -> NDFrameT: result = self.copy(deep=False) else: result = self.copy(deep=True) - if self.ndim == 1: - new_data = result._mgr.downcast(dtype) - return self._constructor(new_data).__finalize__(self, method="downcast") - if isinstance(dtype, dict): - for key, val in dtype.items(): - if key in result.columns: - res = result[key].downcast(dtype=val) - if res.dtype == result[key].dtype: - # no-op and __setitem__ copies values right now - continue - result[key] = res - else: - new_data = result._mgr.downcast(dtype) - result = self._constructor(new_data) + new_data = result._mgr.downcast("infer") + result = self._constructor(new_data) return result.__finalize__(self, method="downcast") @overload diff --git a/pandas/core/series.py b/pandas/core/series.py index 4c8af4ff11428..4fbd785879139 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -49,7 +49,6 @@ CorrelationMethod, DropKeep, Dtype, - DtypeArg, DtypeObj, FilePath, FillnaOptions, @@ -5076,8 +5075,8 @@ def fillna( ) @doc(NDFrame.downcast, **_shared_doc_kwargs) - def downcast(self, dtype: Literal["infer"] | DtypeArg) -> Series: - return super().downcast(dtype) + def downcast(self) -> Series: + return super().downcast() def pop(self, item: Hashable) -> Any: """ diff --git a/pandas/tests/copy_view/test_downcast.py b/pandas/tests/copy_view/test_downcast.py index 3d2ba326ed179..b741267d610ed 100644 --- a/pandas/tests/copy_view/test_downcast.py +++ b/pandas/tests/copy_view/test_downcast.py @@ -9,7 +9,7 @@ class TestDowncast: def test_downcast(self, using_copy_on_write): df = DataFrame({"a": [1.0, 2.0], "b": 1.5}) df_orig = df.copy() - result = df.downcast("int8") + result = df.downcast() assert not np.shares_memory(get_array(df, "a"), get_array(result, "a")) if using_copy_on_write: @@ -19,20 +19,3 @@ def test_downcast(self, using_copy_on_write): result.iloc[0, 1] = 100.5 tm.assert_frame_equal(df, df_orig) - - def test_downcast_dict(self, using_copy_on_write): - df = DataFrame({"a": [1.0, 2.0], "b": 1.5, "c": 2.0}) - df_orig = df.copy() - result = df.downcast({"a": "int8", "b": "int64"}) - - assert not np.shares_memory(get_array(df, "a"), get_array(result, "a")) - - if using_copy_on_write: - assert np.shares_memory(get_array(df, "b"), get_array(result, "b")) - assert np.shares_memory(get_array(df, "c"), get_array(result, "c")) - else: - assert not np.shares_memory(get_array(df, "b"), get_array(result, "b")) - assert not np.shares_memory(get_array(df, "c"), get_array(result, "c")) - - result.iloc[0, 1] = 100.5 - tm.assert_frame_equal(df, df_orig) diff --git a/pandas/tests/frame/methods/test_downcast.py b/pandas/tests/frame/methods/test_downcast.py index 49c416483e512..e1d60e76938e3 100644 --- a/pandas/tests/frame/methods/test_downcast.py +++ b/pandas/tests/frame/methods/test_downcast.py @@ -1,40 +1,10 @@ -from pandas import ( - DataFrame, - Series, -) +from pandas import DataFrame import pandas._testing as tm class TestDowncast: def test_downcast(self): - df = DataFrame({"a": [1.0, 2.0], "b": 1.5, "c": 2}) - result = df.downcast("int8") - expected = DataFrame( - { - "a": Series([1, 2], dtype="int8"), - "b": 1.5, - "c": Series([2, 2], dtype="int8"), - } - ) - tm.assert_frame_equal(result, expected) - - def test_downcast_dict(self): - df = DataFrame({"a": [1.0, 2.0], "b": 1.5, "c": 2, "d": 1.0}) - df_orig = df.copy() - result = df.downcast({"a": "int8", "b": "int64", "c": "int32"}) - expected = DataFrame( - { - "a": Series([1, 2], dtype="int8"), - "b": 1.5, - "c": Series([2, 2], dtype="int32"), - "d": 1.0, - } - ) - tm.assert_frame_equal(result, expected) - tm.assert_frame_equal(df, df_orig) - - def test_downcast_infer(self): - df = DataFrame({"a": [1.0, 2.0], "b": 1.5, "c": 2.0}) - result = df.downcast("infer") - expected = DataFrame({"a": [1, 2], "b": 1.5, "c": 2}) + df = DataFrame({"a": [1.0, 2.0], "b": 1.5, "c": 2.0, "d": "a"}) + result = df.downcast() + expected = DataFrame({"a": [1, 2], "b": 1.5, "c": 2, "d": "a"}) tm.assert_frame_equal(result, expected) From 1e798415a9f0a6d7eed5258923a64878f01e8143 Mon Sep 17 00:00:00 2001 From: Patrick Hoefler Date: Mon, 27 Feb 2023 11:47:43 +0100 Subject: [PATCH 10/10] Fix docstring --- pandas/core/generic.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index bfc980d01b5fa..b1f1e565a3a81 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -6990,7 +6990,7 @@ def downcast(self: NDFrameT) -> NDFrameT: 0 1.0 1.5 3.0 1 2.0 2.5 4.0 - >>> result = df.downcast("infer") + >>> result = df.downcast() >>> result foo bar baz 0 1 1.5 3