From 691c0675b3b4de3ef416f491d680befac78bad83 Mon Sep 17 00:00:00 2001 From: jschendel Date: Thu, 1 Mar 2018 00:54:58 -0700 Subject: [PATCH] BUG: Preserve column metadata with DataFrame.astype --- doc/source/whatsnew/v0.23.0.txt | 1 + pandas/core/generic.py | 16 ++++++++++------ pandas/tests/frame/test_dtypes.py | 9 +++++++++ 3 files changed, 20 insertions(+), 6 deletions(-) diff --git a/doc/source/whatsnew/v0.23.0.txt b/doc/source/whatsnew/v0.23.0.txt index 90ce6b47728fb4..fb19fd81fe7c79 100644 --- a/doc/source/whatsnew/v0.23.0.txt +++ b/doc/source/whatsnew/v0.23.0.txt @@ -954,6 +954,7 @@ Reshaping - Bug in :func:`qcut` where datetime and timedelta data with ``NaT`` present raised a ``ValueError`` (:issue:`19768`) - Bug in :func:`DataFrame.iterrows`, which would infers strings not compliant to `ISO8601 `_ to datetimes (:issue:`19671`) - Bug in :class:`Series` constructor with ``Categorical`` where a ```ValueError`` is not raised when an index of different length is given (:issue:`19342`) +- Bug in :meth:`DataFrame.astype` where column metadata is lost when converting to categorical or a dictionary of dtypes (:issue:`19920`) Other ^^^^^ diff --git a/pandas/core/generic.py b/pandas/core/generic.py index c4eb7dd7e7a7e3..43f7b673c7cbd9 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -4436,17 +4436,21 @@ def astype(self, dtype, copy=True, errors='raise', **kwargs): results.append(col.astype(dtype[col_name], copy=copy)) else: results.append(results.append(col.copy() if copy else col)) - return pd.concat(results, axis=1, copy=False) elif is_categorical_dtype(dtype) and self.ndim > 1: # GH 18099: columnwise conversion to categorical results = (self[col].astype(dtype, copy=copy) for col in self) - return pd.concat(results, axis=1, copy=False) - # else, only a single dtype is given - new_data = self._data.astype(dtype=dtype, copy=copy, errors=errors, - **kwargs) - return self._constructor(new_data).__finalize__(self) + else: + # else, only a single dtype is given + new_data = self._data.astype(dtype=dtype, copy=copy, errors=errors, + **kwargs) + return self._constructor(new_data).__finalize__(self) + + # GH 19920: retain column metadata after concat + result = pd.concat(results, axis=1, copy=False) + result.columns = self.columns + return result def copy(self, deep=True): """ diff --git a/pandas/tests/frame/test_dtypes.py b/pandas/tests/frame/test_dtypes.py index 430d43019afc2d..90daa9aa882c8b 100644 --- a/pandas/tests/frame/test_dtypes.py +++ b/pandas/tests/frame/test_dtypes.py @@ -649,6 +649,15 @@ def test_astype_categoricaldtype_class_raises(self, cls): with tm.assert_raises_regex(TypeError, xpr): df['A'].astype(cls) + @pytest.mark.parametrize('dtype', [ + {100: 'float64', 200: 'uint64'}, 'category', 'float64']) + def test_astype_column_metadata(self, dtype): + # GH 19920 + columns = pd.UInt64Index([100, 200, 300], name='foo') + df = DataFrame(np.arange(15).reshape(5, 3), columns=columns) + df = df.astype(dtype) + tm.assert_index_equal(df.columns, columns) + @pytest.mark.parametrize("dtype", ["M8", "m8"]) @pytest.mark.parametrize("unit", ['ns', 'us', 'ms', 's', 'h', 'm', 'D']) def test_astype_from_datetimelike_to_objectt(self, dtype, unit):