Skip to content

Commit

Permalink
BUG: Preserve column metadata with DataFrame.astype
Browse files Browse the repository at this point in the history
  • Loading branch information
jschendel committed Mar 1, 2018
1 parent 52559f5 commit 691c067
Show file tree
Hide file tree
Showing 3 changed files with 20 additions and 6 deletions.
1 change: 1 addition & 0 deletions doc/source/whatsnew/v0.23.0.txt
Original file line number Diff line number Diff line change
Expand Up @@ -954,6 +954,7 @@ Reshaping
- Bug in :func:`qcut` where datetime and timedelta data with ``NaT`` present raised a ``ValueError`` (:issue:`19768`)
- Bug in :func:`DataFrame.iterrows`, which would infer strings not compliant with `ISO8601 <https://en.wikipedia.org/wiki/ISO_8601>`_ as datetimes (:issue:`19671`)
- Bug in :class:`Series` constructor with ``Categorical`` where a ``ValueError`` is not raised when an index of different length is given (:issue:`19342`)
- Bug in :meth:`DataFrame.astype` where column metadata is lost when converting to categorical or a dictionary of dtypes (:issue:`19920`)

Other
^^^^^
Expand Down
16 changes: 10 additions & 6 deletions pandas/core/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -4436,17 +4436,21 @@ def astype(self, dtype, copy=True, errors='raise', **kwargs):
results.append(col.astype(dtype[col_name], copy=copy))
else:
results.append(results.append(col.copy() if copy else col))
return pd.concat(results, axis=1, copy=False)

elif is_categorical_dtype(dtype) and self.ndim > 1:
# GH 18099: columnwise conversion to categorical
results = (self[col].astype(dtype, copy=copy) for col in self)
return pd.concat(results, axis=1, copy=False)

# else, only a single dtype is given
new_data = self._data.astype(dtype=dtype, copy=copy, errors=errors,
**kwargs)
return self._constructor(new_data).__finalize__(self)
else:
# else, only a single dtype is given
new_data = self._data.astype(dtype=dtype, copy=copy, errors=errors,
**kwargs)
return self._constructor(new_data).__finalize__(self)

# GH 19920: retain column metadata after concat
result = pd.concat(results, axis=1, copy=False)
result.columns = self.columns
return result

def copy(self, deep=True):
"""
Expand Down
9 changes: 9 additions & 0 deletions pandas/tests/frame/test_dtypes.py
Original file line number Diff line number Diff line change
Expand Up @@ -649,6 +649,15 @@ def test_astype_categoricaldtype_class_raises(self, cls):
with tm.assert_raises_regex(TypeError, xpr):
df['A'].astype(cls)

@pytest.mark.parametrize('dtype', [
{100: 'float64', 200: 'uint64'}, 'category', 'float64'])
def test_astype_column_metadata(self, dtype):
# GH 19920
columns = pd.UInt64Index([100, 200, 300], name='foo')
df = DataFrame(np.arange(15).reshape(5, 3), columns=columns)
df = df.astype(dtype)
tm.assert_index_equal(df.columns, columns)

@pytest.mark.parametrize("dtype", ["M8", "m8"])
@pytest.mark.parametrize("unit", ['ns', 'us', 'ms', 's', 'h', 'm', 'D'])
def test_astype_from_datetimelike_to_objectt(self, dtype, unit):
Expand Down

0 comments on commit 691c067

Please sign in to comment.