From 79b2610b192a87bfcedb063ea7b45ee1e297dedf Mon Sep 17 00:00:00 2001 From: Luke Manley Date: Mon, 23 Jan 2023 13:29:01 -0500 Subject: [PATCH] BUG/PERF: Series(index=MultiIndex).rename losing EA dtypes (#50930) BUG/PERF: Series(index=MultiIndex).rename --- doc/source/whatsnew/v2.0.0.rst | 2 ++ pandas/core/indexes/base.py | 16 +++++++--------- pandas/tests/series/methods/test_rename.py | 19 +++++++++++++++++++ 3 files changed, 28 insertions(+), 9 deletions(-) diff --git a/doc/source/whatsnew/v2.0.0.rst b/doc/source/whatsnew/v2.0.0.rst index 90a00cd01d665..909e2454b7cb8 100644 --- a/doc/source/whatsnew/v2.0.0.rst +++ b/doc/source/whatsnew/v2.0.0.rst @@ -900,6 +900,7 @@ Performance improvements - Performance improvements to :func:`read_sas` (:issue:`47403`, :issue:`47405`, :issue:`47656`, :issue:`48502`) - Memory improvement in :meth:`RangeIndex.sort_values` (:issue:`48801`) - Performance improvement in :meth:`Series.to_numpy` if ``copy=True`` by avoiding copying twice (:issue:`24345`) +- Performance improvement in :meth:`Series.rename` with :class:`MultiIndex` (:issue:`21055`) - Performance improvement in :class:`DataFrameGroupBy` and :class:`SeriesGroupBy` when ``by`` is a categorical type and ``sort=False`` (:issue:`48976`) - Performance improvement in :class:`DataFrameGroupBy` and :class:`SeriesGroupBy` when ``by`` is a categorical type and ``observed=False`` (:issue:`49596`) - Performance improvement in :func:`read_stata` with parameter ``index_col`` set to ``None`` (the default). Now the index will be a :class:`RangeIndex` instead of :class:`Int64Index` (:issue:`49745`) @@ -1020,6 +1021,7 @@ Indexing - Bug in :meth:`DataFrame.iloc` raising ``IndexError`` when indexer is a :class:`Series` with numeric extension array dtype (:issue:`49521`) - Bug in :func:`~DataFrame.describe` when formatting percentiles in the resulting index showed more decimals than needed (:issue:`46362`) - Bug in :meth:`DataFrame.compare` does not recognize differences when comparing ``NA`` with value in nullable dtypes (:issue:`48939`) +- Bug in :meth:`Series.rename` with :class:`MultiIndex` losing extension array dtypes (:issue:`21055`) - Bug in :meth:`DataFrame.isetitem` coercing extension array dtypes in :class:`DataFrame` to object (:issue:`49922`) - Bug in :class:`BusinessHour` would cause creation of :class:`DatetimeIndex` to fail when no opening hour was included in the index (:issue:`49835`) - diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index b05b7cae8042e..2948bb81d0b6a 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -6042,15 +6042,13 @@ def _transform_index(self, func, *, level=None) -> Index: Only apply function to one level of the MultiIndex if level is specified. """ if isinstance(self, ABCMultiIndex): - if level is not None: - # Caller is responsible for ensuring level is positional. - items = [ - tuple(func(y) if i == level else y for i, y in enumerate(x)) - for x in self - ] - else: - items = [tuple(func(y) for y in x) for x in self] - return type(self).from_tuples(items, names=self.names) + values = [ + self.get_level_values(i).map(func) + if i == level or level is None + else self.get_level_values(i) + for i in range(self.nlevels) + ] + return type(self).from_arrays(values) else: items = [func(x) for x in self] return Index(items, name=self.name, tupleize_cols=False) diff --git a/pandas/tests/series/methods/test_rename.py b/pandas/tests/series/methods/test_rename.py index d0392929cb082..93c4fbb7f3c46 100644 --- a/pandas/tests/series/methods/test_rename.py +++ b/pandas/tests/series/methods/test_rename.py @@ -136,6 +136,25 @@ def test_rename_series_with_multiindex(self): tm.assert_series_equal(result, series_expected) + def test_rename_series_with_multiindex_keeps_ea_dtypes(self): + # GH21055 + arrays = [ + Index([1, 2, 3], dtype="Int64").astype("category"), + Index([1, 2, 3], dtype="Int64"), + ] + mi = MultiIndex.from_arrays(arrays, names=["A", "B"]) + ser = Series(1, index=mi) + result = ser.rename({1: 4}, level=1) + + arrays_expected = [ + Index([1, 2, 3], dtype="Int64").astype("category"), + Index([4, 2, 3], dtype="Int64"), + ] + mi_expected = MultiIndex.from_arrays(arrays_expected, names=["A", "B"]) + expected = Series(1, index=mi_expected) + + tm.assert_series_equal(result, expected) + def test_rename_error_arg(self): # GH 46889 ser = Series(["foo", "bar"])