From 4ce99af7438d38e91ee2540336a278ade2fffd79 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Wed, 21 Feb 2024 06:53:38 -1000 Subject: [PATCH] Fix reductions when DataFrame has MultiIndex columns (#15097) closes #15085 Authors: - Matthew Roeschke (https://github.com/mroeschke) Approvers: - GALI PREM SAGAR (https://github.com/galipremsagar) - Bradley Dice (https://github.com/bdice) URL: https://github.com/rapidsai/cudf/pull/15097 --- python/cudf/cudf/core/dataframe.py | 11 +++++++---- python/cudf/cudf/tests/test_reductions.py | 10 ++++++++++ 2 files changed, 17 insertions(+), 4 deletions(-) diff --git a/python/cudf/cudf/core/dataframe.py b/python/cudf/cudf/core/dataframe.py index 1a6376d1c00..89abd7be0ba 100644 --- a/python/cudf/cudf/core/dataframe.py +++ b/python/cudf/cudf/core/dataframe.py @@ -3955,7 +3955,6 @@ def transpose(self): Not supporting *copy* because default and only behavior is copy=True """ - index = self._data.to_pandas_index() columns = self.index.copy(deep=False) if self._num_columns == 0 or self._num_rows == 0: @@ -6202,9 +6201,13 @@ def _reduce( "Columns must all have the same dtype to " f"perform {op=} with {axis=}" ) - return Series._from_data( - {None: as_column(result)}, as_index(source._data.names) - ) + if source._data.multiindex: + idx = MultiIndex.from_tuples( + source._data.names, names=source._data.level_names + ) + else: + idx = as_index(source._data.names) + return Series._from_data({None: as_column(result)}, idx) elif axis == 1: return source._apply_cupy_method_axis_1(op, **kwargs) else: diff --git a/python/cudf/cudf/tests/test_reductions.py b/python/cudf/cudf/tests/test_reductions.py index 1a38cb3dd22..c6ffa1d2bc7 100644 --- a/python/cudf/cudf/tests/test_reductions.py +++ b/python/cudf/cudf/tests/test_reductions.py @@ -366,3 +366,13 @@ def test_reductions_axis_none_warning(op): ): expected = getattr(pdf, op)(axis=None) assert_eq(expected, actual, check_dtype=False) + + +def
test_reduction_column_multiindex(): + idx = cudf.MultiIndex.from_tuples( + [("a", 1), ("a", 2)], names=["foo", "bar"] + ) + df = cudf.DataFrame(np.array([[1, 3], [2, 4]]), columns=idx) + result = df.mean() + expected = df.to_pandas().mean() + assert_eq(result, expected)