diff --git a/python/cudf/cudf/core/column_accessor.py b/python/cudf/cudf/core/column_accessor.py
index ec854cb977d..bec9c367ba9 100644
--- a/python/cudf/cudf/core/column_accessor.py
+++ b/python/cudf/cudf/core/column_accessor.py
@@ -511,7 +511,7 @@ def _select_by_label_list_like(self, key: Any) -> ColumnAccessor:
     def _select_by_label_grouped(self, key: Any) -> ColumnAccessor:
         result = self._grouped_data[key]
         if isinstance(result, cudf.core.column.ColumnBase):
-            return self.__class__({key: result})
+            return self.__class__({key: result}, multiindex=self.multiindex)
         else:
             if self.multiindex:
                 result = _to_flat_dict(result)
diff --git a/python/cudf/cudf/core/dataframe.py b/python/cudf/cudf/core/dataframe.py
index fc624c0b8eb..c80c2a7272e 100644
--- a/python/cudf/cudf/core/dataframe.py
+++ b/python/cudf/cudf/core/dataframe.py
@@ -173,13 +173,13 @@ def _can_downcast_to_series(self, df, arg):
             all_numeric = all(is_numeric_dtype(t) for t in dtypes)
             if all_numeric:
                 return True
+            if isinstance(arg[1], tuple):
+                return True
         if ncols == 1:
             if type(arg[1]) is slice:
                 return False
             if isinstance(arg[1], tuple):
-                # Multiindex indexing with a slice
-                if any(isinstance(v, slice) for v in arg):
-                    return False
+                return len(arg[1]) == df._data.nlevels
             if not (is_list_like(arg[1]) or is_column_like(arg[1])):
                 return True
         return False
@@ -193,7 +193,10 @@ def _downcast_to_series(self, df, arg):
         nrows, ncols = df.shape
         # determine the axis along which the Series is taken:
         if nrows == 1 and ncols == 1:
-            if is_scalar(arg[0]) and is_scalar(arg[1]):
+            if is_scalar(arg[0]) and (
+                is_scalar(arg[1])
+                or (df._data.multiindex and arg[1] in df._column_names)
+            ):
                 return df[df._column_names[0]].iloc[0]
             elif not is_scalar(arg[0]):
                 axis = 1
@@ -288,13 +291,20 @@ def _getitem_tuple_arg(self, arg):
                     )
                 else:
                     tmp_col_name = str(uuid4())
+                    cantor_name = "_" + "_".join(
+                        map(str, columns_df._data.names)
+                    )
+                    if columns_df._data.multiindex:
+                        # column names must be appropriate length tuples
+                        extra = tuple(
+                            "" for _ in range(columns_df._data.nlevels - 1)
+                        )
+                        tmp_col_name = (tmp_col_name, *extra)
+                        cantor_name = (cantor_name, *extra)
                     other_df = DataFrame(
                         {tmp_col_name: column.arange(len(tmp_arg[0]))},
                         index=as_index(tmp_arg[0]),
                     )
-                    cantor_name = "_" + "_".join(
-                        map(str, columns_df._data.names)
-                    )
                     columns_df[cantor_name] = column.arange(len(columns_df))
                     df = other_df.join(columns_df, how="inner")
                     # as join is not assigning any names to index,
diff --git a/python/cudf/cudf/tests/test_indexing.py b/python/cudf/cudf/tests/test_indexing.py
index d747ed13e27..2e169a2b0b1 100644
--- a/python/cudf/cudf/tests/test_indexing.py
+++ b/python/cudf/cudf/tests/test_indexing.py
@@ -2069,6 +2069,31 @@ def test_loc_index_notinindex_slice(
     assert_eq(expect, actual)
 
 
+@pytest.mark.parametrize(
+    "arg",
+    [
+        (2, ("one", "second")),
+        (slice(None, None, None), ("two", "first")),
+        (1, ("one", "first")),
+        (slice(None, None, None), ("two", "second")),
+        (slice(None, None, None), ("two", "first", "three")),
+        (3, ("two", "first", "three")),
+        (slice(None, None, None), ("two",)),
+        (0, ("two",)),
+    ],
+)
+def test_loc_dataframe_column_multiindex(arg):
+    gdf = cudf.DataFrame(
+        [list("abcd"), list("efgh"), list("ijkl"), list("mnop")],
+        columns=cudf.MultiIndex.from_product(
+            [["one", "two"], ["first", "second"], ["three"]]
+        ),
+    )
+    pdf = gdf.to_pandas()
+
+    assert_eq(gdf.loc[arg], pdf.loc[arg])
+
+
 @pytest.mark.parametrize(
     "arg", [slice(2, 4), slice(2, 5), slice(2.3, 5), slice(4.6, 6)]
 )