From c3cad1d7a0aa799a64ec767edb64686f99be78e6 Mon Sep 17 00:00:00 2001 From: GALI PREM SAGAR Date: Mon, 4 Mar 2024 16:22:01 -0600 Subject: [PATCH 1/2] Fix `ListColumn.to_pandas()` to retain `list` type (#15155) Fixes: #14568 This PR fixes `ListColumn.to_pandas()` by calling `ArrowArray.to_pylist()` method to retain `list` type in pandas series. Authors: - GALI PREM SAGAR (https://github.com/galipremsagar) - Vyas Ramasubramani (https://github.com/vyasr) Approvers: - Vyas Ramasubramani (https://github.com/vyasr) - Matthew Roeschke (https://github.com/mroeschke) - Richard (Rick) Zamora (https://github.com/rjzamora) URL: https://github.com/rapidsai/cudf/pull/15155 --- python/cudf/cudf/core/column/lists.py | 18 ++++++++++++++++++ python/cudf/cudf/tests/test_list.py | 4 +++- .../dask_cudf/dask_cudf/tests/test_groupby.py | 6 +----- 3 files changed, 22 insertions(+), 6 deletions(-) diff --git a/python/cudf/cudf/core/column/lists.py b/python/cudf/cudf/core/column/lists.py index b2205af34e8..d1bf0b74d3c 100644 --- a/python/cudf/cudf/core/column/lists.py +++ b/python/cudf/cudf/core/column/lists.py @@ -6,6 +6,7 @@ from typing import List, Optional, Sequence, Tuple, Union import numpy as np +import pandas as pd import pyarrow as pa from typing_extensions import Self @@ -288,6 +289,23 @@ def _transform_leaves(self, func, *args, **kwargs) -> Self: ) return lc + def to_pandas( + self, + *, + index: Optional[pd.Index] = None, + nullable: bool = False, + ) -> pd.Series: + # Can't rely on Column.to_pandas implementation for lists. + # Need to perform `to_pylist` to preserve list types. 
+ if nullable: + raise NotImplementedError(f"{nullable=} is not implemented.") + + pd_series = pd.Series(self.to_arrow().to_pylist(), dtype="object") + + if index is not None: + pd_series.index = index + return pd_series + class ListMethods(ColumnMethods): """ diff --git a/python/cudf/cudf/tests/test_list.py b/python/cudf/cudf/tests/test_list.py index 7ae7ae34b97..f04cb8a91a4 100644 --- a/python/cudf/cudf/tests/test_list.py +++ b/python/cudf/cudf/tests/test_list.py @@ -1,4 +1,4 @@ -# Copyright (c) 2020-2023, NVIDIA CORPORATION. +# Copyright (c) 2020-2024, NVIDIA CORPORATION. import functools import operator @@ -41,6 +41,8 @@ def test_create_list_series(data): expect = pd.Series(data) got = cudf.Series(data) assert_eq(expect, got) + assert isinstance(got[0], type(expect[0])) + assert isinstance(got.to_pandas()[0], type(expect[0])) @pytest.mark.parametrize( diff --git a/python/dask_cudf/dask_cudf/tests/test_groupby.py b/python/dask_cudf/dask_cudf/tests/test_groupby.py index c8cc6e65fa5..30251b88dea 100644 --- a/python/dask_cudf/dask_cudf/tests/test_groupby.py +++ b/python/dask_cudf/dask_cudf/tests/test_groupby.py @@ -702,13 +702,9 @@ def test_is_supported(arg, supported): def test_groupby_unique_lists(): df = pd.DataFrame({"a": [0, 0, 0, 1, 1, 1], "b": [10, 10, 10, 7, 8, 9]}) - ddf = dd.from_pandas(df, 2) gdf = cudf.from_pandas(df) gddf = dask_cudf.from_cudf(gdf, 2) - dd.assert_eq( - ddf.groupby("a").b.unique().compute(), - gddf.groupby("a").b.unique().compute(), - ) + dd.assert_eq( gdf.groupby("a").b.unique(), gddf.groupby("a").b.unique().compute(), From 4f1315587df1d64c384f018d90d4ef4fe69a96be Mon Sep 17 00:00:00 2001 From: Yunsong Wang Date: Mon, 4 Mar 2024 14:38:53 -0800 Subject: [PATCH 2/2] Update labeler and codeowner configs for CMake files (#15208) When working on #15206, I noticed the `rapids_config.cmake` file was not properly labeled. Based on offline discussions, we also noticed that the file's codeowner was misconfigured as well. 
This PR updates both the GitHub `labeler` and `CODEOWNERS` files to properly handle files with the `.cmake` extension. Authors: - Yunsong Wang (https://github.com/PointKernel) Approvers: - Bradley Dice (https://github.com/bdice) - Ray Douglass (https://github.com/raydouglass) URL: https://github.com/rapidsai/cudf/pull/15208 --- .github/CODEOWNERS | 1 + .github/labeler.yml | 1 + 2 files changed, 2 insertions(+) diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS index 9578d32d13d..31cfeaf4ca3 100644 --- a/.github/CODEOWNERS +++ b/.github/CODEOWNERS @@ -11,6 +11,7 @@ python/dask_cudf/ @rapidsai/cudf-dask-codeowners cpp/CMakeLists.txt @rapidsai/cudf-cmake-codeowners cpp/libcudf_kafka/CMakeLists.txt @rapidsai/cudf-cmake-codeowners **/cmake/ @rapidsai/cudf-cmake-codeowners +*.cmake @rapidsai/cudf-cmake-codeowners #java code owners java/ @rapidsai/cudf-java-codeowners diff --git a/.github/labeler.yml b/.github/labeler.yml index b0b0db9684a..d14344384d1 100644 --- a/.github/labeler.yml +++ b/.github/labeler.yml @@ -10,6 +10,7 @@ libcudf: CMake: - '**/CMakeLists.txt' - '**/cmake/**' + - '**/*.cmake' cuDF (Java): - 'java/**'