From 5c93384966328b09fd243983458da9886c982e4d Mon Sep 17 00:00:00 2001
From: Lawrence Mitchell
Date: Wed, 12 Jun 2024 16:16:53 +0000
Subject: [PATCH] Add coverage selecting len from a dataframe (number of rows)

Fix bug (and report a polars issue) for the case that the dataframe is
empty, and therefore we cannot ask a column for its length.
---
 .../cudf_polars/containers/dataframe.py       |  2 +-
 .../cudf_polars/tests/expressions/test_len.py | 26 +++++++++++++++++++
 2 files changed, 27 insertions(+), 1 deletion(-)
 create mode 100644 python/cudf_polars/tests/expressions/test_len.py

diff --git a/python/cudf_polars/cudf_polars/containers/dataframe.py b/python/cudf_polars/cudf_polars/containers/dataframe.py
index d1f7a9ed2cf..ec8d00c3123 100644
--- a/python/cudf_polars/cudf_polars/containers/dataframe.py
+++ b/python/cudf_polars/cudf_polars/containers/dataframe.py
@@ -70,7 +70,7 @@ def num_columns(self) -> int:
     @cached_property
     def num_rows(self) -> int:
         """Number of rows."""
-        return self.table.num_rows()
+        return 0 if len(self.columns) == 0 else self.table.num_rows()
 
     @classmethod
     def from_cudf(cls, df: cudf.DataFrame) -> Self:
diff --git a/python/cudf_polars/tests/expressions/test_len.py b/python/cudf_polars/tests/expressions/test_len.py
new file mode 100644
index 00000000000..03b30928184
--- /dev/null
+++ b/python/cudf_polars/tests/expressions/test_len.py
@@ -0,0 +1,26 @@
+# SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES.
+# SPDX-License-Identifier: Apache-2.0
+from __future__ import annotations
+
+import pytest
+
+import polars as pl
+
+from cudf_polars.testing.asserts import assert_gpu_result_equal
+
+
+@pytest.mark.parametrize("dtype", [pl.UInt32, pl.Int32, None])
+@pytest.mark.parametrize("empty", [False, True])
+def test_len(dtype, empty):
+    """Select ``pl.len()`` from an empty or three-row LazyFrame.
+
+    When ``dtype`` is not None, the length expression is cast to it first.
+    """
+    data = {} if empty else {"a": [1, 2, 3]}
+    length = pl.len()
+    if dtype is not None:
+        length = length.cast(dtype)
+    q = pl.LazyFrame(data).select(length)
+
+    # Workaround for https://github.com/pola-rs/polars/issues/16904
+    assert_gpu_result_equal(q, collect_kwargs={"projection_pushdown": False})