From d7779db41f815cf9248fd78375c1ee4c578ec563 Mon Sep 17 00:00:00 2001
From: Lawrence Mitchell
Date: Wed, 12 Jun 2024 15:49:21 +0000
Subject: [PATCH] Add coverage for both expression and dataframe filter

Note that expression filter with literals does not work because
broadcasting is not implemented. It is also the case that the result
could be computed without broadcasting in the case of scalars with
some data introspection, but we do not do that here.
---
 .../tests/expressions/test_filter.py          | 30 ++++++++++++++-----
 python/cudf_polars/tests/test_filter.py       | 26 ++++++++++++++++
 2 files changed, 49 insertions(+), 7 deletions(-)
 create mode 100644 python/cudf_polars/tests/test_filter.py

diff --git a/python/cudf_polars/tests/expressions/test_filter.py b/python/cudf_polars/tests/expressions/test_filter.py
index 783403d764c..1a8e994e3aa 100644
--- a/python/cudf_polars/tests/expressions/test_filter.py
+++ b/python/cudf_polars/tests/expressions/test_filter.py
@@ -2,19 +2,35 @@
 # SPDX-License-Identifier: Apache-2.0
 from __future__ import annotations
 
+import pytest
+
 import polars as pl
 
 from cudf_polars.testing.asserts import assert_gpu_result_equal
 
 
-def test_filter():
-    ldf = pl.DataFrame(
+@pytest.mark.parametrize(
+    "expr",
+    [
+        pytest.param(
+            pl.lit(value=False),
+            marks=pytest.mark.xfail(reason="Expression filter does not handle scalars"),
+        ),
+        pl.col("c"),
+        pl.col("b") > 2,
+    ],
+)
+@pytest.mark.parametrize("predicate_pushdown", [False, True])
+def test_filter_expression(expr, predicate_pushdown):
+    ldf = pl.LazyFrame(
         {
             "a": [1, 2, 3, 4, 5, 6, 7],
-            "b": [1, 1, 1, 1, 1, 1, 1],
+            "b": [0, 3, 1, 5, 6, 1, 0],
+            "c": [None, True, False, False, True, True, False],
         }
-    ).lazy()
+    )
 
-    # group-by is just to avoid the filter being pushed into the scan.
-    query = ldf.group_by(pl.col("a")).agg(pl.col("b").sum()).filter(pl.col("b") < 1)
-    assert_gpu_result_equal(query)
+    query = ldf.select(pl.col("a").filter(expr))
+    assert_gpu_result_equal(
+        query, collect_kwargs={"predicate_pushdown": predicate_pushdown}
+    )
diff --git a/python/cudf_polars/tests/test_filter.py b/python/cudf_polars/tests/test_filter.py
new file mode 100644
index 00000000000..f39b348144b
--- /dev/null
+++ b/python/cudf_polars/tests/test_filter.py
@@ -0,0 +1,26 @@
+# SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES.
+# SPDX-License-Identifier: Apache-2.0
+from __future__ import annotations
+
+import pytest
+
+import polars as pl
+
+from cudf_polars.testing.asserts import assert_gpu_result_equal
+
+
+@pytest.mark.parametrize("expr", [pl.col("c"), pl.col("b") < 1, pl.lit(value=True)])
+@pytest.mark.parametrize("predicate_pushdown", [False, True])
+def test_filter(expr, predicate_pushdown):
+    ldf = pl.DataFrame(
+        {
+            "a": [1, 2, 3, 4, 5, 6, 7],
+            "b": [1, 1, 1, 1, 1, 1, 1],
+            "c": [True, False, False, True, True, True, None],
+        }
+    ).lazy()
+
+    query = ldf.filter(expr)
+    assert_gpu_result_equal(
+        query, collect_kwargs={"predicate_pushdown": predicate_pushdown}
+    )