# Patch summary (commentary only; text before the first "diff --git" header
# is skipped by patch tools).
#
# * dsl/expr.py: adds the LiteralColumn expression node. Its constructor
#   converts the given polars PySeries to a pyarrow array with to_arrow()
#   and casts it to the type returned by dtypes.downcast_arrow_lists();
#   do_evaluate() wraps plc.interop.from_arrow() of that array in a Column
#   and does not read its df/context/mapping arguments.
# * dsl/translate.py: a Literal node whose value is a plrs.PySeries is now
#   translated to expr.LiteralColumn; every other value still goes through
#   pa.scalar() into expr.Literal.
# * tests/expressions/test_series_literal.py: new test selecting an integer,
#   a String and a List(UInt16) Series literal from an empty LazyFrame.
#
# NOTE(review): line breaks and indentation were reconstructed from the hunk
# line counts; hunk content is otherwise unchanged so the "index" blob ids
# should still describe it -- confirm against the original commit.
diff --git a/python/cudf_polars/cudf_polars/dsl/expr.py b/python/cudf_polars/cudf_polars/dsl/expr.py
index 871134665af..99e5a59de2f 100644
--- a/python/cudf_polars/cudf_polars/dsl/expr.py
+++ b/python/cudf_polars/cudf_polars/dsl/expr.py
@@ -27,11 +27,12 @@
 import cudf._lib.pylibcudf as plc
 
 from cudf_polars.containers import Column, NamedColumn
-from cudf_polars.utils import sorting
+from cudf_polars.utils import dtypes, sorting
 
 if TYPE_CHECKING:
     from collections.abc import Mapping, Sequence
 
+    import polars.polars as plrs
     import polars.type_aliases as pl_types
 
     from cudf_polars.containers import DataFrame
@@ -368,6 +369,29 @@ def do_evaluate(
         return Column(plc.Column.from_scalar(plc.interop.from_arrow(self.value), 1))
 
 
+class LiteralColumn(Expr):
+    __slots__ = ("value",)
+    _non_child = ("dtype", "value")
+    value: pa.Array[Any, Any]
+    children: tuple[()]
+
+    def __init__(self, dtype: plc.DataType, value: plrs.PySeries) -> None:
+        super().__init__(dtype)
+        data = value.to_arrow()
+        self.value = data.cast(dtypes.downcast_arrow_lists(data.type))
+
+    def do_evaluate(
+        self,
+        df: DataFrame,
+        *,
+        context: ExecutionContext = ExecutionContext.FRAME,
+        mapping: Mapping[Expr, Column] | None = None,
+    ) -> Column:
+        """Evaluate this expression given a dataframe for context."""
+        # datatype of pyarrow array is correct by construction.
+        return Column(plc.interop.from_arrow(self.value))
+
+
 class Col(Expr):
     __slots__ = ("name",)
     _non_child = ("dtype", "name")
diff --git a/python/cudf_polars/cudf_polars/dsl/translate.py b/python/cudf_polars/cudf_polars/dsl/translate.py
index 5d289885f47..11c6f8301f7 100644
--- a/python/cudf_polars/cudf_polars/dsl/translate.py
+++ b/python/cudf_polars/cudf_polars/dsl/translate.py
@@ -12,6 +12,7 @@
 import pyarrow as pa
 from typing_extensions import assert_never
 
+import polars.polars as plrs
 from polars.polars import _expr_nodes as pl_expr, _ir_nodes as pl_ir
 
 import cudf._lib.pylibcudf as plc
@@ -373,6 +374,8 @@ def _(node: pl_expr.Window, visitor: NodeTraverser, dtype: plc.DataType) -> expr
 
 @_translate_expr.register
 def _(node: pl_expr.Literal, visitor: NodeTraverser, dtype: plc.DataType) -> expr.Expr:
+    if isinstance(node.value, plrs.PySeries):
+        return expr.LiteralColumn(dtype, node.value)
     value = pa.scalar(node.value, type=plc.interop.to_arrow(dtype))
     return expr.Literal(dtype, value)
 
diff --git a/python/cudf_polars/tests/expressions/test_series_literal.py b/python/cudf_polars/tests/expressions/test_series_literal.py
new file mode 100644
index 00000000000..c7613cf1886
--- /dev/null
+++ b/python/cudf_polars/tests/expressions/test_series_literal.py
@@ -0,0 +1,19 @@
+# SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES.
+# SPDX-License-Identifier: Apache-2.0
+from __future__ import annotations
+
+import polars as pl
+
+from cudf_polars.testing.asserts import assert_gpu_result_equal
+
+
+def test_select_series_literal():
+    df = pl.LazyFrame({})
+
+    q = df.select(
+        a=pl.Series([1, 2, 3]),
+        b=pl.Series(["a", "b", "c"], dtype=pl.String()),
+        c=pl.Series([[1, 2], [3], None], dtype=pl.List(pl.UInt16())),
+    )
+
+    assert_gpu_result_equal(q)