Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

CI: Build wheels for cudf-polars #16156

Merged
merged 19 commits into from
Jul 5, 2024
Merged
Show file tree
Hide file tree
Changes from 9 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 13 additions & 3 deletions .github/workflows/pr.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,8 @@ jobs:
- docs-build
- wheel-build-cudf
- wheel-tests-cudf
- test-cudf-polars
- wheel-build-cudf-polars
- test-wheel-cudf-polars
lithomas1 marked this conversation as resolved.
Show resolved Hide resolved
- wheel-build-dask-cudf
- wheel-tests-dask-cudf
- devcontainer
Expand Down Expand Up @@ -133,17 +134,26 @@ jobs:
with:
build_type: pull-request
script: ci/test_wheel_cudf.sh
test-cudf-polars:
wheel-build-cudf-polars:
needs: wheel-build-cudf
secrets: inherit
uses: rapidsai/shared-workflows/.github/workflows/[email protected]
with:
# This selects "ARCH=amd64 + the latest supported Python + CUDA".
matrix_filter: map(select(.ARCH == "amd64")) | group_by(.CUDA_VER|split(".")|map(tonumber)|.[0]) | map(max_by([(.PY_VER|split(".")|map(tonumber)), (.CUDA_VER|split(".")|map(tonumber))]))
build_type: pull-request
script: "ci/build_wheel_cudf_polars.sh"
test-wheel-cudf-polars:
lithomas1 marked this conversation as resolved.
Show resolved Hide resolved
needs: wheel-build-cudf-polars
secrets: inherit
uses: rapidsai/shared-workflows/.github/workflows/[email protected]
with:
# This selects "ARCH=amd64 + the latest supported Python + CUDA".
matrix_filter: map(select(.ARCH == "amd64")) | group_by(.CUDA_VER|split(".")|map(tonumber)|.[0]) | map(max_by([(.PY_VER|split(".")|map(tonumber)), (.CUDA_VER|split(".")|map(tonumber))]))
build_type: pull-request
# This always runs, but only fails if this PR touches code in
# pylibcudf or cudf_polars
script: "ci/test_cudf_polars.sh"
script: "ci/test_wheel_cudf_polars.sh"
wheel-build-dask-cudf:
needs: wheel-build-cudf
secrets: inherit
Expand Down
11 changes: 11 additions & 0 deletions ci/build_wheel_cudf_polars.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
#!/bin/bash
# Copyright (c) 2023-2024, NVIDIA CORPORATION.
#
# CI entry point for the cudf_polars wheel: delegates the build of
# python/cudf_polars to ci/build_wheel.sh, then hands the package's dist/
# directory to rapids-upload-wheels-to-s3.

# Abort on any failing command, unset variable, or failed pipeline stage.
set -euo pipefail

package_dir="python/cudf_polars"

# Expansions are quoted throughout so that values are passed as single
# arguments and are never word-split or glob-expanded by the shell.
./ci/build_wheel.sh "${package_dir}"

# RAPIDS_CUDA_VERSION must be supplied by the environment; because of
# `set -u` the script aborts here if it is unset.
RAPIDS_PY_CUDA_SUFFIX="$(rapids-wheel-ctk-name-gen "${RAPIDS_CUDA_VERSION}")"
# NOTE(review): RAPIDS_PY_WHEEL_PURE="1" presumably marks this as a pure-Python
# wheel for the upload tooling — confirm against the RAPIDS CI tool docs.
RAPIDS_PY_WHEEL_NAME="cudf_polars_${RAPIDS_PY_CUDA_SUFFIX}" RAPIDS_PY_WHEEL_PURE="1" rapids-upload-wheels-to-s3 "${package_dir}/dist"
17 changes: 5 additions & 12 deletions ci/test_cudf_polars.sh → ci/test_wheel_cudf_polars.sh
lithomas1 marked this conversation as resolved.
Show resolved Hide resolved
Original file line number Diff line number Diff line change
Expand Up @@ -18,21 +18,14 @@ else
fi

RAPIDS_PY_CUDA_SUFFIX="$(rapids-wheel-ctk-name-gen ${RAPIDS_CUDA_VERSION})"
RAPIDS_PY_WHEEL_NAME="cudf_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-from-s3 ./dist
RAPIDS_PY_WHEEL_NAME="cudf_polars_${RAPIDS_PY_CUDA_SUFFIX}" RAPIDS_PY_WHEEL_PURE="1" rapids-download-wheels-from-s3 ./dist

RESULTS_DIR=${RAPIDS_TESTS_DIR:-"$(mktemp -d)"}
RAPIDS_TESTS_DIR=${RAPIDS_TESTS_DIR:-"${RESULTS_DIR}/test-results"}/
mkdir -p "${RAPIDS_TESTS_DIR}"

rapids-logger "Install cudf wheel"
# echo to expand wildcard before adding `[extra]` requires for pip
python -m pip install $(echo ./dist/cudf*.whl)[test]

rapids-logger "Install polars (allow pre-release versions)"
python -m pip install 'polars>=1.0.0a0'
# Download the cudf built in the previous step
RAPIDS_PY_WHEEL_NAME="cudf_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-from-s3 ./local-cudf-dep
python -m pip install --no-deps ./local-cudf-dep/cudf*.whl
lithomas1 marked this conversation as resolved.
Show resolved Hide resolved

rapids-logger "Install cudf_polars"
python -m pip install --no-deps python/cudf_polars
python -m pip install $(echo ./dist/cudf_polars*.whl)

rapids-logger "Run cudf_polars tests"

Expand Down
2 changes: 1 addition & 1 deletion dependencies.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -603,7 +603,7 @@ dependencies:
common:
- output_types: [conda, requirements, pyproject]
packages:
- polars>=0.20.30
- polars>=1.0
run_dask_cudf:
common:
- output_types: [conda, requirements, pyproject]
Expand Down
6 changes: 3 additions & 3 deletions python/cudf_polars/cudf_polars/dsl/expr.py
Original file line number Diff line number Diff line change
Expand Up @@ -978,15 +978,15 @@ def collect_agg(self, *, depth: int) -> AggInfo:
class Agg(Expr):
__slots__ = ("name", "options", "op", "request", "children")
_non_child = ("dtype", "name", "options")
children: tuple[Expr]
children: tuple[Expr, ...]

def __init__(
self, dtype: plc.DataType, name: str, options: Any, value: Expr
self, dtype: plc.DataType, name: str, options: Any, *children: Expr
) -> None:
super().__init__(dtype)
self.name = name
self.options = options
self.children = (value,)
self.children = children
if name not in Agg._SUPPORTED:
raise NotImplementedError(
f"Unsupported aggregation {name=}"
Expand Down
11 changes: 9 additions & 2 deletions python/cudf_polars/cudf_polars/dsl/ir.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@

import dataclasses
import itertools
import json
import types
from functools import cache
from typing import TYPE_CHECKING, Any, Callable, ClassVar
Expand Down Expand Up @@ -180,8 +181,10 @@ def __post_init__(self):
class Scan(IR):
"""Input from files."""

typ: Any
typ: str
"""What type of file are we reading? Parquet, CSV, etc..."""
options: tuple[Any, ...]
"""Type specific options, as json-encoded strings."""
paths: list[str]
"""List of paths to read from."""
file_options: Any
Expand Down Expand Up @@ -211,17 +214,21 @@ def evaluate(self, *, cache: MutableMapping[int, DataFrame]) -> DataFrame:
with_columns = options.with_columns
row_index = options.row_index
if self.typ == "csv":
opts, cloud_opts = map(json.loads, self.options)
df = DataFrame.from_cudf(
cudf.concat(
[cudf.read_csv(p, usecols=with_columns) for p in self.paths]
)
)
elif self.typ == "parquet":
opts, cloud_opts = map(json.loads, self.options)
cdf = cudf.read_parquet(self.paths, columns=with_columns)
assert isinstance(cdf, cudf.DataFrame)
df = DataFrame.from_cudf(cdf)
else:
assert_never(self.typ)
raise NotImplementedError(
f"Unhandled scan type: {self.typ}"
) # pragma: no cover; post init trips first
if row_index is not None:
name, offset = row_index
dtype = self.schema[name]
Expand Down
6 changes: 4 additions & 2 deletions python/cudf_polars/cudf_polars/dsl/translate.py
Original file line number Diff line number Diff line change
Expand Up @@ -87,9 +87,11 @@ def _(
def _(
node: pl_ir.Scan, visitor: NodeTraverser, schema: dict[str, plc.DataType]
) -> ir.IR:
typ, *options = node.scan_type
return ir.Scan(
schema,
node.scan_type,
typ,
tuple(options),
node.paths,
node.file_options,
translate_named_expr(visitor, n=node.predicate)
Expand Down Expand Up @@ -445,7 +447,7 @@ def _(node: pl_expr.Agg, visitor: NodeTraverser, dtype: plc.DataType) -> expr.Ex
dtype,
node.name,
node.options,
translate_expr(visitor, n=node.arguments),
*(translate_expr(visitor, n=n) for n in node.arguments),
)


Expand Down
2 changes: 1 addition & 1 deletion python/cudf_polars/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ license = { text = "Apache 2.0" }
requires-python = ">=3.9"
dependencies = [
"cudf==24.8.*,>=0.0.0a0",
"polars>=0.20.30",
"polars>=1.0",
] # This list was generated by `rapids-dependency-file-generator`. To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`.
classifiers = [
"Intended Audience :: Developers",
Expand Down
Loading