Skip to content

Commit

Permalink
Setup pylibcudf package (#16299)
Browse files Browse the repository at this point in the history
Migrates cudf._lib.pylibcudf to a new pylibcudf package

Authors:
  - Thomas Li (https://github.com/lithomas1)
  - Matthew Murray (https://github.com/Matt711)
  - Vyas Ramasubramani (https://github.com/vyasr)

Approvers:
  - Vyas Ramasubramani (https://github.com/vyasr)
  - James Lamb (https://github.com/jameslamb)

URL: #16299
  • Loading branch information
lithomas1 authored Aug 16, 2024
1 parent 30011c5 commit bc8ca9b
Show file tree
Hide file tree
Showing 475 changed files with 1,916 additions and 1,522 deletions.
2 changes: 1 addition & 1 deletion .github/labeler.yml
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ cudf.polars:
- 'python/cudf_polars/**'

pylibcudf:
- 'python/cudf/cudf/_lib/pylibcudf/**'
- 'python/cudf/pylibcudf/**'

libcudf:
- 'cpp/**'
Expand Down
12 changes: 10 additions & 2 deletions .github/workflows/pr.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ jobs:
- static-configure
- conda-notebook-tests
- docs-build
- wheel-build-pylibcudf
- wheel-build-cudf
- wheel-tests-cudf
- wheel-build-cudf-polars
Expand Down Expand Up @@ -120,10 +121,17 @@ jobs:
arch: "amd64"
container_image: "rapidsai/ci-conda:latest"
run_script: "ci/build_docs.sh"
wheel-build-cudf:
wheel-build-pylibcudf:
needs: checks
secrets: inherit
uses: rapidsai/shared-workflows/.github/workflows/[email protected]
with:
build_type: pull-request
script: "ci/build_wheel_pylibcudf.sh"
wheel-build-cudf:
needs: wheel-build-pylibcudf
secrets: inherit
uses: rapidsai/shared-workflows/.github/workflows/[email protected]
with:
build_type: pull-request
script: "ci/build_wheel_cudf.sh"
Expand All @@ -135,7 +143,7 @@ jobs:
build_type: pull-request
script: ci/test_wheel_cudf.sh
wheel-build-cudf-polars:
needs: wheel-build-cudf
needs: wheel-build-pylibcudf
secrets: inherit
uses: rapidsai/shared-workflows/.github/workflows/[email protected]
with:
Expand Down
15 changes: 12 additions & 3 deletions build.sh
Original file line number Diff line number Diff line change
Expand Up @@ -17,11 +17,12 @@ ARGS=$*
# script, and that this script resides in the repo dir!
REPODIR=$(cd $(dirname $0); pwd)

VALIDARGS="clean libcudf cudf cudfjar dask_cudf benchmarks tests libcudf_kafka cudf_kafka custreamz -v -g -n --pydevelop -l --allgpuarch --disable_nvtx --opensource_nvcomp --show_depr_warn --ptds -h --build_metrics --incl_cache_stats --disable_large_strings"
HELP="$0 [clean] [libcudf] [cudf] [cudfjar] [dask_cudf] [benchmarks] [tests] [libcudf_kafka] [cudf_kafka] [custreamz] [-v] [-g] [-n] [-h] [--cmake-args=\\\"<args>\\\"]
VALIDARGS="clean libcudf pylibcudf cudf cudfjar dask_cudf benchmarks tests libcudf_kafka cudf_kafka custreamz -v -g -n --pydevelop -l --allgpuarch --disable_nvtx --opensource_nvcomp --show_depr_warn --ptds -h --build_metrics --incl_cache_stats --disable_large_strings"
HELP="$0 [clean] [libcudf] [pylibcudf] [cudf] [cudfjar] [dask_cudf] [benchmarks] [tests] [libcudf_kafka] [cudf_kafka] [custreamz] [-v] [-g] [-n] [-h] [--cmake-args=\\\"<args>\\\"]
clean - remove all existing build artifacts and configuration (start
over)
libcudf - build the cudf C++ code only
pylibcudf - build the pylibcudf Python package
cudf - build the cudf Python package
cudfjar - build cudf JAR with static libcudf using devtoolset toolchain
dask_cudf - build the dask_cudf Python package
Expand Down Expand Up @@ -268,7 +269,7 @@ fi
################################################################################
# Configure, build, and install libcudf

if buildAll || hasArg libcudf || hasArg cudf || hasArg cudfjar; then
if buildAll || hasArg libcudf || hasArg pylibcudf || hasArg cudf || hasArg cudfjar; then
if (( ${BUILD_ALL_GPU_ARCH} == 0 )); then
CUDF_CMAKE_CUDA_ARCHITECTURES="${CUDF_CMAKE_CUDA_ARCHITECTURES:-NATIVE}"
if [[ "$CUDF_CMAKE_CUDA_ARCHITECTURES" == "NATIVE" ]]; then
Expand Down Expand Up @@ -340,6 +341,14 @@ if buildAll || hasArg libcudf; then
fi
fi

# Build and install the pylibcudf Python package
if buildAll || hasArg pylibcudf; then

cd ${REPODIR}/python/pylibcudf
SKBUILD_CMAKE_ARGS="-DCMAKE_PREFIX_PATH=${INSTALL_PREFIX};-DCMAKE_LIBRARY_PATH=${LIBCUDF_BUILD_DIR};-DCMAKE_CUDA_ARCHITECTURES=${CUDF_CMAKE_CUDA_ARCHITECTURES};${EXTRA_CMAKE_ARGS}" \
python ${PYTHON_ARGS_FOR_INSTALL} .
fi

# Build and install the cudf Python package
if buildAll || hasArg cudf; then

Expand Down
2 changes: 1 addition & 1 deletion ci/build_docs.sh
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ PYTHON_CHANNEL=$(rapids-download-conda-from-s3 python)
rapids-mamba-retry install \
--channel "${CPP_CHANNEL}" \
--channel "${PYTHON_CHANNEL}" \
libcudf cudf dask-cudf
libcudf pylibcudf cudf dask-cudf

export RAPIDS_DOCS_DIR="$(mktemp -d)"

Expand Down
7 changes: 7 additions & 0 deletions ci/build_python.sh
Original file line number Diff line number Diff line change
Expand Up @@ -22,9 +22,16 @@ CPP_CHANNEL=$(rapids-download-conda-from-s3 cpp)
# TODO: Remove `--no-test` flag once importing on a CPU
# node works correctly
# With boa installed conda build forwards to the boa builder

RAPIDS_PACKAGE_VERSION=$(head -1 ./VERSION) rapids-conda-retry mambabuild \
--no-test \
--channel "${CPP_CHANNEL}" \
conda/recipes/pylibcudf

RAPIDS_PACKAGE_VERSION=$(head -1 ./VERSION) rapids-conda-retry mambabuild \
--no-test \
--channel "${CPP_CHANNEL}" \
--channel "${RAPIDS_CONDA_BLD_OUTPUT_DIR}" \
conda/recipes/cudf

RAPIDS_PACKAGE_VERSION=$(head -1 ./VERSION) rapids-conda-retry mambabuild \
Expand Down
8 changes: 6 additions & 2 deletions ci/build_wheel_cudf.sh
Original file line number Diff line number Diff line change
Expand Up @@ -7,10 +7,14 @@ package_dir="python/cudf"

export SKBUILD_CMAKE_ARGS="-DUSE_LIBARROW_FROM_PYARROW=ON"

# Download the pylibcudf built in the previous step
RAPIDS_PY_CUDA_SUFFIX="$(rapids-wheel-ctk-name-gen ${RAPIDS_CUDA_VERSION})"
RAPIDS_PY_WHEEL_NAME="pylibcudf_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-from-s3 /tmp/pylibcudf_dist

echo "pylibcudf-${RAPIDS_PY_CUDA_SUFFIX} @ file://$(echo /tmp/pylibcudf_dist/pylibcudf_*.whl)" > /tmp/constraints.txt
export PIP_CONSTRAINT="/tmp/constraints.txt"
./ci/build_wheel.sh ${package_dir}

python -m auditwheel repair -w ${package_dir}/final_dist ${package_dir}/dist/*


RAPIDS_PY_CUDA_SUFFIX="$(rapids-wheel-ctk-name-gen ${RAPIDS_CUDA_VERSION})"
RAPIDS_PY_WHEEL_NAME="cudf_${RAPIDS_PY_CUDA_SUFFIX}" rapids-upload-wheels-to-s3 ${package_dir}/final_dist
16 changes: 16 additions & 0 deletions ci/build_wheel_pylibcudf.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
#!/bin/bash
# Copyright (c) 2023-2024, NVIDIA CORPORATION.
#
# Builds the pylibcudf wheel, repairs it with auditwheel, and hands the
# repaired wheel to rapids-upload-wheels-to-s3.
#
# Required environment:
#   RAPIDS_CUDA_VERSION - passed to rapids-wheel-ctk-name-gen to derive the
#                         CUDA suffix used in the uploaded wheel name.
#
# Paths are relative, so this must be run from the repository root.

set -euo pipefail

package_dir="python/pylibcudf"

export SKBUILD_CMAKE_ARGS="-DUSE_LIBARROW_FROM_PYARROW=ON"

./ci/build_wheel.sh "${package_dir}"

# The glob stays outside the quotes so it still expands to the built wheel(s).
python -m auditwheel repair -w "${package_dir}/final_dist" "${package_dir}"/dist/*

RAPIDS_PY_CUDA_SUFFIX="$(rapids-wheel-ctk-name-gen "${RAPIDS_CUDA_VERSION}")"
RAPIDS_PY_WHEEL_NAME="pylibcudf_${RAPIDS_PY_CUDA_SUFFIX}" rapids-upload-wheels-to-s3 "${package_dir}/final_dist"
2 changes: 2 additions & 0 deletions ci/cudf_pandas_scripts/pandas-tests/run.sh
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,9 @@ rapids-logger "Running Pandas tests using $PANDAS_TESTS_BRANCH branch and rapids
rapids-logger "PR number: ${RAPIDS_REF_NAME:-"unknown"}"

RAPIDS_PY_CUDA_SUFFIX="$(rapids-wheel-ctk-name-gen ${RAPIDS_CUDA_VERSION})"
RAPIDS_PY_WHEEL_NAME="pylibcudf_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-from-s3 ./local-pylibcudf-dep
RAPIDS_PY_WHEEL_NAME="cudf_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-from-s3 ./local-cudf-dep
python -m pip install $(ls ./local-pylibcudf-dep/pylibcudf*.whl)
python -m pip install $(ls ./local-cudf-dep/cudf*.whl)[test,pandas-tests]

RESULTS_DIR=${RAPIDS_TESTS_DIR:-"$(mktemp -d)"}
Expand Down
2 changes: 2 additions & 0 deletions ci/cudf_pandas_scripts/run_tests.sh
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,9 @@ if [ "$no_cudf" = true ]; then
echo "Skipping cudf install"
else
RAPIDS_PY_CUDA_SUFFIX="$(rapids-wheel-ctk-name-gen ${RAPIDS_CUDA_VERSION})"
RAPIDS_PY_WHEEL_NAME="pylibcudf_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-from-s3 ./local-pylibcudf-dep
RAPIDS_PY_WHEEL_NAME="cudf_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-from-s3 ./local-cudf-dep
python -m pip install $(ls ./local-pylibcudf-dep/pylibcudf*.whl)
python -m pip install $(ls ./local-cudf-dep/cudf*.whl)[test,cudf-pandas-tests]
fi

Expand Down
2 changes: 1 addition & 1 deletion ci/test_python_cudf.sh
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ trap "EXITCODE=1" ERR
set +e

rapids-logger "pytest pylibcudf"
pushd python/cudf/cudf/pylibcudf_tests
pushd python/pylibcudf/pylibcudf/tests
python -m pytest \
--cache-clear \
--dist=worksteal \
Expand Down
10 changes: 7 additions & 3 deletions ci/test_wheel_cudf.sh
Original file line number Diff line number Diff line change
Expand Up @@ -3,19 +3,23 @@

set -eou pipefail

# Download the pylibcudf built in the previous step
RAPIDS_PY_CUDA_SUFFIX="$(rapids-wheel-ctk-name-gen ${RAPIDS_CUDA_VERSION})"
RAPIDS_PY_WHEEL_NAME="pylibcudf_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-from-s3 ./local-pylibcudf-dep
RAPIDS_PY_WHEEL_NAME="cudf_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-from-s3 ./dist

# echo to expand wildcard before adding `[extra]` requires for pip
python -m pip install $(echo ./dist/cudf*.whl)[test]
# Install both pylibcudf and cudf
python -m pip install \
"$(echo ./local-pylibcudf-dep/pylibcudf*.whl)[test]" \
"$(echo ./dist/cudf*.whl)[test]"

RESULTS_DIR=${RAPIDS_TESTS_DIR:-"$(mktemp -d)"}
RAPIDS_TESTS_DIR=${RAPIDS_TESTS_DIR:-"${RESULTS_DIR}/test-results"}/
mkdir -p "${RAPIDS_TESTS_DIR}"


rapids-logger "pytest pylibcudf"
pushd python/cudf/cudf/pylibcudf_tests
pushd python/pylibcudf/pylibcudf/tests
python -m pytest \
--cache-clear \
--dist=worksteal \
Expand Down
6 changes: 3 additions & 3 deletions ci/test_wheel_cudf_polars.sh
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ set -eou pipefail
# files in cudf_polars/pylibcudf", rather than "are there changes
# between upstream and this branch which touch cudf_polars/pylibcudf"
# TODO: is the target branch exposed anywhere in an environment variable?
if [ -n "$(git diff --name-only origin/branch-24.10...HEAD -- python/cudf_polars/ python/cudf/cudf/_lib/pylibcudf/)" ];
if [ -n "$(git diff --name-only origin/branch-24.10...HEAD -- python/cudf_polars/ python/pylibcudf/)" ];
then
HAS_CHANGES=1
else
Expand All @@ -21,8 +21,8 @@ RAPIDS_PY_CUDA_SUFFIX="$(rapids-wheel-ctk-name-gen ${RAPIDS_CUDA_VERSION})"
RAPIDS_PY_WHEEL_NAME="cudf_polars_${RAPIDS_PY_CUDA_SUFFIX}" RAPIDS_PY_WHEEL_PURE="1" rapids-download-wheels-from-s3 ./dist

# Download the pylibcudf built in the previous step
RAPIDS_PY_WHEEL_NAME="cudf_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-from-s3 ./local-cudf-dep
python -m pip install ./local-cudf-dep/cudf*.whl
RAPIDS_PY_WHEEL_NAME="pylibcudf_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-from-s3 ./local-pylibcudf-dep
python -m pip install ./local-pylibcudf-dep/pylibcudf*.whl

rapids-logger "Install cudf_polars"
python -m pip install $(echo ./dist/cudf_polars*.whl)[test]
Expand Down
5 changes: 4 additions & 1 deletion ci/test_wheel_dask_cudf.sh
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,11 @@ RAPIDS_PY_CUDA_SUFFIX="$(rapids-wheel-ctk-name-gen ${RAPIDS_CUDA_VERSION})"
RAPIDS_PY_WHEEL_NAME="dask_cudf_${RAPIDS_PY_CUDA_SUFFIX}" RAPIDS_PY_WHEEL_PURE="1" rapids-download-wheels-from-s3 ./dist

# Download the pylibcudf and cudf wheels built in the previous steps
RAPIDS_PY_WHEEL_NAME="pylibcudf_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-from-s3 ./local-pylibcudf-dep
RAPIDS_PY_WHEEL_NAME="cudf_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-from-s3 ./local-cudf-dep
python -m pip install ./local-cudf-dep/cudf*.whl
python -m pip install \
"$(echo ./local-pylibcudf-dep/pylibcudf*.whl)" \
"$(echo ./local-cudf-dep/cudf*.whl)"

# echo to expand wildcard before adding `[extra]` requires for pip
python -m pip install $(echo ./dist/dask_cudf*.whl)[test]
Expand Down
2 changes: 2 additions & 0 deletions conda/environments/all_cuda-118_arch-x86_64.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -56,12 +56,14 @@ dependencies:
- ninja
- notebook
- numba>=0.57
- numpy
- numpy>=1.23,<2.0a0
- numpydoc
- nvcc_linux-64=11.8
- nvcomp==3.0.6
- nvtx>=0.2.1
- packaging
- pandas
- pandas>=2.0,<2.2.3dev0
- pandoc
- pip
Expand Down
2 changes: 2 additions & 0 deletions conda/environments/all_cuda-125_arch-x86_64.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -55,11 +55,13 @@ dependencies:
- ninja
- notebook
- numba>=0.57
- numpy
- numpy>=1.23,<2.0a0
- numpydoc
- nvcomp==3.0.6
- nvtx>=0.2.1
- packaging
- pandas
- pandas>=2.0,<2.2.3dev0
- pandoc
- pip
Expand Down
2 changes: 2 additions & 0 deletions conda/recipes/cudf/meta.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,7 @@ requirements:
- numpy 1.23
- pyarrow ==16.1.0.*
- libcudf ={{ version }}
- pylibcudf ={{ version }}
- rmm ={{ minor_version }}
{% if cuda_major == "11" %}
- cudatoolkit
Expand All @@ -87,6 +88,7 @@ requirements:
- numpy >=1.23,<2.0a0
- {{ pin_compatible('pyarrow', max_pin='x.x') }}
- libcudf ={{ version }}
- pylibcudf ={{ version }}
- {{ pin_compatible('rmm', max_pin='x.x') }}
- fsspec >=0.6.0
{% if cuda_major == "11" %}
Expand Down
4 changes: 2 additions & 2 deletions conda/recipes/cudf_kafka/meta.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ requirements:
- python
- cython >=3.0.3
- cuda-version ={{ cuda_version }}
- cudf ={{ version }}
- pylibcudf ={{ version }}
- libcudf_kafka ={{ version }}
- rapids-build-backend >=0.3.0,<0.4.0.dev0
- scikit-build-core >=0.10.0
Expand All @@ -69,7 +69,7 @@ requirements:
- python
- {{ pin_compatible('cuda-version', max_pin='x', min_pin='x') }}
- libcudf_kafka ={{ version }}
- cudf ={{ version }}
- pylibcudf ={{ version }}
{% if cuda_major != "11" %}
- cuda-cudart
{% endif %}
Expand Down
4 changes: 4 additions & 0 deletions conda/recipes/pylibcudf/build.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
# Copyright (c) 2018-2024, NVIDIA CORPORATION.

# Build script for the pylibcudf conda recipe: delegates to the repository's
# top-level build.sh, requesting only the `pylibcudf` target.
# This assumes the script is executed from the root of the repo directory
./build.sh pylibcudf
20 changes: 20 additions & 0 deletions conda/recipes/pylibcudf/conda_build_config.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
# Build configuration values for the pylibcudf conda recipe.

# C compiler version.
c_compiler_version:
- 11

# C++ compiler version (kept equal to the C compiler version above).
cxx_compiler_version:
- 11

# C standard library implementation and its version.
c_stdlib:
- sysroot

c_stdlib_version:
- "2.17"

# CMake version constraint; 3.30.0 is explicitly excluded.
cmake_version:
- ">=3.26.4,!=3.30.0"

# CUDA compiler package name.
# NOTE(review): presumably used for CUDA 12+ builds, with cuda11_compiler
# below used for CUDA 11 — confirm against the recipe's meta.yaml.
cuda_compiler:
- cuda-nvcc

cuda11_compiler:
- nvcc
Loading

0 comments on commit bc8ca9b

Please sign in to comment.