Skip to content

Commit

Permalink
Merge branch 'branch-24.02' into 14496
Browse files Browse the repository at this point in the history
  • Loading branch information
galipremsagar authored Nov 28, 2023
2 parents 53c6cbb + 5e58e71 commit 050a3a7
Show file tree
Hide file tree
Showing 190 changed files with 4,202 additions and 1,258 deletions.
35 changes: 35 additions & 0 deletions .devcontainer/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
# syntax=docker/dockerfile:1.5

ARG BASE
ARG PYTHON_PACKAGE_MANAGER=conda

FROM ${BASE} as pip-base

RUN apt update -y \
&& DEBIAN_FRONTEND=noninteractive apt install -y \
librdkafka-dev \
&& rm -rf /tmp/* /var/tmp/* /var/cache/apt/* /var/lib/apt/lists/*;

ENV DEFAULT_VIRTUAL_ENV=rapids

FROM ${BASE} as conda-base

ENV DEFAULT_CONDA_ENV=rapids

FROM ${PYTHON_PACKAGE_MANAGER}-base

ARG CUDA
ENV CUDAARCHS="RAPIDS"
ENV CUDA_VERSION="${CUDA_VERSION:-${CUDA}}"

ARG PYTHON_PACKAGE_MANAGER
ENV PYTHON_PACKAGE_MANAGER="${PYTHON_PACKAGE_MANAGER}"

ENV PYTHONSAFEPATH="1"
ENV PYTHONUNBUFFERED="1"
ENV PYTHONDONTWRITEBYTECODE="1"

ENV SCCACHE_REGION="us-east-2"
ENV SCCACHE_BUCKET="rapids-sccache-devs"
ENV VAULT_HOST="https://vault.ops.k8s.rapids.ai"
ENV HISTFILE="/home/coder/.cache/._bash_history"
35 changes: 35 additions & 0 deletions .devcontainer/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
# cuDF Development Containers

This directory contains [devcontainer configurations](https://containers.dev/implementors/json_reference/) for using VSCode to [develop in a container](https://code.visualstudio.com/docs/devcontainers/containers) via the `Remote Containers` [extension](https://marketplace.visualstudio.com/items?itemName=ms-vscode-remote.remote-containers) or [GitHub Codespaces](https://github.com/codespaces).

This container is a turnkey development environment for building and testing the cuDF C++ and Python libraries.

## Table of Contents

* [Prerequisites](#prerequisites)
* [Host bind mounts](#host-bind-mounts)
* [Launch a Dev Container](#launch-a-dev-container)

## Prerequisites

* [VSCode](https://code.visualstudio.com/download)
* [VSCode Remote Containers extension](https://marketplace.visualstudio.com/items?itemName=ms-vscode-remote.remote-containers)

## Host bind mounts

By default, the following directories are bind-mounted into the devcontainer:

* `${repo}:/home/coder/cudf`
* `${repo}/../.aws:/home/coder/.aws`
* `${repo}/../.local:/home/coder/.local`
* `${repo}/../.cache:/home/coder/.cache`
* `${repo}/../.conda:/home/coder/.conda`
* `${repo}/../.config:/home/coder/.config`

This ensures caches, configurations, dependencies, and your commits are persisted on the host across container runs.

## Launch a Dev Container

To launch a devcontainer from VSCode, open the cuDF repo and select the "Reopen in Container" button in the bottom right:<br/><img src="https://user-images.githubusercontent.com/178183/221771999-97ab29d5-e718-4e5f-b32f-2cdd51bba25c.png"/>

Alternatively, open the VSCode command palette (typically `cmd/ctrl + shift + P`) and run the "Rebuild and Reopen in Container" command.
37 changes: 37 additions & 0 deletions .devcontainer/cuda11.8-conda/devcontainer.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
{
"build": {
"context": "${localWorkspaceFolder}/.devcontainer",
"dockerfile": "${localWorkspaceFolder}/.devcontainer/Dockerfile",
"args": {
"CUDA": "11.8",
"PYTHON_PACKAGE_MANAGER": "conda",
"BASE": "rapidsai/devcontainers:24.02-cpp-llvm16-cuda11.8-mambaforge-ubuntu22.04"
}
},
"hostRequirements": {"gpu": "optional"},
"features": {
"ghcr.io/rapidsai/devcontainers/features/rapids-build-utils:24.2": {}
},
"overrideFeatureInstallOrder": [
"ghcr.io/rapidsai/devcontainers/features/rapids-build-utils"
],
"initializeCommand": ["/bin/bash", "-c", "mkdir -m 0755 -p ${localWorkspaceFolder}/../.{aws,cache,config,conda/pkgs,conda/${localWorkspaceFolderBasename}-cuda11.8-envs}"],
"postAttachCommand": ["/bin/bash", "-c", "if [ ${CODESPACES:-false} = 'true' ]; then . devcontainer-utils-post-attach-command; . rapids-post-attach-command; fi"],
"workspaceFolder": "/home/coder",
"workspaceMount": "source=${localWorkspaceFolder},target=/home/coder/cudf,type=bind,consistency=consistent",
"mounts": [
"source=${localWorkspaceFolder}/../.aws,target=/home/coder/.aws,type=bind,consistency=consistent",
"source=${localWorkspaceFolder}/../.cache,target=/home/coder/.cache,type=bind,consistency=consistent",
"source=${localWorkspaceFolder}/../.config,target=/home/coder/.config,type=bind,consistency=consistent",
"source=${localWorkspaceFolder}/../.conda/pkgs,target=/home/coder/.conda/pkgs,type=bind,consistency=consistent",
"source=${localWorkspaceFolder}/../.conda/${localWorkspaceFolderBasename}-cuda11.8-envs,target=/home/coder/.conda/envs,type=bind,consistency=consistent"
],
"customizations": {
"vscode": {
"extensions": [
"ms-python.flake8",
"nvidia.nsight-vscode-edition"
]
}
}
}
36 changes: 36 additions & 0 deletions .devcontainer/cuda11.8-pip/devcontainer.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
{
"build": {
"context": "${localWorkspaceFolder}/.devcontainer",
"dockerfile": "${localWorkspaceFolder}/.devcontainer/Dockerfile",
"args": {
"CUDA": "11.8",
"PYTHON_PACKAGE_MANAGER": "pip",
"BASE": "rapidsai/devcontainers:24.02-cpp-llvm16-cuda11.8-ubuntu22.04"
}
},
"hostRequirements": {"gpu": "optional"},
"features": {
"ghcr.io/rapidsai/devcontainers/features/rapids-build-utils:24.2": {}
},
"overrideFeatureInstallOrder": [
"ghcr.io/rapidsai/devcontainers/features/rapids-build-utils"
],
"initializeCommand": ["/bin/bash", "-c", "mkdir -m 0755 -p ${localWorkspaceFolder}/../.{aws,cache,config/pip,local/share/${localWorkspaceFolderBasename}-cuda11.8-venvs}"],
"postAttachCommand": ["/bin/bash", "-c", "if [ ${CODESPACES:-false} = 'true' ]; then . devcontainer-utils-post-attach-command; . rapids-post-attach-command; fi"],
"workspaceFolder": "/home/coder",
"workspaceMount": "source=${localWorkspaceFolder},target=/home/coder/cudf,type=bind,consistency=consistent",
"mounts": [
"source=${localWorkspaceFolder}/../.aws,target=/home/coder/.aws,type=bind,consistency=consistent",
"source=${localWorkspaceFolder}/../.cache,target=/home/coder/.cache,type=bind,consistency=consistent",
"source=${localWorkspaceFolder}/../.config,target=/home/coder/.config,type=bind,consistency=consistent",
"source=${localWorkspaceFolder}/../.local/share/${localWorkspaceFolderBasename}-cuda11.8-venvs,target=/home/coder/.local/share/venvs,type=bind,consistency=consistent"
],
"customizations": {
"vscode": {
"extensions": [
"ms-python.flake8",
"nvidia.nsight-vscode-edition"
]
}
}
}
37 changes: 37 additions & 0 deletions .devcontainer/cuda12.0-conda/devcontainer.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
{
"build": {
"context": "${localWorkspaceFolder}/.devcontainer",
"dockerfile": "${localWorkspaceFolder}/.devcontainer/Dockerfile",
"args": {
"CUDA": "12.0",
"PYTHON_PACKAGE_MANAGER": "conda",
"BASE": "rapidsai/devcontainers:24.02-cpp-mambaforge-ubuntu22.04"
}
},
"hostRequirements": {"gpu": "optional"},
"features": {
"ghcr.io/rapidsai/devcontainers/features/rapids-build-utils:24.2": {}
},
"overrideFeatureInstallOrder": [
"ghcr.io/rapidsai/devcontainers/features/rapids-build-utils"
],
"initializeCommand": ["/bin/bash", "-c", "mkdir -m 0755 -p ${localWorkspaceFolder}/../.{aws,cache,config,conda/pkgs,conda/${localWorkspaceFolderBasename}-cuda12.0-envs}"],
"postAttachCommand": ["/bin/bash", "-c", "if [ ${CODESPACES:-false} = 'true' ]; then . devcontainer-utils-post-attach-command; . rapids-post-attach-command; fi"],
"workspaceFolder": "/home/coder",
"workspaceMount": "source=${localWorkspaceFolder},target=/home/coder/cudf,type=bind,consistency=consistent",
"mounts": [
"source=${localWorkspaceFolder}/../.aws,target=/home/coder/.aws,type=bind,consistency=consistent",
"source=${localWorkspaceFolder}/../.cache,target=/home/coder/.cache,type=bind,consistency=consistent",
"source=${localWorkspaceFolder}/../.config,target=/home/coder/.config,type=bind,consistency=consistent",
"source=${localWorkspaceFolder}/../.conda/pkgs,target=/home/coder/.conda/pkgs,type=bind,consistency=consistent",
"source=${localWorkspaceFolder}/../.conda/${localWorkspaceFolderBasename}-cuda12.0-envs,target=/home/coder/.conda/envs,type=bind,consistency=consistent"
],
"customizations": {
"vscode": {
"extensions": [
"ms-python.flake8",
"nvidia.nsight-vscode-edition"
]
}
}
}
36 changes: 36 additions & 0 deletions .devcontainer/cuda12.0-pip/devcontainer.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
{
"build": {
"context": "${localWorkspaceFolder}/.devcontainer",
"dockerfile": "${localWorkspaceFolder}/.devcontainer/Dockerfile",
"args": {
"CUDA": "12.0",
"PYTHON_PACKAGE_MANAGER": "pip",
"BASE": "rapidsai/devcontainers:24.02-cpp-llvm16-cuda12.0-ubuntu22.04"
}
},
"hostRequirements": {"gpu": "optional"},
"features": {
"ghcr.io/rapidsai/devcontainers/features/rapids-build-utils:24.2": {}
},
"overrideFeatureInstallOrder": [
"ghcr.io/rapidsai/devcontainers/features/rapids-build-utils"
],
"initializeCommand": ["/bin/bash", "-c", "mkdir -m 0755 -p ${localWorkspaceFolder}/../.{aws,cache,config/pip,local/share/${localWorkspaceFolderBasename}-cuda12.0-venvs}"],
"postAttachCommand": ["/bin/bash", "-c", "if [ ${CODESPACES:-false} = 'true' ]; then . devcontainer-utils-post-attach-command; . rapids-post-attach-command; fi"],
"workspaceFolder": "/home/coder",
"workspaceMount": "source=${localWorkspaceFolder},target=/home/coder/cudf,type=bind,consistency=consistent",
"mounts": [
"source=${localWorkspaceFolder}/../.aws,target=/home/coder/.aws,type=bind,consistency=consistent",
"source=${localWorkspaceFolder}/../.cache,target=/home/coder/.cache,type=bind,consistency=consistent",
"source=${localWorkspaceFolder}/../.config,target=/home/coder/.config,type=bind,consistency=consistent",
"source=${localWorkspaceFolder}/../.local/share/${localWorkspaceFolderBasename}-cuda12.0-venvs,target=/home/coder/.local/share/venvs,type=bind,consistency=consistent"
],
"customizations": {
"vscode": {
"extensions": [
"ms-python.flake8",
"nvidia.nsight-vscode-edition"
]
}
}
}
2 changes: 1 addition & 1 deletion .github/workflows/build.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ on:
default: nightly

concurrency:
group: ${{ github.workflow }}-${{ github.ref }}
group: ${{ github.workflow }}-${{ github.ref }}-${{ github.event_name }}
cancel-in-progress: true

jobs:
Expand Down
9 changes: 9 additions & 0 deletions .github/workflows/pr.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ jobs:
- wheel-tests-cudf
- wheel-build-dask-cudf
- wheel-tests-dask-cudf
- devcontainer
- unit-tests-cudf-pandas
- pandas-tests
#- pandas-tests-diff
Expand Down Expand Up @@ -130,6 +131,14 @@ jobs:
matrix_filter: map(select(.ARCH == "amd64" and .PY_VER == "3.10" and (.CUDA_VER == "11.8.0" or .CUDA_VER == "12.0.1")))
build_type: pull-request
script: ci/test_wheel_dask_cudf.sh
devcontainer:
secrets: inherit
uses: rapidsai/shared-action-workflows/.github/workflows/[email protected]
with:
build_command: |
sccache -z;
build-all -DBUILD_BENCHMARKS=ON -DNVBench_ENABLE_CUPTI=OFF --verbose;
sccache -s;
unit-tests-cudf-pandas:
needs: wheel-build-cudf
secrets: inherit
Expand Down
1 change: 0 additions & 1 deletion .github/workflows/test.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -97,7 +97,6 @@ jobs:
sha: ${{ inputs.sha }}
script: ci/test_wheel_dask_cudf.sh
unit-tests-cudf-pandas:
needs: wheel-build-cudf
secrets: inherit
uses: rapidsai/shared-workflows/.github/workflows/[email protected]
with:
Expand Down
4 changes: 4 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -173,3 +173,7 @@ jupyter_execute

# cibuildwheel
/wheelhouse

# clang tooling
compile_commands.json
.clangd/
2 changes: 1 addition & 1 deletion .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -145,7 +145,7 @@ repos:
^CHANGELOG.md$
)
- repo: https://github.com/rapidsai/dependency-file-generator
rev: v1.5.1
rev: v1.7.1
hooks:
- id: rapids-dependency-file-generator
args: ["--clean"]
Expand Down
22 changes: 4 additions & 18 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -51,10 +51,10 @@ print(tips_df.groupby("size").tip_percentage.mean())
## Resources

- [Try cudf.pandas now](https://nvda.ws/rapids-cudf): Explore `cudf.pandas` on a free GPU enabled instance on Google Colab!
- [Install](https://rapids.ai/start.html): Instructions for installing cuDF and other [RAPIDS](https://rapids.ai) libraries.
- [Install](https://docs.rapids.ai/install): Instructions for installing cuDF and other [RAPIDS](https://rapids.ai) libraries.
- [cudf (Python) documentation](https://docs.rapids.ai/api/cudf/stable/)
- [libcudf (C++/CUDA) documentation](https://docs.rapids.ai/api/libcudf/stable/)
- [RAPIDS Community](https://rapids.ai/community.html): Get help, contribute, and collaborate.
- [RAPIDS Community](https://rapids.ai/learn-more/#get-involved): Get help, contribute, and collaborate.

## Installation

Expand All @@ -66,7 +66,7 @@ print(tips_df.groupby("size").tip_percentage.mean())

### Conda

cuDF can be installed with conda (via [miniconda](https://conda.io/miniconda.html) or the full [Anaconda distribution](https://www.anaconda.com/download)) from the `rapidsai` channel:
cuDF can be installed with conda (via [miniconda](https://docs.conda.io/projects/miniconda/en/latest/) or the full [Anaconda distribution](https://www.anaconda.com/download) from the `rapidsai` channel:

```bash
conda install -c rapidsai -c conda-forge -c nvidia \
Expand All @@ -78,25 +78,11 @@ of our latest development branch.

Note: cuDF is supported only on Linux, and with Python versions 3.9 and later.

See the [Get RAPIDS version picker](https://rapids.ai/start.html) for more OS and version info.
See the [RAPIDS installation guide](https://docs.rapids.ai/install) for more OS and version info.

## Build/Install from Source
See build [instructions](CONTRIBUTING.md#setting-up-your-build-environment).

## Contributing

Please see our [guide for contributing to cuDF](CONTRIBUTING.md).

## Contact

Find out more details on the [RAPIDS site](https://rapids.ai/community.html)

## <div align="left"><img src="img/rapids_logo.png" width="265px"/></div> Open GPU Data Science

The RAPIDS suite of open source software libraries aim to enable execution of end-to-end data science and analytics pipelines entirely on GPUs. It relies on NVIDIA® CUDA® primitives for low-level compute optimization, but exposing that GPU parallelism and high-bandwidth memory speed through user-friendly Python interfaces.

<p align="center"><img src="img/rapids_arrow.png" width="80%"/></p>

### Apache Arrow on GPU

The GPU version of [Apache Arrow](https://arrow.apache.org/) is a common API that enables efficient interchange of tabular data between processes running on the GPU. End-to-end computation on the GPU avoids unnecessary copying and converting of data off the GPU, reducing compute time and cost for high-performance analytics common in artificial intelligence workloads. As the name implies, cuDF uses the Apache Arrow columnar data format on the GPU. Currently, a subset of the features in Apache Arrow are supported.
2 changes: 1 addition & 1 deletion build.sh
Original file line number Diff line number Diff line change
Expand Up @@ -369,7 +369,7 @@ fi
# build cudf_kafka Python package
if hasArg cudf_kafka; then
cd ${REPODIR}/python/cudf_kafka
SKBUILD_CONFIGURE_OPTIONS="-DCMAKE_LIBRARY_PATH=${LIBCUDF_BUILD_DIR}" \
SKBUILD_CONFIGURE_OPTIONS="-DCMAKE_PREFIX_PATH=${INSTALL_PREFIX} -DCMAKE_LIBRARY_PATH=${LIBCUDF_BUILD_DIR} ${EXTRA_CMAKE_ARGS}" \
SKBUILD_BUILD_OPTIONS="-j${PARALLEL_LEVEL:-1}" \
python -m pip install --no-build-isolation --no-deps .
fi
Expand Down
2 changes: 2 additions & 0 deletions ci/build_wheel.sh
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,8 @@ fi

if [[ ${package_name} == "dask_cudf" ]]; then
sed -r -i "s/cudf==(.*)\"/cudf${PACKAGE_CUDA_SUFFIX}==\1${alpha_spec}\"/g" ${pyproject_file}
sed -r -i "s/dask-cuda==(.*)\"/dask-cuda==\1${alpha_spec}\"/g" ${pyproject_file}
sed -r -i "s/rapids-dask-dependency==(.*)\"/rapids-dask-dependency==\1${alpha_spec}\"/g" ${pyproject_file}
else
sed -r -i "s/rmm(.*)\"/rmm${PACKAGE_CUDA_SUFFIX}\1${alpha_spec}\"/g" ${pyproject_file}
# ptxcompiler and cubinlinker aren't version constrained
Expand Down
11 changes: 9 additions & 2 deletions ci/release/update-version.sh
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@ sed_runner 's/'"VERSION ${CURRENT_SHORT_TAG}.*"'/'"VERSION ${NEXT_FULL_TAG}"'/g'

# Python CMakeLists updates
sed_runner 's/'"cudf_version .*)"'/'"cudf_version ${NEXT_FULL_TAG})"'/g' python/cudf/CMakeLists.txt
sed_runner 's/'"cudf_kafka_version .*)"'/'"cudf_kafka_version ${NEXT_FULL_TAG})"'/g' python/cudf_kafka/CMakeLists.txt

# cpp libcudf_kafka update
sed_runner 's/'"VERSION ${CURRENT_SHORT_TAG}.*"'/'"VERSION ${NEXT_FULL_TAG}"'/g' cpp/libcudf_kafka/CMakeLists.txt
Expand Down Expand Up @@ -80,6 +81,7 @@ DEPENDENCIES=(
kvikio
libkvikio
librmm
rapids-dask-dependency
rmm
)
for DEP in "${DEPENDENCIES[@]}"; do
Expand All @@ -99,8 +101,7 @@ sed_runner "s/version == ${CURRENT_SHORT_TAG}/version == ${NEXT_SHORT_TAG}/g" RE
sed_runner "s/cudf=${CURRENT_SHORT_TAG}/cudf=${NEXT_SHORT_TAG}/g" README.md

# Libcudf examples update
sed_runner "s/CUDF_TAG branch-${CURRENT_SHORT_TAG}/CUDF_TAG branch-${NEXT_SHORT_TAG}/" cpp/examples/basic/CMakeLists.txt
sed_runner "s/CUDF_TAG branch-${CURRENT_SHORT_TAG}/CUDF_TAG branch-${NEXT_SHORT_TAG}/" cpp/examples/strings/CMakeLists.txt
sed_runner "s/CUDF_TAG branch-${CURRENT_SHORT_TAG}/CUDF_TAG branch-${NEXT_SHORT_TAG}/" cpp/examples/fetch_dependencies.cmake

# CI files
for FILE in .github/workflows/*.yaml; do
Expand All @@ -114,3 +115,9 @@ NEXT_FULL_JAVA_TAG="${NEXT_SHORT_TAG}.${PATCH_PEP440}-SNAPSHOT"
sed_runner "s|<version>.*-SNAPSHOT</version>|<version>${NEXT_FULL_JAVA_TAG}</version>|g" java/pom.xml
sed_runner "s/branch-.*/branch-${NEXT_SHORT_TAG}/g" java/ci/README.md
sed_runner "s/cudf-.*-SNAPSHOT/cudf-${NEXT_FULL_JAVA_TAG}/g" java/ci/README.md

# .devcontainer files
find .devcontainer/ -type f -name devcontainer.json -print0 | while IFS= read -r -d '' filename; do
sed_runner "s@rapidsai/devcontainers:[0-9.]*@rapidsai/devcontainers:${NEXT_SHORT_TAG}@g" "${filename}"
sed_runner "s@rapidsai/devcontainers/features/rapids-build-utils:[0-9.]*@rapidsai/devcontainers/features/rapids-build-utils:${NEXT_SHORT_TAG_PEP440}@" "${filename}"
done
3 changes: 0 additions & 3 deletions ci/test_wheel_dask_cudf.sh
Original file line number Diff line number Diff line change
Expand Up @@ -23,9 +23,6 @@ manylinux="manylinux_${manylinux_version}"
RAPIDS_PY_WHEEL_NAME="cudf_${manylinux}_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-from-s3 ./local-cudf-dep
python -m pip install --no-deps ./local-cudf-dep/cudf*.whl

# Always install latest dask for testing
python -m pip install git+https://github.com/dask/dask.git@main git+https://github.com/dask/distributed.git@main git+https://github.com/rapidsai/[email protected]

# echo to expand wildcard before adding `[extra]` requires for pip
python -m pip install $(echo ./dist/dask_cudf*.whl)[test]

Expand Down
Loading

0 comments on commit 050a3a7

Please sign in to comment.