Skip to content

Commit

Permalink
Merge branch 'branch-23.04' into groupby-apply-caching
Browse files Browse the repository at this point in the history
  • Loading branch information
brandon-b-miller committed Mar 10, 2023
2 parents d0ef1dc + e37bddb commit e41b72c
Show file tree
Hide file tree
Showing 243 changed files with 6,903 additions and 12,779 deletions.
24 changes: 24 additions & 0 deletions .flake8
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
# Copyright (c) 2017-2023, NVIDIA CORPORATION.

[flake8]
filename = *.py, *.pyx, *.pxd, *.pxi
exclude = __init__.py, *.egg, build, docs, .git
force-check = True
ignore =
# line break before binary operator
W503,
# whitespace before :
E203
per-file-ignores =
# Rules ignored only in Cython:
# E211: whitespace before '(' (used in multi-line imports)
# E225: Missing whitespace around operators (breaks cython casting syntax like <int>)
# E226: Missing whitespace around arithmetic operators (breaks cython pointer syntax like int*)
# E227: Missing whitespace around bitwise or shift operator (Can also break casting syntax)
# E275: Missing whitespace after keyword (Doesn't work with Cython except?)
# E402: invalid syntax (works for Python, not Cython)
# E999: invalid syntax (works for Python, not Cython)
# W504: line break after binary operator (breaks lines that end with a pointer)
*.pyx: E211, E225, E226, E227, E275, E402, E999, W504
*.pxd: E211, E225, E226, E227, E275, E402, E999, W504
*.pxi: E211, E225, E226, E227, E275, E402, E999, W504
4 changes: 0 additions & 4 deletions .gitattributes

This file was deleted.

13 changes: 13 additions & 0 deletions .github/workflows/build.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,17 @@ jobs:
date: ${{ inputs.date }}
sha: ${{ inputs.sha }}
skip_upload_pkgs: libcudf-example
docs-build:
if: github.ref_type == 'branch' && github.event_name == 'push'
needs: python-build
secrets: inherit
uses: rapidsai/shared-action-workflows/.github/workflows/[email protected]
with:
build_type: branch
node_type: "gpu-latest-1"
arch: "amd64"
container_image: "rapidsai/ci:latest"
run_script: "ci/build_docs.sh"
wheel-build-cudf:
secrets: inherit
uses: rapidsai/shared-action-workflows/.github/workflows/[email protected]
Expand All @@ -64,6 +75,7 @@ jobs:
package-name: cudf
package-dir: python/cudf
skbuild-configure-options: "-DCUDF_BUILD_WHEELS=ON -DDETECT_CONDA_ENV=OFF"
uses-setup-env-vars: false
wheel-publish-cudf:
needs: wheel-build-cudf
secrets: inherit
Expand All @@ -85,6 +97,7 @@ jobs:
date: ${{ inputs.date }}
package-name: dask_cudf
package-dir: python/dask_cudf
uses-setup-env-vars: false
wheel-publish-dask-cudf:
needs: wheel-build-dask-cudf
secrets: inherit
Expand Down
26 changes: 20 additions & 6 deletions .github/workflows/pr.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ jobs:
- conda-python-other-tests
- conda-java-tests
- conda-notebook-tests
- docs-build
- wheel-build-cudf
- wheel-tests-cudf
- wheel-build-dask-cudf
Expand All @@ -29,6 +30,8 @@ jobs:
checks:
secrets: inherit
uses: rapidsai/shared-action-workflows/.github/workflows/[email protected]
with:
enable_check_generated_files: false
conda-cpp-build:
needs: checks
secrets: inherit
Expand Down Expand Up @@ -82,6 +85,16 @@ jobs:
arch: "amd64"
container_image: "rapidsai/ci:latest"
run_script: "ci/test_notebooks.sh"
docs-build:
needs: conda-python-build
secrets: inherit
uses: rapidsai/shared-action-workflows/.github/workflows/[email protected]
with:
build_type: pull-request
node_type: "gpu-latest-1"
arch: "amd64"
container_image: "rapidsai/ci:latest"
run_script: "ci/build_docs.sh"
wheel-build-cudf:
needs: checks
secrets: inherit
Expand All @@ -91,6 +104,7 @@ jobs:
package-name: cudf
package-dir: python/cudf
skbuild-configure-options: "-DCUDF_BUILD_WHEELS=ON -DDETECT_CONDA_ENV=OFF"
uses-setup-env-vars: false
wheel-tests-cudf:
needs: wheel-build-cudf
secrets: inherit
Expand All @@ -99,9 +113,8 @@ jobs:
build_type: pull-request
package-name: cudf
# Install cupy-cuda11x for arm from a special index url
# Install tokenizers last binary wheel to avoid a Rust compile from the latest sdist
test-before-arm64: "pip install tokenizers==0.10.2 cupy-cuda11x -f https://pip.cupy.dev/aarch64"
test-unittest: "pytest -v -n 8 ./python/cudf/cudf/tests"
test-before-arm64: "python -m pip install cupy-cuda11x -f https://pip.cupy.dev/aarch64"
test-unittest: "python -m pytest -v -n 8 ./python/cudf/cudf/tests"
test-smoketest: "python ./ci/wheel_smoke_test_cudf.py"
wheel-build-dask-cudf:
needs: wheel-tests-cudf
Expand All @@ -111,13 +124,14 @@ jobs:
build_type: pull-request
package-name: dask_cudf
package-dir: python/dask_cudf
before-wheel: "RAPIDS_PY_WHEEL_NAME=cudf_cu11 rapids-download-wheels-from-s3 ./local-cudf && pip install --no-deps ./local-cudf/cudf*.whl"
before-wheel: "RAPIDS_PY_WHEEL_NAME=cudf_cu11 rapids-download-wheels-from-s3 ./local-cudf && python -m pip install --no-deps ./local-cudf/cudf*.whl"
uses-setup-env-vars: false
wheel-tests-dask-cudf:
needs: wheel-build-dask-cudf
secrets: inherit
uses: rapidsai/shared-action-workflows/.github/workflows/[email protected]
with:
build_type: pull-request
package-name: dask_cudf
test-before: "RAPIDS_PY_WHEEL_NAME=cudf_cu11 rapids-download-wheels-from-s3 ./local-cudf-dep && pip install --no-deps ./local-cudf-dep/cudf*.whl"
test-unittest: "pytest -v -n 8 ./python/dask_cudf/dask_cudf/tests"
test-before: "RAPIDS_PY_WHEEL_NAME=cudf_cu11 rapids-download-wheels-from-s3 ./local-cudf-dep && python -m pip install --no-deps ./local-cudf-dep/cudf*.whl"
test-unittest: "python -m pytest -v -n 8 ./python/dask_cudf/dask_cudf/tests"
18 changes: 15 additions & 3 deletions .github/workflows/test.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,18 @@ jobs:
branch: ${{ inputs.branch }}
date: ${{ inputs.date }}
sha: ${{ inputs.sha }}
conda-cpp-memcheck-tests:
secrets: inherit
uses: rapidsai/shared-action-workflows/.github/workflows/[email protected]
with:
build_type: nightly
branch: ${{ inputs.branch }}
date: ${{ inputs.date }}
sha: ${{ inputs.sha }}
node_type: "gpu-latest-1"
arch: "amd64"
container_image: "rapidsai/ci:latest"
run_script: "ci/test_cpp_memcheck.sh"
conda-python-cudf-tests:
secrets: inherit
uses: rapidsai/shared-action-workflows/.github/workflows/[email protected]
Expand Down Expand Up @@ -74,8 +86,8 @@ jobs:
date: ${{ inputs.date }}
sha: ${{ inputs.sha }}
package-name: cudf
test-before-arm64: "pip install tokenizers==0.10.2 cupy-cuda11x -f https://pip.cupy.dev/aarch64"
test-unittest: "pytest -v -n 8 ./python/cudf/cudf/tests"
test-before-arm64: "python -m pip install cupy-cuda11x -f https://pip.cupy.dev/aarch64"
test-unittest: "python -m pytest -v -n 8 ./python/cudf/cudf/tests"
wheel-tests-dask-cudf:
secrets: inherit
uses: rapidsai/shared-action-workflows/.github/workflows/[email protected]
Expand All @@ -85,4 +97,4 @@ jobs:
date: ${{ inputs.date }}
sha: ${{ inputs.sha }}
package-name: dask_cudf
test-unittest: "pytest -v -n 8 ./python/dask_cudf/dask_cudf/tests"
test-unittest: "python -m pytest -v -n 8 ./python/dask_cudf/dask_cudf/tests"
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -166,6 +166,9 @@ docs/cudf/source/api_docs/generated/*
docs/cudf/source/api_docs/api/*
docs/cudf/source/user_guide/example_output/*
docs/cudf/source/user_guide/cudf.*Dtype.*.rst
_html
_text
jupyter_execute

# cibuildwheel
/wheelhouse
30 changes: 24 additions & 6 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ repos:
rev: 5.0.4
hooks:
- id: flake8
args: ["--config=setup.cfg"]
args: ["--config=.flake8"]
files: python/.*$
types: [file]
types_or: [python, cython]
Expand All @@ -48,7 +48,7 @@ repos:
hooks:
- id: mypy
additional_dependencies: [types-cachetools]
args: ["--config-file=setup.cfg",
args: ["--config-file=pyproject.toml",
"python/cudf/cudf",
"python/custreamz/custreamz",
"python/cudf_kafka/cudf_kafka",
Expand All @@ -58,7 +58,19 @@ repos:
rev: 6.1.1
hooks:
- id: pydocstyle
args: ["--config=setup.cfg"]
# https://github.com/PyCQA/pydocstyle/issues/603
additional_dependencies: [toml]
args: ["--config=pyproject.toml"]
- repo: https://github.com/nbQA-dev/nbQA
rev: 1.6.3
hooks:
- id: nbqa-isort
# Use the cudf_kafka isort orderings in notebooks so that dask
# and RAPIDS packages have their own sections.
args: ["--settings-file=python/cudf_kafka/pyproject.toml"]
- id: nbqa-black
# Explicitly specify the pyproject.toml at the repo root, not per-project.
args: ["--config=pyproject.toml"]
- repo: https://github.com/pre-commit/mirrors-clang-format
rev: v11.1.0
hooks:
Expand Down Expand Up @@ -138,15 +150,21 @@ repos:
pass_filenames: false
verbose: false
- repo: https://github.com/codespell-project/codespell
rev: v2.1.0
rev: v2.2.2
hooks:
- id: codespell
additional_dependencies: [tomli]
args: ["--toml", "pyproject.toml"]
exclude: |
(?x)^(
.*test.*|
^CHANGELOG.md$|
^.*versioneer.py$
^CHANGELOG.md$
)
- repo: https://github.com/rapidsai/dependency-file-generator
rev: v1.4.0
hooks:
- id: rapids-dependency-file-generator
args: ["--clean"]

default_language_version:
python: python3
48 changes: 48 additions & 0 deletions ci/build_docs.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
#!/bin/bash
# Copyright (c) 2023, NVIDIA CORPORATION.

set -euo pipefail

rapids-logger "Create test conda environment"
. /opt/conda/etc/profile.d/conda.sh

rapids-dependency-file-generator \
--output conda \
--file_key docs \
--matrix "cuda=${RAPIDS_CUDA_VERSION%.*};arch=$(arch);py=${RAPIDS_PY_VERSION}" | tee env.yaml

rapids-mamba-retry env create --force -f env.yaml -n docs
conda activate docs

rapids-print-env

rapids-logger "Downloading artifacts from previous jobs"
CPP_CHANNEL=$(rapids-download-conda-from-s3 cpp)
PYTHON_CHANNEL=$(rapids-download-conda-from-s3 python)
VERSION_NUMBER=$(rapids-get-rapids-version-from-git)

rapids-mamba-retry install \
--channel "${CPP_CHANNEL}" \
--channel "${PYTHON_CHANNEL}" \
libcudf cudf dask-cudf


rapids-logger "Build Doxygen docs"
pushd cpp/doxygen
aws s3 cp s3://rapidsai-docs/librmm/${VERSION_NUMBER}/html/rmm.tag . || echo "Failed to download rmm Doxygen tag"
doxygen Doxyfile
popd

rapids-logger "Build Sphinx docs"
pushd docs/cudf
sphinx-build -b dirhtml source _html
sphinx-build -b text source _text
popd


if [[ ${RAPIDS_BUILD_TYPE} == "branch" ]]; then
rapids-logger "Upload Docs to S3"
aws s3 sync --no-progress --delete cpp/doxygen/html "s3://rapidsai-docs/libcudf/${VERSION_NUMBER}/html"
aws s3 sync --no-progress --delete docs/cudf/_html "s3://rapidsai-docs/cudf/${VERSION_NUMBER}/html"
aws s3 sync --no-progress --delete docs/cudf/_text "s3://rapidsai-docs/cudf/${VERSION_NUMBER}/txt"
fi
4 changes: 2 additions & 2 deletions ci/check_style.sh
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
#!/bin/bash
# Copyright (c) 2020-2022, NVIDIA CORPORATION.
# Copyright (c) 2020-2023, NVIDIA CORPORATION.

set -euo pipefail

Expand All @@ -20,4 +20,4 @@ mkdir -p $(dirname ${RAPIDS_CMAKE_FORMAT_FILE})
wget -O ${RAPIDS_CMAKE_FORMAT_FILE} ${FORMAT_FILE_URL}

# Run pre-commit checks
pre-commit run --hook-stage manual --all-files --show-diff-on-failure
pre-commit run --all-files --show-diff-on-failure
3 changes: 1 addition & 2 deletions ci/checks/copyright.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright (c) 2019-2022, NVIDIA CORPORATION.
# Copyright (c) 2019-2023, NVIDIA CORPORATION.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -31,7 +31,6 @@
]
ExemptFiles = [
re.compile(r"cpp/include/cudf_test/cxxopts.hpp"),
re.compile(r"versioneer[.]py"),
]

# this will break starting at year 10000, which is probably OK :)
Expand Down
30 changes: 30 additions & 0 deletions ci/release/apply_wheel_modifications.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
#!/bin/bash
# Copyright (c) 2023, NVIDIA CORPORATION.
#
# Usage: bash apply_wheel_modifications.sh <new_version> <cuda_suffix>

VERSION=${1}
CUDA_SUFFIX=${2}

# __init__.py versions
sed -i "s/__version__ = .*/__version__ = \"${VERSION}\"/g" python/cudf/cudf/__init__.py
sed -i "s/__version__ = .*/__version__ = \"${VERSION}\"/g" python/dask_cudf/dask_cudf/__init__.py
sed -i "s/__version__ = .*/__version__ = \"${VERSION}\"/g" python/cudf_kafka/cudf_kafka/__init__.py
sed -i "s/__version__ = .*/__version__ = \"${VERSION}\"/g" python/custreamz/custreamz/__init__.py

# pyproject.toml versions
sed -i "s/^version = .*/version = \"${VERSION}\"/g" python/cudf/pyproject.toml
sed -i "s/^version = .*/version = \"${VERSION}\"/g" python/dask_cudf/pyproject.toml
sed -i "s/^version = .*/version = \"${VERSION}\"/g" python/cudf_kafka/pyproject.toml
sed -i "s/^version = .*/version = \"${VERSION}\"/g" python/custreamz/pyproject.toml

# cudf pyproject.toml cuda suffixes
sed -i "s/^name = \"cudf\"/name = \"cudf${CUDA_SUFFIX}\"/g" python/cudf/pyproject.toml
sed -i "s/rmm/rmm${CUDA_SUFFIX}/g" python/cudf/pyproject.toml
sed -i "s/ptxcompiler/ptxcompiler${CUDA_SUFFIX}/g" python/cudf/pyproject.toml
sed -i "s/cubinlinker/cubinlinker${CUDA_SUFFIX}/g" python/cudf/pyproject.toml

# dask_cudf pyproject.toml cuda suffixes
sed -i "s/^name = \"dask_cudf\"/name = \"dask_cudf${CUDA_SUFFIX}\"/g" python/dask_cudf/pyproject.toml
# Need to provide the == to avoid modifying the URL
sed -i "s/\"cudf==/\"cudf${CUDA_SUFFIX}==/g" python/dask_cudf/pyproject.toml
24 changes: 16 additions & 8 deletions ci/release/update-version.sh
Original file line number Diff line number Diff line change
Expand Up @@ -34,19 +34,27 @@ function sed_runner() {
# cpp update
sed_runner 's/'"VERSION ${CURRENT_SHORT_TAG}.*"'/'"VERSION ${NEXT_FULL_TAG}"'/g' cpp/CMakeLists.txt

# cpp stream testing update
sed_runner 's/'"VERSION ${CURRENT_SHORT_TAG}.*"'/'"VERSION ${NEXT_FULL_TAG}"'/g' cpp/tests/utilities/identify_stream_usage/CMakeLists.txt

# Python update
# Python CMakeLists updates
sed_runner 's/'"cudf_version .*)"'/'"cudf_version ${NEXT_FULL_TAG})"'/g' python/cudf/CMakeLists.txt


# cpp libcudf_kafka update
sed_runner 's/'"VERSION ${CURRENT_SHORT_TAG}.*"'/'"VERSION ${NEXT_FULL_TAG}"'/g' cpp/libcudf_kafka/CMakeLists.txt

# cpp cudf_jni update
sed_runner 's/'"VERSION ${CURRENT_SHORT_TAG}.*"'/'"VERSION ${NEXT_FULL_TAG}"'/g' java/src/main/native/CMakeLists.txt

# Python __init__.py updates
sed_runner "s/__version__ = .*/__version__ = \"${NEXT_FULL_TAG}\"/g" python/cudf/cudf/__init__.py
sed_runner "s/__version__ = .*/__version__ = \"${NEXT_FULL_TAG}\"/g" python/dask_cudf/dask_cudf/__init__.py
sed_runner "s/__version__ = .*/__version__ = \"${NEXT_FULL_TAG}\"/g" python/cudf_kafka/cudf_kafka/__init__.py
sed_runner "s/__version__ = .*/__version__ = \"${NEXT_FULL_TAG}\"/g" python/custreamz/custreamz/__init__.py

# Python pyproject.toml updates
sed_runner "s/^version = .*/version = \"${NEXT_FULL_TAG}\"/g" python/cudf/pyproject.toml
sed_runner "s/^version = .*/version = \"${NEXT_FULL_TAG}\"/g" python/dask_cudf/pyproject.toml
sed_runner "s/^version = .*/version = \"${NEXT_FULL_TAG}\"/g" python/cudf_kafka/pyproject.toml
sed_runner "s/^version = .*/version = \"${NEXT_FULL_TAG}\"/g" python/custreamz/pyproject.toml

# rapids-cmake version
sed_runner 's/'"branch-.*\/RAPIDS.cmake"'/'"branch-${NEXT_SHORT_TAG}\/RAPIDS.cmake"'/g' fetch_rapids.cmake

Expand Down Expand Up @@ -81,9 +89,9 @@ sed_runner "s/CUDF_TAG branch-${CURRENT_SHORT_TAG}/CUDF_TAG branch-${NEXT_SHORT_
# Need to distutils-normalize the original version
NEXT_SHORT_TAG_PEP440=$(python -c "from setuptools.extern import packaging; print(packaging.version.Version('${NEXT_SHORT_TAG}'))")

# Wheel builds install intra-RAPIDS dependencies from same release
sed_runner "s/rmm{cuda_suffix}.*\",/rmm{cuda_suffix}==${NEXT_SHORT_TAG_PEP440}.*\",/g" python/cudf/setup.py
sed_runner "s/cudf{cuda_suffix}==.*\",/cudf{cuda_suffix}==${NEXT_SHORT_TAG_PEP440}.*\",/g" python/dask_cudf/setup.py
# Dependency versions in pyproject.toml
sed_runner "s/rmm==.*\",/rmm==${NEXT_SHORT_TAG_PEP440}.*\",/g" python/cudf/pyproject.toml
sed_runner "s/cudf==.*\",/cudf==${NEXT_SHORT_TAG_PEP440}.*\",/g" python/dask_cudf/pyproject.toml

for FILE in .github/workflows/*.yaml; do
sed_runner "/shared-action-workflows/ s/@.*/@branch-${NEXT_SHORT_TAG}/g" "${FILE}"
Expand Down
Loading

0 comments on commit e41b72c

Please sign in to comment.