Skip to content

Commit

Permalink
Merge branch 'branch-23.04' into wence/feature/dask-cudf-docs
Browse files Browse the repository at this point in the history
  • Loading branch information
quasiben authored Mar 20, 2023
2 parents 3334add + aff1c9f commit 844a4ee
Show file tree
Hide file tree
Showing 531 changed files with 18,020 additions and 23,842 deletions.
24 changes: 24 additions & 0 deletions .flake8
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
# Copyright (c) 2017-2023, NVIDIA CORPORATION.

[flake8]
filename = *.py, *.pyx, *.pxd, *.pxi
exclude = __init__.py, *.egg, build, docs, .git
force-check = True
ignore =
# line break before binary operator
W503,
# whitespace before :
E203
per-file-ignores =
# Rules ignored only in Cython:
# E211: whitespace before '(' (used in multi-line imports)
# E225: Missing whitespace around operators (breaks cython casting syntax like <int>)
# E226: Missing whitespace around arithmetic operators (breaks cython pointer syntax like int*)
# E227: Missing whitespace around bitwise or shift operator (Can also break casting syntax)
# E275: Missing whitespace after keyword (Doesn't work with Cython except?)
# E402: invalid syntax (works for Python, not Cython)
# E999: invalid syntax (works for Python, not Cython)
# W504: line break after binary operator (breaks lines that end with a pointer)
*.pyx: E211, E225, E226, E227, E275, E402, E999, W504
*.pxd: E211, E225, E226, E227, E275, E402, E999, W504
*.pxi: E211, E225, E226, E227, E275, E402, E999, W504
5 changes: 0 additions & 5 deletions .gitattributes

This file was deleted.

13 changes: 13 additions & 0 deletions .github/workflows/build.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,17 @@ jobs:
date: ${{ inputs.date }}
sha: ${{ inputs.sha }}
skip_upload_pkgs: libcudf-example
docs-build:
if: github.ref_type == 'branch' && github.event_name == 'push'
needs: python-build
secrets: inherit
uses: rapidsai/shared-action-workflows/.github/workflows/[email protected]
with:
build_type: branch
node_type: "gpu-latest-1"
arch: "amd64"
container_image: "rapidsai/ci:latest"
run_script: "ci/build_docs.sh"
wheel-build-cudf:
secrets: inherit
uses: rapidsai/shared-action-workflows/.github/workflows/[email protected]
Expand All @@ -64,6 +75,7 @@ jobs:
package-name: cudf
package-dir: python/cudf
skbuild-configure-options: "-DCUDF_BUILD_WHEELS=ON -DDETECT_CONDA_ENV=OFF"
uses-setup-env-vars: false
wheel-publish-cudf:
needs: wheel-build-cudf
secrets: inherit
Expand All @@ -85,6 +97,7 @@ jobs:
date: ${{ inputs.date }}
package-name: dask_cudf
package-dir: python/dask_cudf
uses-setup-env-vars: false
wheel-publish-dask-cudf:
needs: wheel-build-dask-cudf
secrets: inherit
Expand Down
27 changes: 21 additions & 6 deletions .github/workflows/pr.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ jobs:
- conda-python-other-tests
- conda-java-tests
- conda-notebook-tests
- docs-build
- wheel-build-cudf
- wheel-tests-cudf
- wheel-build-dask-cudf
Expand All @@ -29,6 +30,8 @@ jobs:
checks:
secrets: inherit
uses: rapidsai/shared-action-workflows/.github/workflows/[email protected]
with:
enable_check_generated_files: false
conda-cpp-build:
needs: checks
secrets: inherit
Expand Down Expand Up @@ -82,6 +85,16 @@ jobs:
arch: "amd64"
container_image: "rapidsai/ci:latest"
run_script: "ci/test_notebooks.sh"
docs-build:
needs: conda-python-build
secrets: inherit
uses: rapidsai/shared-action-workflows/.github/workflows/[email protected]
with:
build_type: pull-request
node_type: "gpu-latest-1"
arch: "amd64"
container_image: "rapidsai/ci:latest"
run_script: "ci/build_docs.sh"
wheel-build-cudf:
needs: checks
secrets: inherit
Expand All @@ -91,6 +104,7 @@ jobs:
package-name: cudf
package-dir: python/cudf
skbuild-configure-options: "-DCUDF_BUILD_WHEELS=ON -DDETECT_CONDA_ENV=OFF"
uses-setup-env-vars: false
wheel-tests-cudf:
needs: wheel-build-cudf
secrets: inherit
Expand All @@ -99,9 +113,8 @@ jobs:
build_type: pull-request
package-name: cudf
# Install cupy-cuda11x for arm from a special index url
# Install tokenizers last binary wheel to avoid a Rust compile from the latest sdist
test-before-arm64: "pip install tokenizers==0.10.2 cupy-cuda11x -f https://pip.cupy.dev/aarch64"
test-unittest: "pytest -v -n 8 ./python/cudf/cudf/tests"
test-before-arm64: "python -m pip install cupy-cuda11x -f https://pip.cupy.dev/aarch64"
test-unittest: "python -m pytest -v -n 8 ./python/cudf/cudf/tests"
test-smoketest: "python ./ci/wheel_smoke_test_cudf.py"
wheel-build-dask-cudf:
needs: wheel-tests-cudf
Expand All @@ -111,13 +124,15 @@ jobs:
build_type: pull-request
package-name: dask_cudf
package-dir: python/dask_cudf
before-wheel: "RAPIDS_PY_WHEEL_NAME=cudf_cu11 rapids-download-wheels-from-s3 ./local-cudf && pip install --no-deps ./local-cudf/cudf*.whl"
before-wheel: "RAPIDS_PY_WHEEL_NAME=cudf_cu11 rapids-download-wheels-from-s3 ./local-cudf && python -m pip install --no-deps ./local-cudf/cudf*.whl"
uses-setup-env-vars: false
wheel-tests-dask-cudf:
needs: wheel-build-dask-cudf
secrets: inherit
uses: rapidsai/shared-action-workflows/.github/workflows/[email protected]
with:
build_type: pull-request
package-name: dask_cudf
test-before: "RAPIDS_PY_WHEEL_NAME=cudf_cu11 rapids-download-wheels-from-s3 ./local-cudf-dep && pip install --no-deps ./local-cudf-dep/cudf*.whl"
test-unittest: "pytest -v -n 8 ./python/dask_cudf/dask_cudf/tests"
# Install the cudf we just built, and also test against latest dask/distributed/dask-cuda.
test-before: "RAPIDS_PY_WHEEL_NAME=cudf_cu11 rapids-download-wheels-from-s3 ./local-cudf-dep && python -m pip install --no-deps ./local-cudf-dep/cudf*.whl && pip install git+https://github.com/dask/dask.git@main git+https://github.com/dask/distributed.git@main git+https://github.com/rapidsai/[email protected]"
test-unittest: "python -m pytest -v -n 8 ./python/dask_cudf/dask_cudf/tests"
20 changes: 17 additions & 3 deletions .github/workflows/test.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,18 @@ jobs:
branch: ${{ inputs.branch }}
date: ${{ inputs.date }}
sha: ${{ inputs.sha }}
conda-cpp-memcheck-tests:
secrets: inherit
uses: rapidsai/shared-action-workflows/.github/workflows/[email protected]
with:
build_type: nightly
branch: ${{ inputs.branch }}
date: ${{ inputs.date }}
sha: ${{ inputs.sha }}
node_type: "gpu-latest-1"
arch: "amd64"
container_image: "rapidsai/ci:latest"
run_script: "ci/test_cpp_memcheck.sh"
conda-python-cudf-tests:
secrets: inherit
uses: rapidsai/shared-action-workflows/.github/workflows/[email protected]
Expand Down Expand Up @@ -74,8 +86,8 @@ jobs:
date: ${{ inputs.date }}
sha: ${{ inputs.sha }}
package-name: cudf
test-before-arm64: "pip install tokenizers==0.10.2 cupy-cuda11x -f https://pip.cupy.dev/aarch64"
test-unittest: "pytest -v -n 8 ./python/cudf/cudf/tests"
test-before-arm64: "python -m pip install cupy-cuda11x -f https://pip.cupy.dev/aarch64"
test-unittest: "python -m pytest -v -n 8 ./python/cudf/cudf/tests"
wheel-tests-dask-cudf:
secrets: inherit
uses: rapidsai/shared-action-workflows/.github/workflows/[email protected]
Expand All @@ -85,4 +97,6 @@ jobs:
date: ${{ inputs.date }}
sha: ${{ inputs.sha }}
package-name: dask_cudf
test-unittest: "pytest -v -n 8 ./python/dask_cudf/dask_cudf/tests"
# Test against latest dask/distributed/dask-cuda.
test-before: "pip install git+https://github.com/dask/dask.git@main git+https://github.com/dask/distributed.git@main git+https://github.com/rapidsai/[email protected]"
test-unittest: "python -m pytest -v -n 8 ./python/dask_cudf/dask_cudf/tests"
5 changes: 3 additions & 2 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -36,8 +36,6 @@ python/cudf_kafka/*/_lib/**/*.cpp
python/cudf_kafka/*/_lib/**/*.h
python/custreamz/*/_lib/**/*.cpp
python/custreamz/*/_lib/**/*.h
python/strings_udf/strings_udf/_lib/*.cpp
python/strings_udf/strings_udf/*.ptx
.Python
env/
develop-eggs/
Expand Down Expand Up @@ -168,6 +166,9 @@ docs/cudf/source/api_docs/generated/*
docs/cudf/source/api_docs/api/*
docs/cudf/source/user_guide/example_output/*
docs/cudf/source/user_guide/cudf.*Dtype.*.rst
_html
_text
jupyter_execute

# cibuildwheel
/wheelhouse
30 changes: 24 additions & 6 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ repos:
rev: 5.0.4
hooks:
- id: flake8
args: ["--config=setup.cfg"]
args: ["--config=.flake8"]
files: python/.*$
types: [file]
types_or: [python, cython]
Expand All @@ -48,7 +48,7 @@ repos:
hooks:
- id: mypy
additional_dependencies: [types-cachetools]
args: ["--config-file=setup.cfg",
args: ["--config-file=pyproject.toml",
"python/cudf/cudf",
"python/custreamz/custreamz",
"python/cudf_kafka/cudf_kafka",
Expand All @@ -58,7 +58,19 @@ repos:
rev: 6.1.1
hooks:
- id: pydocstyle
args: ["--config=setup.cfg"]
# https://github.com/PyCQA/pydocstyle/issues/603
additional_dependencies: [toml]
args: ["--config=pyproject.toml"]
- repo: https://github.com/nbQA-dev/nbQA
rev: 1.6.3
hooks:
- id: nbqa-isort
# Use the cudf_kafka isort orderings in notebooks so that dask
# and RAPIDS packages have their own sections.
args: ["--settings-file=python/cudf_kafka/pyproject.toml"]
- id: nbqa-black
# Explicitly specify the pyproject.toml at the repo root, not per-project.
args: ["--config=pyproject.toml"]
- repo: https://github.com/pre-commit/mirrors-clang-format
rev: v11.1.0
hooks:
Expand Down Expand Up @@ -138,15 +150,21 @@ repos:
pass_filenames: false
verbose: false
- repo: https://github.com/codespell-project/codespell
rev: v2.1.0
rev: v2.2.2
hooks:
- id: codespell
additional_dependencies: [tomli]
args: ["--toml", "pyproject.toml"]
exclude: |
(?x)^(
.*test.*|
^CHANGELOG.md$|
^.*versioneer.py$
^CHANGELOG.md$
)
- repo: https://github.com/rapidsai/dependency-file-generator
rev: v1.5.1
hooks:
- id: rapids-dependency-file-generator
args: ["--clean"]

default_language_version:
python: python3
23 changes: 9 additions & 14 deletions build.sh
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ ARGS=$*
# script, and that this script resides in the repo dir!
REPODIR=$(cd $(dirname $0); pwd)

VALIDARGS="clean libcudf cudf cudfjar dask_cudf benchmarks tests libcudf_kafka cudf_kafka custreamz strings_udf -v -g -n -l --allgpuarch --disable_nvtx --opensource_nvcomp --show_depr_warn --ptds -h --build_metrics --incl_cache_stats"
VALIDARGS="clean libcudf cudf cudfjar dask_cudf benchmarks tests libcudf_kafka cudf_kafka custreamz -v -g -n -l --allgpuarch --disable_nvtx --opensource_nvcomp --show_depr_warn --ptds -h --build_metrics --incl_cache_stats"
HELP="$0 [clean] [libcudf] [cudf] [cudfjar] [dask_cudf] [benchmarks] [tests] [libcudf_kafka] [cudf_kafka] [custreamz] [-v] [-g] [-n] [-h] [--cmake-args=\\\"<args>\\\"]
clean - remove all existing build artifacts and configuration (start
over)
Expand Down Expand Up @@ -300,8 +300,7 @@ if buildAll || hasArg libcudf; then
# Record build times
if [[ "$BUILD_REPORT_METRICS" == "ON" && -f "${LIB_BUILD_DIR}/.ninja_log" ]]; then
echo "Formatting build metrics"
python ${REPODIR}/cpp/scripts/sort_ninja_log.py ${LIB_BUILD_DIR}/.ninja_log --fmt xml > ${LIB_BUILD_DIR}/ninja_log.xml
MSG="<p>"
MSG=""
# get some sccache stats after the compile
if [[ "$BUILD_REPORT_INCL_CACHE_STATS" == "ON" && -x "$(command -v sccache)" ]]; then
COMPILE_REQUESTS=$(sccache -s | grep "Compile requests \+ [0-9]\+$" | awk '{ print $NF }')
Expand All @@ -315,9 +314,13 @@ if buildAll || hasArg libcudf; then
LIBCUDF_FS=$(ls -lh ${LIB_BUILD_DIR}/libcudf.so | awk '{print $5}')
MSG="${MSG}<br/>libcudf.so size: $LIBCUDF_FS"
fi
echo "$MSG"
python ${REPODIR}/cpp/scripts/sort_ninja_log.py ${LIB_BUILD_DIR}/.ninja_log --fmt html --msg "$MSG" > ${LIB_BUILD_DIR}/ninja_log.html
cp ${LIB_BUILD_DIR}/.ninja_log ${LIB_BUILD_DIR}/ninja.log
BMR_DIR=${RAPIDS_ARTIFACTS_DIR:-"${LIB_BUILD_DIR}"}
echo "Metrics output dir: [$BMR_DIR]"
mkdir -p ${BMR_DIR}
MSG_OUTFILE="$(mktemp)"
echo "$MSG" > "${MSG_OUTFILE}"
python ${REPODIR}/cpp/scripts/sort_ninja_log.py ${LIB_BUILD_DIR}/.ninja_log --fmt html --msg "${MSG_OUTFILE}" > ${BMR_DIR}/ninja_log.html
cp ${LIB_BUILD_DIR}/.ninja_log ${BMR_DIR}/ninja.log
fi

if [[ ${INSTALL_TARGET} != "" ]]; then
Expand All @@ -335,14 +338,6 @@ if buildAll || hasArg cudf; then
fi
fi

if buildAll || hasArg strings_udf; then

cd ${REPODIR}/python/strings_udf
python setup.py build_ext --inplace -- -DCMAKE_PREFIX_PATH=${INSTALL_PREFIX} -DCMAKE_LIBRARY_PATH=${LIBCUDF_BUILD_DIR} -DCMAKE_CUDA_ARCHITECTURES=${CUDF_CMAKE_CUDA_ARCHITECTURES} ${EXTRA_CMAKE_ARGS} -- -j${PARALLEL_LEVEL:-1}
if [[ ${INSTALL_TARGET} != "" ]]; then
python setup.py install --single-version-externally-managed --record=record.txt -- -DCMAKE_PREFIX_PATH=${INSTALL_PREFIX} -DCMAKE_LIBRARY_PATH=${LIBCUDF_BUILD_DIR} ${EXTRA_CMAKE_ARGS} -- -j${PARALLEL_LEVEL:-1}
fi
fi

# Build and install the dask_cudf Python package
if buildAll || hasArg dask_cudf; then
Expand Down
28 changes: 27 additions & 1 deletion ci/build_cpp.sh
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
#!/bin/bash
# Copyright (c) 2022, NVIDIA CORPORATION.
# Copyright (c) 2022-2023, NVIDIA CORPORATION.

set -euo pipefail

Expand All @@ -14,3 +14,29 @@ rapids-logger "Begin cpp build"
rapids-mamba-retry mambabuild conda/recipes/libcudf

rapids-upload-conda-to-s3 cpp

echo "++++++++++++++++++++++++++++++++++++++++++++"

if [[ -d $RAPIDS_ARTIFACTS_DIR ]]; then
ls -l ${RAPIDS_ARTIFACTS_DIR}
fi

echo "++++++++++++++++++++++++++++++++++++++++++++"

FILE=${RAPIDS_ARTIFACTS_DIR}/ninja.log
if [[ -f $FILE ]]; then
echo -e "\x1B[33;1m\x1B[48;5;240m Ninja log for this build available at the following link \x1B[0m"
UPLOAD_NAME=cpp_cuda${RAPIDS_CUDA_VERSION%%.*}_$(arch).ninja.log
rapids-upload-to-s3 "${UPLOAD_NAME}" "${FILE}"
fi

echo "++++++++++++++++++++++++++++++++++++++++++++"

FILE=${RAPIDS_ARTIFACTS_DIR}/ninja_log.html
if [[ -f $FILE ]]; then
echo -e "\x1B[33;1m\x1B[48;5;240m Build Metrics Report for this build available at the following link \x1B[0m"
UPLOAD_NAME=cpp_cuda${RAPIDS_CUDA_VERSION%%.*}_$(arch).BuildMetricsReport.html
rapids-upload-to-s3 "${UPLOAD_NAME}" "${FILE}"
fi

echo "++++++++++++++++++++++++++++++++++++++++++++"
48 changes: 48 additions & 0 deletions ci/build_docs.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
#!/bin/bash
# Copyright (c) 2023, NVIDIA CORPORATION.

set -euo pipefail

rapids-logger "Create test conda environment"
. /opt/conda/etc/profile.d/conda.sh

rapids-dependency-file-generator \
--output conda \
--file_key docs \
--matrix "cuda=${RAPIDS_CUDA_VERSION%.*};arch=$(arch);py=${RAPIDS_PY_VERSION}" | tee env.yaml

rapids-mamba-retry env create --force -f env.yaml -n docs
conda activate docs

rapids-print-env

rapids-logger "Downloading artifacts from previous jobs"
CPP_CHANNEL=$(rapids-download-conda-from-s3 cpp)
PYTHON_CHANNEL=$(rapids-download-conda-from-s3 python)
VERSION_NUMBER=$(rapids-get-rapids-version-from-git)

rapids-mamba-retry install \
--channel "${CPP_CHANNEL}" \
--channel "${PYTHON_CHANNEL}" \
libcudf cudf dask-cudf


rapids-logger "Build Doxygen docs"
pushd cpp/doxygen
aws s3 cp s3://rapidsai-docs/librmm/${VERSION_NUMBER}/html/rmm.tag . || echo "Failed to download rmm Doxygen tag"
doxygen Doxyfile
popd

rapids-logger "Build Sphinx docs"
pushd docs/cudf
sphinx-build -b dirhtml source _html
sphinx-build -b text source _text
popd


if [[ ${RAPIDS_BUILD_TYPE} == "branch" ]]; then
rapids-logger "Upload Docs to S3"
aws s3 sync --no-progress --delete cpp/doxygen/html "s3://rapidsai-docs/libcudf/${VERSION_NUMBER}/html"
aws s3 sync --no-progress --delete docs/cudf/_html "s3://rapidsai-docs/cudf/${VERSION_NUMBER}/html"
aws s3 sync --no-progress --delete docs/cudf/_text "s3://rapidsai-docs/cudf/${VERSION_NUMBER}/txt"
fi
Loading

0 comments on commit 844a4ee

Please sign in to comment.