
Commit

Merge branch 'branch-24.04' into fix_parquet_chunk_reader_test
ttnghia authored Mar 19, 2024
2 parents f47184c + 4a5fab7 commit b78e6e9
Showing 241 changed files with 7,377 additions and 5,046 deletions.
29 changes: 14 additions & 15 deletions .github/workflows/pr.yaml
@@ -28,7 +28,7 @@ jobs:
- wheel-tests-dask-cudf
- devcontainer
- unit-tests-cudf-pandas
# - pandas-tests
- pandas-tests
#- pandas-tests-diff
#- pandas-tests-diff-comment
secrets: inherit
@@ -69,15 +69,15 @@ jobs:
uses: rapidsai/shared-workflows/.github/workflows/[email protected]
with:
build_type: pull-request
test_script: "ci/test_python_cudf.sh"
script: "ci/test_python_cudf.sh"
conda-python-other-tests:
# Tests for dask_cudf, custreamz, cudf_kafka are separated for CI parallelism
needs: conda-python-build
secrets: inherit
uses: rapidsai/shared-workflows/.github/workflows/[email protected]
with:
build_type: pull-request
test_script: "ci/test_python_other.sh"
script: "ci/test_python_other.sh"
conda-java-tests:
needs: conda-cpp-build
secrets: inherit
@@ -156,21 +156,20 @@ jobs:
secrets: inherit
uses: rapidsai/shared-workflows/.github/workflows/[email protected]
with:
# This selects "ARCH=amd64 + the latest supported Python + CUDA".
matrix_filter: map(select(.ARCH == "amd64")) | group_by(.CUDA_VER|split(".")|map(tonumber)|.[0]) | map(max_by([(.PY_VER|split(".")|map(tonumber)), (.CUDA_VER|split(".")|map(tonumber))]))
build_type: pull-request
script: ci/cudf_pandas_scripts/run_tests.sh
# pandas-tests:
# # run the Pandas unit tests using PR branch
# needs: wheel-build-cudf
# secrets: inherit
# uses: rapidsai/shared-workflows/.github/workflows/[email protected]
# with:
# matrix_filter: map(select(.ARCH == "amd64")) | max_by(.CUDA_VER) | [.]
# build_type: pull-request
# script: ci/cudf_pandas_scripts/pandas-tests/run.sh pr
# # Hide test failures because they exceed the GITHUB_STEP_SUMMARY output limit.
# test_summary_show: "none"
pandas-tests:
# run the Pandas unit tests using PR branch
needs: wheel-build-cudf
secrets: inherit
uses: rapidsai/shared-workflows/.github/workflows/[email protected]
with:
matrix_filter: map(select(.ARCH == "amd64")) | group_by(.CUDA_VER|split(".")|map(tonumber)|.[0]) | map(max_by([(.PY_VER|split(".")|map(tonumber)), (.CUDA_VER|split(".")|map(tonumber))]))
build_type: pull-request
script: ci/cudf_pandas_scripts/pandas-tests/run.sh pr
# Hide test failures because they exceed the GITHUB_STEP_SUMMARY output limit.
test_summary_show: "none"
#pandas-tests-diff:
# # diff the results of running the Pandas unit tests and publish a job summary
# needs: [pandas-tests-main, pandas-tests-pr]
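For context on the matrix_filter expression used by the jobs above, here is a sketch of what it selects when fed a hand-written matrix through jq; the sample entries are illustrative, not taken from shared-workflows:

echo '[
  {"ARCH": "amd64", "PY_VER": "3.9",  "CUDA_VER": "11.8.0"},
  {"ARCH": "amd64", "PY_VER": "3.10", "CUDA_VER": "11.8.0"},
  {"ARCH": "amd64", "PY_VER": "3.10", "CUDA_VER": "12.2.2"},
  {"ARCH": "arm64", "PY_VER": "3.10", "CUDA_VER": "12.2.2"}
]' | jq 'map(select(.ARCH == "amd64"))
  | group_by(.CUDA_VER|split(".")|map(tonumber)|.[0])
  | map(max_by([(.PY_VER|split(".")|map(tonumber)), (.CUDA_VER|split(".")|map(tonumber))]))'
# Keeps one amd64 entry per CUDA major version: the highest Python (then CUDA) in each group,
# here 3.10 + 11.8.0 and 3.10 + 12.2.2.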
27 changes: 13 additions & 14 deletions .github/workflows/test.yaml
@@ -51,7 +51,7 @@ jobs:
branch: ${{ inputs.branch }}
date: ${{ inputs.date }}
sha: ${{ inputs.sha }}
test_script: "ci/test_python_cudf.sh"
script: "ci/test_python_cudf.sh"
conda-python-other-tests:
# Tests for dask_cudf, custreamz, cudf_kafka are separated for CI parallelism
secrets: inherit
@@ -61,7 +61,7 @@ jobs:
branch: ${{ inputs.branch }}
date: ${{ inputs.date }}
sha: ${{ inputs.sha }}
test_script: "ci/test_python_other.sh"
script: "ci/test_python_other.sh"
conda-java-tests:
secrets: inherit
uses: rapidsai/shared-workflows/.github/workflows/[email protected]
@@ -115,15 +115,14 @@ jobs:
date: ${{ inputs.date }}
sha: ${{ inputs.sha }}
script: ci/cudf_pandas_scripts/run_tests.sh
# pandas-tests:
# # run the Pandas unit tests
# secrets: inherit
# uses: rapidsai/shared-workflows/.github/workflows/[email protected]
# with:
# matrix_filter: map(select(.ARCH == "amd64")) | max_by(.CUDA_VER) | [.]
# build_type: nightly
# branch: ${{ inputs.branch }}
# date: ${{ inputs.date }}
# sha: ${{ inputs.sha }}
# # pr mode uses the HEAD of the branch, which is also correct for nightlies
# script: ci/cudf_pandas_scripts/pandas-tests/run.sh pr
pandas-tests:
# run the Pandas unit tests
secrets: inherit
uses: rapidsai/shared-workflows/.github/workflows/[email protected]
with:
matrix_filter: map(select(.ARCH == "amd64")) | group_by(.CUDA_VER|split(".")|map(tonumber)|.[0]) | map(max_by([(.PY_VER|split(".")|map(tonumber)), (.CUDA_VER|split(".")|map(tonumber))]))
build_type: nightly
branch: ${{ inputs.branch }}
date: ${{ inputs.date }}
sha: ${{ inputs.sha }}
script: ci/cudf_pandas_scripts/pandas-tests/run.sh main
12 changes: 2 additions & 10 deletions .pre-commit-config.yaml
@@ -23,13 +23,6 @@ repos:
args: ["--config-root=python/", "--resolve-all-configs"]
files: python/.*
types_or: [python, cython, pyi]
- repo: https://github.com/psf/black
rev: 23.12.1
hooks:
- id: black
files: python/.*
# Explicitly specify the pyproject.toml at the repo root, not per-project.
args: ["--config", "pyproject.toml"]
- repo: https://github.com/MarcoGorelli/cython-lint
rev: v0.16.0
hooks:
@@ -64,9 +57,6 @@ repos:
# Use the cudf_kafka isort orderings in notebooks so that dask
# and RAPIDS packages have their own sections.
args: ["--settings-file=python/cudf_kafka/pyproject.toml"]
- id: nbqa-black
# Explicitly specify the pyproject.toml at the repo root, not per-project.
args: ["--config=pyproject.toml"]
- repo: https://github.com/pre-commit/mirrors-clang-format
rev: v16.0.6
hooks:
@@ -155,6 +145,8 @@ repos:
hooks:
- id: ruff
files: python/.*$
- id: ruff-format
files: python/.*$
- repo: https://github.com/rapidsai/pre-commit-hooks
rev: v0.0.1
hooks:
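With the black and nbqa-black hooks removed, Ruff's formatter takes over Python formatting. A quick way to exercise the new hooks locally, using standard pre-commit invocations and the hook ids from the config above:

# Run only the Ruff hooks against the whole tree
pre-commit run ruff --all-files
pre-commit run ruff-format --all-files
# Or run the full suite, as the CI style check does
pre-commit run --all-files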
4 changes: 3 additions & 1 deletion ci/build_docs.sh
@@ -3,7 +3,9 @@

set -euo pipefail

export RAPIDS_VERSION_NUMBER="$(rapids-generate-version)"
export RAPIDS_VERSION="$(rapids-version)"
export RAPIDS_VERSION_MAJOR_MINOR="$(rapids-version-major-minor)"
export RAPIDS_VERSION_NUMBER="$RAPIDS_VERSION_MAJOR_MINOR"

rapids-logger "Create test conda environment"
. /opt/conda/etc/profile.d/conda.sh
2 changes: 1 addition & 1 deletion ci/build_wheel_dask_cudf.sh
@@ -8,4 +8,4 @@ package_dir="python/dask_cudf"
./ci/build_wheel.sh dask-cudf ${package_dir}

RAPIDS_PY_CUDA_SUFFIX="$(rapids-wheel-ctk-name-gen ${RAPIDS_CUDA_VERSION})"
RAPIDS_PY_WHEEL_NAME="dask_cudf_${RAPIDS_PY_CUDA_SUFFIX}" rapids-upload-wheels-to-s3 ${package_dir}/dist
RAPIDS_PY_WHEEL_NAME="dask_cudf_${RAPIDS_PY_CUDA_SUFFIX}" RAPIDS_PY_WHEEL_PURE="1" rapids-upload-wheels-to-s3 ${package_dir}/dist
4 changes: 3 additions & 1 deletion ci/check_style.sh
@@ -16,7 +16,9 @@ rapids-dependency-file-generator \
rapids-mamba-retry env create --force -f "${ENV_YAML_DIR}/env.yaml" -n checks
conda activate checks

FORMAT_FILE_URL=https://raw.githubusercontent.com/rapidsai/rapids-cmake/branch-24.04/cmake-format-rapids-cmake.json
RAPIDS_VERSION_MAJOR_MINOR="$(rapids-version-major-minor)"

FORMAT_FILE_URL="https://raw.githubusercontent.com/rapidsai/rapids-cmake/branch-${RAPIDS_VERSION_MAJOR_MINOR}/cmake-format-rapids-cmake.json"
export RAPIDS_CMAKE_FORMAT_FILE=/tmp/rapids_cmake_ci/cmake-formats-rapids-cmake.json
mkdir -p $(dirname ${RAPIDS_CMAKE_FORMAT_FILE})
wget -O ${RAPIDS_CMAKE_FORMAT_FILE} ${FORMAT_FILE_URL}
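A rough way to confirm that the now version-derived URL resolves, sketched under the assumption that gha-tools' rapids-version-major-minor is on PATH (as it is in the CI images):

RAPIDS_VERSION_MAJOR_MINOR="$(rapids-version-major-minor)"   # e.g. 24.04 on this branch
curl -fsIL "https://raw.githubusercontent.com/rapidsai/rapids-cmake/branch-${RAPIDS_VERSION_MAJOR_MINOR}/cmake-format-rapids-cmake.json" >/dev/null \
  && echo "cmake-format config reachable"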
7 changes: 6 additions & 1 deletion ci/checks/doxygen.sh
@@ -1,5 +1,5 @@
#!/bin/bash
# Copyright (c) 2022-2023, NVIDIA CORPORATION.
# Copyright (c) 2022-2024, NVIDIA CORPORATION.
###############################
# cuDF doxygen warnings check #
###############################
@@ -21,6 +21,11 @@ if [ ! $(version "$DOXYGEN_VERSION") -eq $(version "1.9.1") ] ; then
exit 0
fi

# Set variables for doxygen
# We can't use gha-tools' rapids-version and rapids-version-major-minor here because this script can run outside of CI
export RAPIDS_VERSION="$(sed -E -e "s/^([0-9]{2})\.([0-9]{2})\.([0-9]{2}).*$/\1.\2.\3/" VERSION)"
export RAPIDS_VERSION_MAJOR_MINOR="$(sed -E -e "s/^([0-9]{2})\.([0-9]{2})\.([0-9]{2}).*$/\1.\2/" VERSION)"

# Run doxygen, ignore missing tag files error
TAG_ERROR1="error: Tag file '.*.tag' does not exist or is not a file. Skipping it..."
TAG_ERROR2="error: cannot open tag file .*.tag for writing"
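The two sed expressions derive the versions directly from the VERSION file so the check also works outside CI. A minimal illustration, assuming VERSION holds a RAPIDS-style version string:

echo "24.04.00" | sed -E -e "s/^([0-9]{2})\.([0-9]{2})\.([0-9]{2}).*$/\1.\2.\3/"   # -> 24.04.00
echo "24.04.00" | sed -E -e "s/^([0-9]{2})\.([0-9]{2})\.([0-9]{2}).*$/\1.\2/"      # -> 24.04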
4 changes: 3 additions & 1 deletion ci/cudf_pandas_scripts/pandas-tests/run.sh
@@ -33,14 +33,16 @@ mkdir -p "${RAPIDS_TESTS_DIR}"

bash python/cudf/cudf/pandas/scripts/run-pandas-tests.sh \
-n 10 \
--tb=line \
--tb=no \
-m "not slow" \
--max-worker-restart=3 \
--junitxml="${RAPIDS_TESTS_DIR}/junit-cudf-pandas.xml" \
--dist worksteal \
--report-log=${PANDAS_TESTS_BRANCH}.json 2>&1

# summarize the results and save them to artifacts:
python python/cudf/cudf/pandas/scripts/summarize-test-results.py --output json pandas-testing/${PANDAS_TESTS_BRANCH}.json > pandas-testing/${PANDAS_TESTS_BRANCH}-results.json
RAPIDS_ARTIFACTS_DIR=${RAPIDS_ARTIFACTS_DIR:-"${PWD}/artifacts"}
mkdir -p "${RAPIDS_ARTIFACTS_DIR}"
mv pandas-testing/${PANDAS_TESTS_BRANCH}-results.json ${RAPIDS_ARTIFACTS_DIR}/
rapids-upload-to-s3 ${RAPIDS_ARTIFACTS_DIR}/${PANDAS_TESTS_BRANCH}-results.json "${RAPIDS_ARTIFACTS_DIR}"
9 changes: 0 additions & 9 deletions ci/release/update-version.sh
@@ -44,12 +44,6 @@ echo "${NEXT_FULL_TAG}" > VERSION
# Wheel testing script
sed_runner "s/branch-.*/branch-${NEXT_SHORT_TAG}/g" ci/test_wheel_dask_cudf.sh

# cmake-format rapids-cmake definitions
sed_runner 's/'"branch-.*\/cmake-format-rapids-cmake.json"'/'"branch-${NEXT_SHORT_TAG}\/cmake-format-rapids-cmake.json"'/g' ci/check_style.sh

# doxyfile update
sed_runner 's/PROJECT_NUMBER = .*/PROJECT_NUMBER = '${NEXT_FULL_TAG}'/g' cpp/doxygen/Doxyfile

DEPENDENCIES=(
cudf
cudf_kafka
@@ -71,9 +65,6 @@ for DEP in "${DEPENDENCIES[@]}"; do
done
done

# Doxyfile update
sed_runner "s|\(TAGFILES.*librmm/\).*|\1${NEXT_SHORT_TAG}|" cpp/doxygen/Doxyfile

# README.md update
sed_runner "s/version == ${CURRENT_SHORT_TAG}/version == ${NEXT_SHORT_TAG}/g" README.md
sed_runner "s/cudf=${CURRENT_SHORT_TAG}/cudf=${NEXT_SHORT_TAG}/g" README.md
3 changes: 3 additions & 0 deletions ci/run_cudf_memcheck_ctests.sh
@@ -10,6 +10,8 @@ trap "EXITCODE=1" ERR
cd "${INSTALL_PREFIX:-${CONDA_PREFIX:-/usr}}/bin/gtests/libcudf/";

export GTEST_CUDF_RMM_MODE=cuda
# compute-sanitizer bug 4553815
export LIBCUDF_MEMCHECK_ENABLED=1
for gt in ./*_TEST ; do
test_name=$(basename ${gt})
# Run gtests with compute-sanitizer
@@ -20,5 +22,6 @@ for gt in ./*_TEST ; do
compute-sanitizer --tool memcheck ${gt} "$@"
done
unset GTEST_CUDF_RMM_MODE
unset LIBCUDF_MEMCHECK_ENABLED

exit ${EXITCODE}
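To reproduce a single memcheck run outside the loop, the same environment can be set around one test binary; the install prefix and binary name below are placeholders, not taken from this diff:

cd "${CONDA_PREFIX}/bin/gtests/libcudf"          # placeholder install prefix
export GTEST_CUDF_RMM_MODE=cuda
export LIBCUDF_MEMCHECK_ENABLED=1                # see the compute-sanitizer bug 4553815 note above
compute-sanitizer --tool memcheck ./COLUMN_TEST  # hypothetical test binary name
unset GTEST_CUDF_RMM_MODE LIBCUDF_MEMCHECK_ENABLED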
8 changes: 8 additions & 0 deletions ci/test_python_other.sh
@@ -29,6 +29,14 @@ rapids-logger "pytest dask_cudf"
--cov-report=xml:"${RAPIDS_COVERAGE_DIR}/dask-cudf-coverage.xml" \
--cov-report=term

# Run tests in dask_cudf/tests and dask_cudf/io/tests with dask-expr
rapids-logger "pytest dask_cudf + dask_expr"
DASK_DATAFRAME__QUERY_PLANNING=True ./ci/run_dask_cudf_pytests.sh \
--junitxml="${RAPIDS_TESTS_DIR}/junit-dask-cudf-expr.xml" \
--numprocesses=8 \
--dist=loadscope \
.

rapids-logger "pytest custreamz"
./ci/run_custreamz_pytests.sh \
--junitxml="${RAPIDS_TESTS_DIR}/junit-custreamz.xml" \
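DASK_DATAFRAME__QUERY_PLANNING is the environment-variable form of Dask's dataframe.query-planning config option, which switches dask.dataframe onto the dask-expr backend. A quick sanity check that the switch is picked up, assuming dask and dask-expr are installed in the test environment:

DASK_DATAFRAME__QUERY_PLANNING=True python -c "import dask; print(dask.config.get('dataframe.query-planning'))"
# Expected to print True when query planning (dask-expr) will be used for the test run above.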
11 changes: 10 additions & 1 deletion ci/test_wheel_dask_cudf.sh
@@ -4,7 +4,7 @@
set -eou pipefail

RAPIDS_PY_CUDA_SUFFIX="$(rapids-wheel-ctk-name-gen ${RAPIDS_CUDA_VERSION})"
RAPIDS_PY_WHEEL_NAME="dask_cudf_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-from-s3 ./dist
RAPIDS_PY_WHEEL_NAME="dask_cudf_${RAPIDS_PY_CUDA_SUFFIX}" RAPIDS_PY_WHEEL_PURE="1" rapids-download-wheels-from-s3 ./dist

# Download the cudf built in the previous step
# Set the manylinux version used for downloading the wheels so that we test the
@@ -38,3 +38,12 @@ python -m pytest \
--numprocesses=8 \
.
popd

# Run tests in dask_cudf/tests and dask_cudf/io/tests with dask-expr
rapids-logger "pytest dask_cudf + dask_expr"
pushd python/dask_cudf/dask_cudf
DASK_DATAFRAME__QUERY_PLANNING=True python -m pytest \
--junitxml="${RAPIDS_TESTS_DIR}/junit-dask-cudf-expr.xml" \
--numprocesses=8 \
.
popd
4 changes: 2 additions & 2 deletions conda/environments/all_cuda-118_arch-x86_64.yaml
@@ -28,7 +28,7 @@ dependencies:
- cxx-compiler
- cython>=3.0.3
- dask-cuda==24.4.*
- dlpack>=0.5,<0.6.0a0
- dlpack>=0.8,<1.0
- doxygen=1.9.1
- fastavro>=0.22.9
- fmt>=10.1.1,<11
@@ -58,7 +58,7 @@ dependencies:
- ninja
- notebook
- numba>=0.57
- numpy>=1.23
- numpy>=1.23,<2.0a0
- numpydoc
- nvcc_linux-64=11.8
- nvcomp==3.0.6
4 changes: 2 additions & 2 deletions conda/environments/all_cuda-122_arch-x86_64.yaml
@@ -29,7 +29,7 @@ dependencies:
- cxx-compiler
- cython>=3.0.3
- dask-cuda==24.4.*
- dlpack>=0.5,<0.6.0a0
- dlpack>=0.8,<1.0
- doxygen=1.9.1
- fastavro>=0.22.9
- fmt>=10.1.1,<11
@@ -57,7 +57,7 @@ dependencies:
- ninja
- notebook
- numba>=0.57
- numpy>=1.23
- numpy>=1.23,<2.0a0
- numpydoc
- nvcomp==3.0.6
- nvtx>=0.2.1
2 changes: 1 addition & 1 deletion conda/recipes/cudf/meta.yaml
@@ -64,7 +64,7 @@ requirements:
- cython >=3.0.3
- scikit-build-core >=0.7.0
- setuptools
- dlpack >=0.5,<0.6.0a0
- dlpack >=0.8,<1.0
- numpy 1.23
- pyarrow ==14.0.2.*
- libcudf ={{ version }}
2 changes: 1 addition & 1 deletion conda/recipes/libcudf/conda_build_config.yaml
@@ -26,7 +26,7 @@ libarrow_version:
- "==14.0.2"

dlpack_version:
- ">=0.5,<0.6.0a0"
- ">=0.8,<1.0"

librdkafka_version:
- ">=1.9.0,<1.10.0a0"
10 changes: 8 additions & 2 deletions cpp/CMakeLists.txt
@@ -182,6 +182,8 @@ endif()
rapids_cpm_init()
# find jitify
include(cmake/thirdparty/get_jitify.cmake)
# find NVTX
include(cmake/thirdparty/get_nvtx.cmake)
# find nvCOMP
include(cmake/thirdparty/get_nvcomp.cmake)
# find CCCL before rmm so that we get cudf's patched version of CCCL
@@ -337,6 +339,7 @@ add_library(
src/groupby/sort/group_count_scan.cu
src/groupby/sort/group_max_scan.cu
src/groupby/sort/group_min_scan.cu
src/groupby/sort/group_product_scan.cu
src/groupby/sort/group_rank_scan.cu
src/groupby/sort/group_replace_nulls.cu
src/groupby/sort/group_sum_scan.cu
@@ -382,6 +385,7 @@ add_library(
src/io/json/read_json.cu
src/io/json/legacy/json_gpu.cu
src/io/json/legacy/reader_impl.cu
src/io/json/parser_features.cpp
src/io/json/write_json.cu
src/io/orc/aggregate_orc_metadata.cpp
src/io/orc/dict_enc.cu
@@ -411,6 +415,7 @@ add_library(
src/io/parquet/reader_impl_helpers.cpp
src/io/parquet/reader_impl_preprocess.cu
src/io/parquet/writer_impl.cu
src/io/parquet/decode_fixed.cu
src/io/statistics/orc_column_statistics.cu
src/io/statistics/parquet_column_statistics.cu
src/io/text/byte_range_info.cpp
@@ -776,7 +781,7 @@ add_dependencies(cudf jitify_preprocess_run)
target_link_libraries(
cudf
PUBLIC ${ARROW_LIBRARIES} CCCL::CCCL rmm::rmm
PRIVATE cuco::cuco ZLIB::ZLIB nvcomp::nvcomp kvikio::kvikio
PRIVATE $<BUILD_LOCAL_INTERFACE:nvtx3-cpp> cuco::cuco ZLIB::ZLIB nvcomp::nvcomp kvikio::kvikio
$<TARGET_NAME_IF_EXISTS:cuFile_interface>
)

@@ -1081,7 +1086,8 @@ rapids_export(
add_custom_command(
OUTPUT CUDF_DOXYGEN
WORKING_DIRECTORY ${CUDF_SOURCE_DIR}/doxygen
COMMAND doxygen Doxyfile
COMMAND ${CMAKE_COMMAND} -E env "RAPIDS_VERSION=${RAPIDS_VERSION}"
"RAPIDS_VERSION_MAJOR_MINOR=${RAPIDS_VERSION_MAJOR_MINOR}" doxygen Doxyfile
VERBATIM
COMMENT "Custom command for building cudf doxygen docs."
)
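Because the docs custom command now injects the version through ${CMAKE_COMMAND} -E env, the Doxyfile can pick it up at build time instead of being rewritten by update-version.sh (hence the deletions in that script earlier in this diff). An equivalent manual invocation, with illustrative values rather than ones read from VERSION, assuming the Doxyfile references the variables via doxygen's $(VAR) environment expansion:

cd cpp/doxygen
RAPIDS_VERSION=24.04.00 RAPIDS_VERSION_MAJOR_MINOR=24.04 doxygen Doxyfile   # values illustrative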
2 changes: 1 addition & 1 deletion cpp/benchmarks/CMakeLists.txt
@@ -25,7 +25,7 @@ target_compile_options(
target_link_libraries(
cudf_datagen
PUBLIC GTest::gmock GTest::gtest benchmark::benchmark nvbench::nvbench Threads::Threads cudf
cudftestutil
cudftestutil nvtx3-cpp
PRIVATE $<TARGET_NAME_IF_EXISTS:conda_env>
)
