Skip to content

Commit

Permalink
Merge branch 'branch-24.08' into fea/use_cudf_hidden_for_cuda_kernels
Browse files Browse the repository at this point in the history
  • Loading branch information
robertmaynard authored Jul 5, 2024
2 parents cd1d047 + a583c97 commit 2458866
Show file tree
Hide file tree
Showing 89 changed files with 2,192 additions and 509 deletions.
16 changes: 13 additions & 3 deletions .github/workflows/pr.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,8 @@ jobs:
- docs-build
- wheel-build-cudf
- wheel-tests-cudf
- test-cudf-polars
- wheel-build-cudf-polars
- wheel-tests-cudf-polars
- wheel-build-dask-cudf
- wheel-tests-dask-cudf
- devcontainer
Expand Down Expand Up @@ -133,17 +134,26 @@ jobs:
with:
build_type: pull-request
script: ci/test_wheel_cudf.sh
test-cudf-polars:
wheel-build-cudf-polars:
needs: wheel-build-cudf
secrets: inherit
uses: rapidsai/shared-workflows/.github/workflows/[email protected]
with:
# This selects "ARCH=amd64 + the latest supported Python + CUDA".
matrix_filter: map(select(.ARCH == "amd64")) | group_by(.CUDA_VER|split(".")|map(tonumber)|.[0]) | map(max_by([(.PY_VER|split(".")|map(tonumber)), (.CUDA_VER|split(".")|map(tonumber))]))
build_type: pull-request
script: "ci/build_wheel_cudf_polars.sh"
wheel-tests-cudf-polars:
needs: wheel-build-cudf-polars
secrets: inherit
uses: rapidsai/shared-workflows/.github/workflows/[email protected]
with:
# This selects "ARCH=amd64 + the latest supported Python + CUDA".
matrix_filter: map(select(.ARCH == "amd64")) | group_by(.CUDA_VER|split(".")|map(tonumber)|.[0]) | map(max_by([(.PY_VER|split(".")|map(tonumber)), (.CUDA_VER|split(".")|map(tonumber))]))
build_type: pull-request
# This always runs, but only fails if this PR touches code in
# pylibcudf or cudf_polars
script: "ci/test_cudf_polars.sh"
script: "ci/test_wheel_cudf_polars.sh"
wheel-build-dask-cudf:
needs: wheel-build-cudf
secrets: inherit
Expand Down
10 changes: 5 additions & 5 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -136,11 +136,6 @@ repos:
.*test.*|
^CHANGELOG.md$
)
- repo: https://github.com/rapidsai/dependency-file-generator
rev: v1.13.11
hooks:
- id: rapids-dependency-file-generator
args: ["--clean"]
- repo: https://github.com/astral-sh/ruff-pre-commit
rev: v0.4.8
hooks:
Expand All @@ -159,6 +154,11 @@ repos:
cpp/src/io/parquet/ipc/Schema_generated[.]h$
)
- id: verify-alpha-spec
- repo: https://github.com/rapidsai/dependency-file-generator
rev: v1.13.11
hooks:
- id: rapids-dependency-file-generator
args: ["--clean"]

default_language_version:
python: python3
9 changes: 8 additions & 1 deletion build.sh
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ ARGS=$*
# script, and that this script resides in the repo dir!
REPODIR=$(cd $(dirname $0); pwd)

VALIDARGS="clean libcudf cudf cudfjar dask_cudf benchmarks tests libcudf_kafka cudf_kafka custreamz -v -g -n --pydevelop -l --allgpuarch --disable_nvtx --opensource_nvcomp --show_depr_warn --ptds -h --build_metrics --incl_cache_stats"
VALIDARGS="clean libcudf cudf cudfjar dask_cudf benchmarks tests libcudf_kafka cudf_kafka custreamz -v -g -n --pydevelop -l --allgpuarch --disable_nvtx --opensource_nvcomp --show_depr_warn --ptds -h --build_metrics --incl_cache_stats --disable_large_strings"
HELP="$0 [clean] [libcudf] [cudf] [cudfjar] [dask_cudf] [benchmarks] [tests] [libcudf_kafka] [cudf_kafka] [custreamz] [-v] [-g] [-n] [-h] [--cmake-args=\\\"<args>\\\"]
clean - remove all existing build artifacts and configuration (start
over)
Expand All @@ -39,6 +39,7 @@ HELP="$0 [clean] [libcudf] [cudf] [cudfjar] [dask_cudf] [benchmarks] [tests] [li
--opensource_nvcomp - disable use of proprietary nvcomp extensions
--show_depr_warn - show cmake deprecation warnings
--ptds - enable per-thread default stream
--disable_large_strings - disable large strings support
--build_metrics - generate build metrics report for libcudf
--incl_cache_stats - include cache statistics in build metrics report
--cmake-args=\\\"<args>\\\" - pass arbitrary list of CMake configuration options (escape all quotes in argument)
Expand Down Expand Up @@ -69,6 +70,7 @@ BUILD_DISABLE_DEPRECATION_WARNINGS=ON
BUILD_PER_THREAD_DEFAULT_STREAM=OFF
BUILD_REPORT_METRICS=OFF
BUILD_REPORT_INCL_CACHE_STATS=OFF
BUILD_DISABLE_LARGE_STRINGS=OFF
USE_PROPRIETARY_NVCOMP=ON
PYTHON_ARGS_FOR_INSTALL="-m pip install --no-build-isolation --no-deps --config-settings rapidsai.disable-cuda=true"

Expand Down Expand Up @@ -153,6 +155,7 @@ function buildLibCudfJniInDocker {
-DCUDF_ENABLE_ARROW_S3=OFF \
-DBUILD_TESTS=OFF \
-DCUDF_USE_PER_THREAD_DEFAULT_STREAM=ON \
-DCUDF_LARGE_STRINGS_DISABLED=ON \
-DRMM_LOGGING_LEVEL=OFF \
-DBUILD_SHARED_LIBS=OFF && \
cmake --build . --parallel ${PARALLEL_LEVEL} && \
Expand Down Expand Up @@ -239,6 +242,9 @@ if [[ "${EXTRA_CMAKE_ARGS}" != *"DFIND_CUDF_CPP"* ]]; then
EXTRA_CMAKE_ARGS="${EXTRA_CMAKE_ARGS} -DFIND_CUDF_CPP=ON"
fi

if hasArg --disable_large_strings; then
BUILD_DISABLE_LARGE_STRINGS="ON"
fi

# If clean given, run it prior to any other steps
if hasArg clean; then
Expand Down Expand Up @@ -292,6 +298,7 @@ if buildAll || hasArg libcudf; then
-DBUILD_BENCHMARKS=${BUILD_BENCHMARKS} \
-DDISABLE_DEPRECATION_WARNINGS=${BUILD_DISABLE_DEPRECATION_WARNINGS} \
-DCUDF_USE_PER_THREAD_DEFAULT_STREAM=${BUILD_PER_THREAD_DEFAULT_STREAM} \
-DCUDF_LARGE_STRINGS_DISABLED=${BUILD_DISABLE_LARGE_STRINGS} \
-DCMAKE_BUILD_TYPE=${BUILD_TYPE} \
${EXTRA_CMAKE_ARGS}

Expand Down
11 changes: 11 additions & 0 deletions ci/build_wheel_cudf_polars.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
#!/bin/bash
# Copyright (c) 2023-2024, NVIDIA CORPORATION.

set -euo pipefail

# Directory of the package to build, relative to the directory this script is
# run from (the helper below is invoked as ./ci/build_wheel.sh).
package_dir="python/cudf_polars"

# Build the wheel using the shared wheel-build helper script.
./ci/build_wheel.sh "${package_dir}"

# Derive the CUDA suffix used in the wheel name, then upload the contents of
# ${package_dir}/dist to S3. Expansions are quoted so that values containing
# whitespace or glob characters are passed through as a single argument.
RAPIDS_PY_CUDA_SUFFIX="$(rapids-wheel-ctk-name-gen "${RAPIDS_CUDA_VERSION}")"
RAPIDS_PY_WHEEL_NAME="cudf_polars_${RAPIDS_PY_CUDA_SUFFIX}" RAPIDS_PY_WHEEL_PURE="1" rapids-upload-wheels-to-s3 "${package_dir}/dist"
11 changes: 11 additions & 0 deletions ci/run_cudf_polars_pytests.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
#!/bin/bash
# Copyright (c) 2024, NVIDIA CORPORATION.

set -euo pipefail

# Run the cudf_polars pytest suite. All arguments given to this script are
# forwarded to pytest unchanged.

# It is essential to cd into python/cudf_polars as `pytest-xdist` + `coverage` seem to work only at this directory level.

# Support invoking run_cudf_polars_pytests.sh outside the script directory:
# the target directory is resolved relative to this script's own (realpath-resolved) location.
cd "$(dirname "$(realpath "${BASH_SOURCE[0]}")")"/../python/cudf_polars/

# `tests` is resolved relative to the directory entered by the `cd` above.
python -m pytest --cache-clear "$@" tests
3 changes: 3 additions & 0 deletions ci/test_java.sh
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,9 @@ EXITCODE=0
trap "EXITCODE=1" ERR
set +e

# disable large strings
export LIBCUDF_LARGE_STRINGS_ENABLED=0

rapids-logger "Run Java tests"
pushd java
mvn test -B -DCUDF_JNI_ENABLE_PROFILING=OFF
Expand Down
23 changes: 8 additions & 15 deletions ci/test_cudf_polars.sh → ci/test_wheel_cudf_polars.sh
Original file line number Diff line number Diff line change
Expand Up @@ -18,19 +18,14 @@ else
fi

RAPIDS_PY_CUDA_SUFFIX="$(rapids-wheel-ctk-name-gen ${RAPIDS_CUDA_VERSION})"
RAPIDS_PY_WHEEL_NAME="cudf_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-from-s3 ./dist
RAPIDS_PY_WHEEL_NAME="cudf_polars_${RAPIDS_PY_CUDA_SUFFIX}" RAPIDS_PY_WHEEL_PURE="1" rapids-download-wheels-from-s3 ./dist

RESULTS_DIR=${RAPIDS_TESTS_DIR:-"$(mktemp -d)"}
RAPIDS_TESTS_DIR=${RAPIDS_TESTS_DIR:-"${RESULTS_DIR}/test-results"}/
mkdir -p "${RAPIDS_TESTS_DIR}"

rapids-logger "Install cudf wheel"
# echo to expand wildcard before adding `[extra]` requires for pip
python -m pip install $(echo ./dist/cudf*.whl)[test]
# Download the cudf built in the previous step
RAPIDS_PY_WHEEL_NAME="cudf_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-from-s3 ./local-cudf-dep
python -m pip install ./local-cudf-dep/cudf*.whl

rapids-logger "Install cudf_polars"
python -m pip install 'polars>=1.0'
python -m pip install --no-deps python/cudf_polars
python -m pip install $(echo ./dist/cudf_polars*.whl)[test]

rapids-logger "Run cudf_polars tests"

Expand All @@ -42,13 +37,11 @@ EXITCODE=0
trap set_exitcode ERR
set +e

python -m pytest \
--cache-clear \
./ci/run_cudf_polars_pytests.sh \
--cov cudf_polars \
--cov-fail-under=100 \
--cov-config=python/cudf_polars/pyproject.toml \
--junitxml="${RAPIDS_TESTS_DIR}/junit-cudf_polars.xml" \
python/cudf_polars/tests
--cov-config=./pyproject.toml \
--junitxml="${RAPIDS_TESTS_DIR}/junit-cudf-polars.xml"

trap ERR
set -e
Expand Down
2 changes: 1 addition & 1 deletion ci/test_wheel_dask_cudf.sh
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ RAPIDS_PY_WHEEL_NAME="dask_cudf_${RAPIDS_PY_CUDA_SUFFIX}" RAPIDS_PY_WHEEL_PURE="

# Download the cudf built in the previous step
RAPIDS_PY_WHEEL_NAME="cudf_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-from-s3 ./local-cudf-dep
python -m pip install --no-deps ./local-cudf-dep/cudf*.whl
python -m pip install ./local-cudf-dep/cudf*.whl

# echo to expand wildcard before adding `[extra]` requires for pip
python -m pip install $(echo ./dist/dask_cudf*.whl)[test]
Expand Down
7 changes: 7 additions & 0 deletions cpp/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,8 @@ option(JITIFY_USE_CACHE "Use a file cache for JIT compiled kernels" ON)
option(CUDF_BUILD_TESTUTIL "Whether to build the test utilities contained in libcudf" ON)
mark_as_advanced(CUDF_BUILD_TESTUTIL)
option(CUDF_USE_PROPRIETARY_NVCOMP "Download and use NVCOMP with proprietary extensions" ON)
option(CUDF_LARGE_STRINGS_DISABLED "Build with large string support disabled" OFF)
mark_as_advanced(CUDF_LARGE_STRINGS_DISABLED)
option(CUDF_USE_ARROW_STATIC "Build and statically link Arrow libraries" OFF)
option(CUDF_ENABLE_ARROW_ORC "Build the Arrow ORC adapter" OFF)
option(CUDF_ENABLE_ARROW_PYTHON "Find (or build) Arrow with Python support" OFF)
Expand Down Expand Up @@ -783,6 +785,11 @@ if(NOT USE_NVTX)
target_compile_definitions(cudf PUBLIC NVTX_DISABLE)
endif()

# Disable large strings support: when the CUDF_LARGE_STRINGS_DISABLED option is ON, define a
# compile definition of the same name on the cudf target. The definition is PRIVATE, so it applies
# when compiling cudf itself and is not propagated to targets that link against cudf.
if(CUDF_LARGE_STRINGS_DISABLED)
target_compile_definitions(cudf PRIVATE CUDF_LARGE_STRINGS_DISABLED)
endif()

# Define RMM logging level
target_compile_definitions(cudf PRIVATE "RMM_LOGGING_LEVEL=LIBCUDF_LOGGING_LEVEL")

Expand Down
9 changes: 7 additions & 2 deletions cpp/benchmarks/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -40,8 +40,13 @@ target_include_directories(

# Use an OBJECT library so we only compile these helper source files only once
add_library(
cudf_benchmark_common OBJECT "${CUDF_SOURCE_DIR}/tests/utilities/random_seed.cpp"
synchronization/synchronization.cpp io/cuio_common.cpp
cudf_benchmark_common OBJECT
"${CUDF_SOURCE_DIR}/tests/utilities/random_seed.cpp"
synchronization/synchronization.cpp
io/cuio_common.cpp
common/table_utilities.cpp
common/benchmark_utilities.cpp
common/nvbench_utilities.cpp
)
target_link_libraries(cudf_benchmark_common PRIVATE cudf_datagen $<TARGET_NAME_IF_EXISTS:conda_env>)
add_custom_command(
Expand Down
27 changes: 27 additions & 0 deletions cpp/benchmarks/common/benchmark_utilities.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
/*
* Copyright (c) 2024, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include "benchmark_utilities.hpp"

/**
 * @brief Reports the total number of items processed by a benchmark run.
 *
 * The total is the per-iteration count multiplied by the iteration count
 * reported by the benchmark state.
 *
 * @param state the benchmark state to update
 * @param items_processed_per_iteration number of items processed per iteration
 */
void set_items_processed(::benchmark::State& state, int64_t items_processed_per_iteration)
{
  auto const total_items = state.iterations() * items_processed_per_iteration;
  state.SetItemsProcessed(total_items);
}

/**
 * @brief Reports the total number of bytes processed by a benchmark run.
 *
 * The total is the per-iteration byte count multiplied by the iteration count
 * reported by the benchmark state.
 *
 * @param state the benchmark state to update
 * @param bytes_processed_per_iteration number of bytes processed per iteration
 */
void set_bytes_processed(::benchmark::State& state, int64_t bytes_processed_per_iteration)
{
  auto const total_bytes = state.iterations() * bytes_processed_per_iteration;
  state.SetBytesProcessed(total_bytes);
}
41 changes: 41 additions & 0 deletions cpp/benchmarks/common/benchmark_utilities.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
/*
* Copyright (c) 2024, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#pragma once

#include <benchmark/benchmark.h>

/**
 * @brief Sets the number of items processed during the benchmark.
 *
 * This function can be used instead of ::benchmark::State.SetItemsProcessed()
 * to avoid repeatedly computing ::benchmark::State.iterations() * items_processed_per_iteration
 * at each call site.
 *
 * @param state the benchmark state
 * @param items_processed_per_iteration number of items processed per iteration
 */
void set_items_processed(::benchmark::State& state, int64_t items_processed_per_iteration);

/**
 * @brief Sets the number of bytes processed during the benchmark.
 *
 * This function can be used instead of ::benchmark::State.SetBytesProcessed()
 * to avoid repeatedly computing ::benchmark::State.iterations() * bytes_processed_per_iteration
 * at each call site.
 *
 * @param state the benchmark state
 * @param bytes_processed_per_iteration number of bytes processed per iteration
 */
void set_bytes_processed(::benchmark::State& state, int64_t bytes_processed_per_iteration);
60 changes: 60 additions & 0 deletions cpp/benchmarks/common/nvbench_utilities.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
/*
* Copyright (c) 2024, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include "nvbench_utilities.hpp"

#include <nvbench/nvbench.cuh>

// This function is copied over from
// https://github.com/NVIDIA/nvbench/blob/a171514056e5d6a7f52a035dd6c812fa301d4f4f/nvbench/detail/measure_cold.cu#L190-L224.
//
// It reads the value stored under the "nv/cold/time/gpu/mean" summary and uses it as the
// divisor for the item-rate and byte-rate summaries that it adds to `state`.
void set_throughputs(nvbench::state& state)
{
  double const mean_gpu_time = state.get_summary("nv/cold/time/gpu/mean").get_float64("value");

  // Item rate: only added when the state reports a non-zero element count.
  auto const element_count = state.get_element_count();
  if (element_count != 0) {
    auto& item_rate = state.add_summary("nv/cold/bw/item_rate");
    item_rate.set_string("name", "Elem/s");
    item_rate.set_string("hint", "item_rate");
    item_rate.set_string("description", "Number of input elements processed per second");
    item_rate.set_float64("value", static_cast<double>(element_count) / mean_gpu_time);
  }

  // Both bandwidth summaries below are skipped when no global memory traffic was reported.
  auto const rw_bytes = state.get_global_memory_rw_bytes();
  if (rw_bytes == 0) { return; }

  double const bytes_per_second = static_cast<double>(rw_bytes) / mean_gpu_time;

  auto& byte_rate = state.add_summary("nv/cold/bw/global/bytes_per_second");
  byte_rate.set_string("name", "GlobalMem BW");
  byte_rate.set_string("hint", "byte_rate");
  byte_rate.set_string("description",
                       "Number of bytes read/written per second to the CUDA "
                       "device's global memory");
  byte_rate.set_float64("value", bytes_per_second);

  // Utilization is the achieved byte rate divided by the device's reported bus bandwidth.
  double const peak_bandwidth =
    static_cast<double>(state.get_device()->get_global_memory_bus_bandwidth());

  auto& utilization = state.add_summary("nv/cold/bw/global/utilization");
  utilization.set_string("name", "BWUtil");
  utilization.set_string("hint", "percentage");
  utilization.set_string("description",
                         "Global device memory utilization as a percentage of the "
                         "device's peak bandwidth");
  utilization.set_float64("value", bytes_per_second / peak_bandwidth);
}
31 changes: 31 additions & 0 deletions cpp/benchmarks/common/nvbench_utilities.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
/*
* Copyright (c) 2024, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#pragma once

// Forward declaration of nvbench::state so that this header can declare
// set_throughputs() without including any nvbench header itself.
namespace nvbench {
struct state;
}

/**
 * @brief Sets throughput statistics, such as "Elem/s", "GlobalMem BW", and "BWUtil" for the
 * nvbench results summary.
 *
 * This function can be used to work around a known issue: the throughput statistics
 * must be added before the nvbench::state.exec() call, otherwise they will not be printed
 * in the summary. See https://github.com/NVIDIA/nvbench/issues/175 for more details.
 *
 * @param state the nvbench state whose summaries are read and extended
 */
void set_throughputs(nvbench::state& state);
Loading

0 comments on commit 2458866

Please sign in to comment.