-
Notifications
You must be signed in to change notification settings - Fork 917
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge branch 'branch-24.08' into fea/use_cudf_hidden_for_cuda_kernels
- Loading branch information
Showing
89 changed files
with
2,192 additions
and
509 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -25,7 +25,8 @@ jobs: | |
- docs-build | ||
- wheel-build-cudf | ||
- wheel-tests-cudf | ||
- test-cudf-polars | ||
- wheel-build-cudf-polars | ||
- wheel-tests-cudf-polars | ||
- wheel-build-dask-cudf | ||
- wheel-tests-dask-cudf | ||
- devcontainer | ||
|
@@ -133,17 +134,26 @@ jobs: | |
with: | ||
build_type: pull-request | ||
script: ci/test_wheel_cudf.sh | ||
test-cudf-polars: | ||
wheel-build-cudf-polars: | ||
needs: wheel-build-cudf | ||
secrets: inherit | ||
uses: rapidsai/shared-workflows/.github/workflows/[email protected] | ||
with: | ||
# This selects "ARCH=amd64 + the latest supported Python + CUDA". | ||
matrix_filter: map(select(.ARCH == "amd64")) | group_by(.CUDA_VER|split(".")|map(tonumber)|.[0]) | map(max_by([(.PY_VER|split(".")|map(tonumber)), (.CUDA_VER|split(".")|map(tonumber))])) | ||
build_type: pull-request | ||
script: "ci/build_wheel_cudf_polars.sh" | ||
wheel-tests-cudf-polars: | ||
needs: wheel-build-cudf-polars | ||
secrets: inherit | ||
uses: rapidsai/shared-workflows/.github/workflows/[email protected] | ||
with: | ||
# This selects "ARCH=amd64 + the latest supported Python + CUDA". | ||
matrix_filter: map(select(.ARCH == "amd64")) | group_by(.CUDA_VER|split(".")|map(tonumber)|.[0]) | map(max_by([(.PY_VER|split(".")|map(tonumber)), (.CUDA_VER|split(".")|map(tonumber))])) | ||
build_type: pull-request | ||
# This always runs, but only fails if this PR touches code in | ||
# pylibcudf or cudf_polars | ||
script: "ci/test_cudf_polars.sh" | ||
script: "ci/test_wheel_cudf_polars.sh" | ||
wheel-build-dask-cudf: | ||
needs: wheel-build-cudf | ||
secrets: inherit | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,11 @@ | ||
#!/bin/bash | ||
# Copyright (c) 2023-2024, NVIDIA CORPORATION. | ||
|
||
# Builds the cudf_polars wheel via ci/build_wheel.sh and then passes the package's | ||
# dist directory to rapids-upload-wheels-to-s3. | ||
# Abort on any command failure, on use of an unset variable, and on a failure inside a pipeline. | ||
set -euo pipefail | ||
|
||
# Package directory, relative to the working directory the script is started from. | ||
package_dir="python/cudf_polars" | ||
|
||
# Delegate the build itself to ci/build_wheel.sh, passing the package directory. | ||
./ci/build_wheel.sh ${package_dir} | ||
|
||
# RAPIDS_CUDA_VERSION is not set in this script; because of `set -u`, an unset value aborts here. | ||
# NOTE(review): the expansions of package_dir and RAPIDS_CUDA_VERSION are unquoted; that is | ||
# harmless for the literal path above, but confirm the CUDA version never contains whitespace. | ||
RAPIDS_PY_CUDA_SUFFIX="$(rapids-wheel-ctk-name-gen ${RAPIDS_CUDA_VERSION})" | ||
# The wheel name embeds the suffix computed above. RAPIDS_PY_WHEEL_PURE="1" presumably marks | ||
# the wheel as pure Python for the upload tool -- TODO confirm against the RAPIDS CI tooling. | ||
RAPIDS_PY_WHEEL_NAME="cudf_polars_${RAPIDS_PY_CUDA_SUFFIX}" RAPIDS_PY_WHEEL_PURE="1" rapids-upload-wheels-to-s3 ${package_dir}/dist |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,11 @@ | ||
#!/bin/bash | ||
# Copyright (c) 2024, NVIDIA CORPORATION. | ||
|
||
# Runs pytest on the `tests` directory of python/cudf_polars, forwarding this script's arguments. | ||
# Abort on any command failure, on use of an unset variable, and on a failure inside a pipeline. | ||
set -euo pipefail | ||
|
||
# It is essential to cd into python/cudf_polars as `pytest-xdist` + `coverage` seem to work only at this directory level. | ||
|
||
# Support invoking run_cudf_polars_pytests.sh outside the script directory | ||
# The target directory is resolved from this script's real path (via realpath), not from the | ||
# caller's current working directory. | ||
cd "$(dirname "$(realpath "${BASH_SOURCE[0]}")")"/../python/cudf_polars/ | ||
|
||
# "$@" forwards every argument given to this script to pytest, ahead of the `tests` path. | ||
python -m pytest --cache-clear "$@" tests |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,27 @@ | ||
/* | ||
* Copyright (c) 2024, NVIDIA CORPORATION. | ||
* | ||
* Licensed under the Apache License, Version 2.0 (the "License"); | ||
* you may not use this file except in compliance with the License. | ||
* You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, software | ||
* distributed under the License is distributed on an "AS IS" BASIS, | ||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
* See the License for the specific language governing permissions and | ||
* limitations under the License. | ||
*/ | ||
|
||
#include "benchmark_utilities.hpp" | ||
|
||
// Records the total item count on `state`: state.iterations() multiplied by | ||
// items_processed_per_iteration, passed to state.SetItemsProcessed(). | ||
void set_items_processed(::benchmark::State& state, int64_t items_processed_per_iteration) | ||
{ | ||
state.SetItemsProcessed(state.iterations() * items_processed_per_iteration); | ||
} | ||
|
||
// Records the total byte count on `state`: state.iterations() multiplied by | ||
// bytes_processed_per_iteration, passed to state.SetBytesProcessed(). | ||
void set_bytes_processed(::benchmark::State& state, int64_t bytes_processed_per_iteration) | ||
{ | ||
state.SetBytesProcessed(state.iterations() * bytes_processed_per_iteration); | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,41 @@ | ||
/* | ||
* Copyright (c) 2024, NVIDIA CORPORATION. | ||
* | ||
* Licensed under the Apache License, Version 2.0 (the "License"); | ||
* you may not use this file except in compliance with the License. | ||
* You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, software | ||
* distributed under the License is distributed on an "AS IS" BASIS, | ||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
* See the License for the specific language governing permissions and | ||
* limitations under the License. | ||
*/ | ||
|
||
#pragma once | ||
|
||
#include <benchmark/benchmark.h> | ||
|
||
/** | ||
* @brief Sets the number of items processed during the benchmark. | ||
* | ||
* This function could be used instead of ::benchmark::State.SetItemsProcessed() | ||
* to avoid repeatedly computing ::benchmark::State.iterations() * items_processed_per_iteration. | ||
* | ||
* @param state the benchmark state | ||
* @param items_processed_per_iteration number of items processed per iteration | ||
*/ | ||
void set_items_processed(::benchmark::State& state, int64_t items_processed_per_iteration); | ||
|
||
/** | ||
* @brief Sets the number of bytes processed during the benchmark. | ||
* | ||
* This function could be used instead of ::benchmark::State.SetBytesProcessed() | ||
* to avoid repeatedly computing ::benchmark::State.iterations() * bytes_processed_per_iteration. | ||
* | ||
* @param state the benchmark state | ||
* @param bytes_processed_per_iteration number of bytes processed per iteration | ||
*/ | ||
void set_bytes_processed(::benchmark::State& state, int64_t bytes_processed_per_iteration); |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,60 @@ | ||
/* | ||
* Copyright (c) 2024, NVIDIA CORPORATION. | ||
* | ||
* Licensed under the Apache License, Version 2.0 (the "License"); | ||
* you may not use this file except in compliance with the License. | ||
* You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, software | ||
* distributed under the License is distributed on an "AS IS" BASIS, | ||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
* See the License for the specific language governing permissions and | ||
* limitations under the License. | ||
*/ | ||
|
||
#include "nvbench_utilities.hpp" | ||
|
||
#include <nvbench/nvbench.cuh> | ||
|
||
// This function is copied over from | ||
// https://github.com/NVIDIA/nvbench/blob/a171514056e5d6a7f52a035dd6c812fa301d4f4f/nvbench/detail/measure_cold.cu#L190-L224. | ||
// | ||
// Reads the existing "nv/cold/time/gpu/mean" summary from `state` and adds up to three | ||
// derived summaries: an item rate (when the element count is non-zero), and a global-memory | ||
// byte rate plus a bandwidth-utilization ratio (when the global memory read/write byte count | ||
// is non-zero). | ||
// NOTE(review): the body is kept identical to the upstream snippet so it can be re-synced; | ||
// avoid local restyling. | ||
void set_throughputs(nvbench::state& state) | ||
{ | ||
// Mean GPU time taken from the summary's "value" field; it is the divisor for both rates below. | ||
// NOTE(review): presumably expressed in seconds (the rates are described as "per second") and | ||
// presumably only available once a cold measurement has produced this summary -- confirm. | ||
// NOTE(review): there is no guard against a zero mean time; confirm that cannot occur. | ||
double avg_cuda_time = state.get_summary("nv/cold/time/gpu/mean").get_float64("value"); | ||
|
||
// Item rate: added only when a non-zero element count is reported by the state. | ||
if (const auto items = state.get_element_count(); items != 0) { | ||
auto& summ = state.add_summary("nv/cold/bw/item_rate"); | ||
summ.set_string("name", "Elem/s"); | ||
summ.set_string("hint", "item_rate"); | ||
summ.set_string("description", "Number of input elements processed per second"); | ||
summ.set_float64("value", static_cast<double>(items) / avg_cuda_time); | ||
} | ||
|
||
// Byte rate and utilization: added only when a non-zero read/write byte count is reported. | ||
if (const auto bytes = state.get_global_memory_rw_bytes(); bytes != 0) { | ||
// Computed once and shared by the two summaries below. | ||
const auto avg_used_gmem_bw = static_cast<double>(bytes) / avg_cuda_time; | ||
{ | ||
auto& summ = state.add_summary("nv/cold/bw/global/bytes_per_second"); | ||
summ.set_string("name", "GlobalMem BW"); | ||
summ.set_string("hint", "byte_rate"); | ||
summ.set_string("description", | ||
"Number of bytes read/written per second to the CUDA " | ||
"device's global memory"); | ||
summ.set_float64("value", avg_used_gmem_bw); | ||
} | ||
|
||
{ | ||
// Peak bandwidth as reported by the state's device object. | ||
// NOTE(review): assumes get_device() returns a usable (non-empty) handle here -- confirm. | ||
const auto peak_gmem_bw = | ||
static_cast<double>(state.get_device()->get_global_memory_bus_bandwidth()); | ||
|
||
auto& summ = state.add_summary("nv/cold/bw/global/utilization"); | ||
summ.set_string("name", "BWUtil"); | ||
summ.set_string("hint", "percentage"); | ||
summ.set_string("description", | ||
"Global device memory utilization as a percentage of the " | ||
"device's peak bandwidth"); | ||
// The stored value is the plain ratio used/peak (not multiplied by 100); the "percentage" | ||
// hint above is what labels it as a percentage. | ||
summ.set_float64("value", avg_used_gmem_bw / peak_gmem_bw); | ||
} | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,31 @@ | ||
/* | ||
* Copyright (c) 2024, NVIDIA CORPORATION. | ||
* | ||
* Licensed under the Apache License, Version 2.0 (the "License"); | ||
* you may not use this file except in compliance with the License. | ||
* You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, software | ||
* distributed under the License is distributed on an "AS IS" BASIS, | ||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
* See the License for the specific language governing permissions and | ||
* limitations under the License. | ||
*/ | ||
|
||
#pragma once | ||
|
||
namespace nvbench { | ||
struct state; | ||
} | ||
|
||
/** | ||
* @brief Sets throughput statistics, such as "Elem/s", "GlobalMem BW", and "BWUtil" for the | ||
* nvbench results summary. | ||
* | ||
* This function could be used to work around a known issue that the throughput statistics | ||
* should be added before the nvbench::state.exec() call, otherwise they will not be printed | ||
* in the summary. See https://github.com/NVIDIA/nvbench/issues/175 for more details. | ||
*/ | ||
void set_throughputs(nvbench::state& state); |
Oops, something went wrong.