From dcc65094240258cce64d953b3edf4301103a5def Mon Sep 17 00:00:00 2001 From: "Artem M. Chirkin" <9253178+achirkin@users.noreply.github.com> Date: Fri, 11 Mar 2022 15:29:52 +0100 Subject: [PATCH] Add benchmarks (#549) Copied the benchmarking setup and one simple benchmark (as an example) from cuml. Authors: - Artem M. Chirkin (https://github.com/achirkin) Approvers: - Corey J. Nolet (https://github.com/cjnolet) - Jordan Jacobelli (https://github.com/Ethyling) URL: https://github.com/rapidsai/raft/pull/549 --- BUILD.md | 56 +++-- build.sh | 48 +++-- ci/checks/style.sh | 1 + ci/gpu/build.sh | 4 +- conda/recipes/libraft_distance/build.sh | 3 +- conda/recipes/libraft_headers/build.sh | 3 +- conda/recipes/libraft_nn/build.sh | 3 +- cpp/CMakeLists.txt | 13 ++ cpp/bench/CMakeLists.txt | 60 ++++++ cpp/bench/common/benchmark.hpp | 262 ++++++++++++++++++++++++ cpp/bench/linalg/reduce.cu | 62 ++++++ cpp/bench/main.cpp | 19 ++ cpp/cmake/thirdparty/get_gbench.cmake | 43 ++++ 13 files changed, 530 insertions(+), 47 deletions(-) create mode 100644 cpp/bench/CMakeLists.txt create mode 100644 cpp/bench/common/benchmark.hpp create mode 100644 cpp/bench/linalg/reduce.cu create mode 100644 cpp/bench/main.cpp create mode 100644 cpp/cmake/thirdparty/get_gbench.cmake diff --git a/BUILD.md b/BUILD.md index 1bf3783fae..457ee85aad 100644 --- a/BUILD.md +++ b/BUILD.md @@ -26,39 +26,52 @@ The recommended way to build and install RAFT is to use the `build.sh` script in ### Header-only C++ -RAFT depends on many different core libraries such as `thrust`, `cub`, `cucollections`, and `rmm`, which will be downloaded automatically by `cmake` even when only installing the headers. It's important to note that while all the headers will be installed and available, some parts of the RAFT API depend on libraries like `FAISS`, which can also be downloaded in the RAFT build but will need to be told to do so. 
+RAFT depends on many different core libraries such as `thrust`, `cub`, `cucollections`, and `rmm`, which will be downloaded automatically by `cmake` even when only installing the headers. It's important to note that while all the headers will be installed and available, some parts of the RAFT API depend on libraries like `FAISS`, which can also be downloaded in the RAFT build but will need to be told to do so. The following example builds and installs raft in header-only mode: ```bash -./build.sh libraft --nogtest +./build.sh libraft ``` ###C++ Shared Libraries (optional) -Shared libraries are provided to speed up compile times for larger libraries which may heavily utilize some of the APIs. These shared libraries can also significantly improve re-compile times while developing against the APIs. +Shared libraries are provided to speed up compile times for larger libraries which may heavily utilize some of the APIs. These shared libraries can also significantly improve re-compile times while developing against the APIs. 
Build all the shared libraries by passing `--compile-libs` flag to `build.sh`: ```bash -./build.sh libraft --compile-libs --nogtest +./build.sh libraft --compile-libs ``` - + To remain flexible, the individual shared libraries have their own flags and multiple can be used (though currently only the `nn` and `distance` packages contain shared libraries): ```bash -./build.sh libraft --compile-nn --compile-dist --nogtest +./build.sh libraft --compile-nn --compile-dist ``` ###Googletests -Compile the Googletests by removing the `--nogtest` flag from `build.sh`: +Compile the Googletests using the `tests` target in `build.sh`: ```bash -./build.sh libraft --compile-nn --compile-dist +./build.sh libraft tests --compile-nn --compile-dist ``` To run C++ tests: ```bash -./test_raft +./cpp/build/test_raft +``` + +###Benchmarks + +Compile the benchmarks using the `bench` target in `build.sh`: +```bash +./build.sh libraft bench --compile-nn --compile-dist +``` + +To run the C++ benchmarks: + +```bash +./cpp/build/bench_raft ``` ### C++ Using Cmake @@ -77,15 +90,16 @@ RAFT's cmake has the following configurable flags available:. | Flag | Possible Values | Default Value | Behavior | | --- | --- | --- | --- | -| BUILD_TESTS | ON, OFF | ON | Compile Googletests | +| BUILD_TESTS | ON, OFF | ON | Compile Googletests | +| BUILD_BENCH | ON, OFF | ON | Compile benchmarks | | RAFT_COMPILE_LIBRARIES | ON, OFF | OFF | Compiles all `libraft` shared libraries (these are required for Googletests) | -| RAFT_COMPILE_NN_LIBRARY | ON, OFF | ON | Compiles the `libraft-nn` shared library | -| RAFT_COMPILE_DIST_LIBRARY | ON, OFF | ON | Compiles the `libraft-distance` shared library | +| RAFT_COMPILE_NN_LIBRARY | ON, OFF | ON | Compiles the `libraft-nn` shared library | +| RAFT_COMPILE_DIST_LIBRARY | ON, OFF | ON | Compiles the `libraft-distance` shared library | | RAFT_ENABLE_NN_DEPENDENCIES | ON, OFF | OFF | Searches for dependencies of nearest neighbors API, such as FAISS, and compiles them if not found. 
| -| RAFT_USE_FAISS_STATIC | ON, OFF | OFF | Statically link FAISS into `libraft-nn` | +| RAFT_USE_FAISS_STATIC | ON, OFF | OFF | Statically link FAISS into `libraft-nn` | | DETECT_CONDA_ENV | ON, OFF | ON | Enable detection of conda environment for dependencies | | NVTX | ON, OFF | OFF | Enable NVTX Markers | -| CUDA_ENABLE_KERNELINFO | ON, OFF | OFF | Enables `kernelinfo` in nvcc. This is useful for `compute-sanitizer` | +| CUDA_ENABLE_KERNELINFO | ON, OFF | OFF | Enables `kernelinfo` in nvcc. This is useful for `compute-sanitizer` | | CUDA_ENABLE_LINEINFO | ON, OFF | OFF | Enable the -lineinfo option for nvcc | | CUDA_STATIC_RUNTIME | ON, OFF | OFF | Statically link the CUDA runtime | @@ -115,8 +129,8 @@ python setup.py install ``` To run the Python tests: -```bash -cd python +```bash +cd python python -m pytest raft ``` @@ -142,14 +156,14 @@ The following example shows how to use the `libraft-distance` API with the pre-c RAFT uses the [RAPIDS cmake](https://github.com/rapidsai/rapids-cmake) library, so it can be easily included into downstream projects. RAPIDS cmake provides a convenience layer around the [Cmake Package Manager (CPM)](https://github.com/cpm-cmake/CPM.cmake). The following example is similar to building RAFT itself from source but allows it to be done in cmake, providing the `raft::raft` link target and `RAFT_INCLUDE_DIR` for includes. The `COMPILE_LIBRARIES` option enables the building of the shared libraries. 
-The following `cmake` snippet enables a flexible configuration of RAFT: +The following `cmake` snippet enables a flexible configuration of RAFT: ```cmake set(RAFT_VERSION "22.04") function(find_and_configure_raft) - set(oneValueArgs VERSION FORK PINNED_TAG USE_FAISS_STATIC + set(oneValueArgs VERSION FORK PINNED_TAG USE_FAISS_STATIC COMPILE_LIBRARIES ENABLE_NN_DEPENDENCIES CLONE_ON_PIN USE_NN_LIBRARY USE_DISTANCE_LIBRARY) cmake_parse_arguments(PKG "${options}" "${oneValueArgs}" @@ -165,14 +179,14 @@ function(find_and_configure_raft) endif() #----------------------------------------------------- - # Add components + # Add components #----------------------------------------------------- string(APPEND RAFT_COMPONENTS "") if(PKG_USE_NN_LIBRARY) string(APPEND RAFT_COMPONENTS " nn") endif() - + if(PKG_USE_DISTANCE_LIBRARY) string(APPEND RAFT_COMPONENTS " distance") endif() @@ -221,4 +235,4 @@ find_and_configure_raft(VERSION ${RAFT_VERSION}.00 ### Python/Cython Integration -Once installed, RAFT's Python library can be imported and used directly. \ No newline at end of file +Once installed, RAFT's Python library can be imported and used directly. diff --git a/build.sh b/build.sh index 9d3a796c65..eb5fa0a250 100755 --- a/build.sh +++ b/build.sh @@ -18,25 +18,26 @@ ARGS=$* # script, and that this script resides in the repo dir! REPODIR=$(cd $(dirname $0); pwd) -VALIDARGS="clean libraft pyraft docs -v -g --noinstall --compile-libs --compile-nn --compile-dist --allgpuarch --nvtx --show_depr_warn -h --nogtest --buildfaiss" +VALIDARGS="clean libraft pyraft docs tests bench -v -g --noinstall --compile-libs --compile-nn --compile-dist --allgpuarch --nvtx --show_depr_warn -h --buildfaiss" HELP="$0 [ ...] [ ...] where is: clean - remove all existing build artifacts and configuration (start over) libraft - build the raft C++ code only. Also builds the C-wrapper library around the C++ code. 
- pyraft - build the cuml Python package + pyraft - build the cuml Python package docs - build the documentation + tests - build the tests + bench - build the benchmarks and is: -v - verbose build mode -g - build for debug - --compile-libs - compile shared libraries for all components + --compile-libs - compile shared libraries for all components --compile-nn - compile shared library for nn component --compile-dist - compile shared library for distance component --allgpuarch - build for all supported GPU architectures --buildfaiss - build faiss statically into raft - --nogtest - do not build google tests for libraft - --noinstall - do not install cmake targets + --noinstall - do not install cmake targets --nvtx - Enable nvtx for profiling support --show_depr_warn - show cmake deprecation warnings -h - print this text @@ -53,12 +54,13 @@ BUILD_DIRS="${LIBRAFT_BUILD_DIR} ${PY_RAFT_BUILD_DIR} ${PYTHON_DEPS_CLONE}" CMAKE_LOG_LEVEL="" VERBOSE_FLAG="" BUILD_ALL_GPU_ARCH=0 -BUILD_TESTS=YES +BUILD_TESTS=OFF +BUILD_BENCH=OFF BUILD_STATIC_FAISS=OFF COMPILE_LIBRARIES=OFF COMPILE_NN_LIBRARY=OFF COMPILE_DIST_LIBRARY=OFF -ENABLE_NN_DEPENDENCIES=${BUILD_TESTS} +ENABLE_NN_DEPENDENCIES=OFF NVTX=OFF CLEAN=0 DISABLE_DEPRECATION_WARNINGS=ON @@ -110,11 +112,6 @@ fi if hasArg --allgpuarch; then BUILD_ALL_GPU_ARCH=1 fi -if hasArg --nogtest; then - BUILD_TESTS=OFF - COMPILE_LIBRARIES=OFF - ENABLE_NN_DEPENDENCIES=OFF -fi if hasArg --compile-libs || (( ${NUMARGS} == 0 )); then COMPILE_LIBRARIES=ON @@ -123,11 +120,24 @@ fi if hasArg --compile-nn || hasArg --compile-libs || (( ${NUMARGS} == 0 )); then ENABLE_NN_DEPENDENCIES=ON COMPILE_NN_LIBRARY=ON - CMAKE_TARGET="raft_nn_lib;${CMAKE_TARGET}" + CMAKE_TARGET="${CMAKE_TARGET};raft_nn_lib" fi + if hasArg --compile-dist || hasArg --compile-libs || (( ${NUMARGS} == 0 )); then COMPILE_DIST_LIBRARY=ON - CMAKE_TARGET="raft_distance_lib;${CMAKE_TARGET}" + CMAKE_TARGET="${CMAKE_TARGET};raft_distance_lib" +fi + +if hasArg tests || (( ${NUMARGS} == 0 )); 
then + BUILD_TESTS=ON + ENABLE_NN_DEPENDENCIES=ON + CMAKE_TARGET="${CMAKE_TARGET};test_raft" +fi + +if hasArg bench || (( ${NUMARGS} == 0 )); then + BUILD_BENCH=ON + ENABLE_NN_DEPENDENCIES=ON + CMAKE_TARGET="${CMAKE_TARGET};bench_raft" fi if hasArg --buildfaiss; then @@ -165,7 +175,7 @@ fi ################################################################################ # Configure for building all C++ targets -if (( ${NUMARGS} == 0 )) || hasArg libraft || hasArg docs; then +if (( ${NUMARGS} == 0 )) || hasArg libraft || hasArg docs || hasArg tests || hasArg bench; then if (( ${BUILD_ALL_GPU_ARCH} == 0 )); then RAFT_CMAKE_CUDA_ARCHITECTURES="NATIVE" echo "Building for the architecture of the GPU in the system..." @@ -184,17 +194,13 @@ if (( ${NUMARGS} == 0 )) || hasArg libraft || hasArg docs; then -DNVTX=${NVTX} \ -DDISABLE_DEPRECATION_WARNINGS=${DISABLE_DEPRECATION_WARNINGS} \ -DBUILD_TESTS=${BUILD_TESTS} \ + -DBUILD_BENCH=${BUILD_BENCH} \ -DCMAKE_MESSAGE_LOG_LEVEL=${CMAKE_LOG_LEVEL} \ -DRAFT_COMPILE_NN_LIBRARY=${COMPILE_NN_LIBRARY} \ -DRAFT_COMPILE_DIST_LIBRARY=${COMPILE_DIST_LIBRARY} \ -DRAFT_USE_FAISS_STATIC=${BUILD_STATIC_FAISS} - if (( ${NUMARGS} == 0 )) || hasArg libraft; then - # Run all c++ targets at once - if ! 
hasArg --nogtest; then - CMAKE_TARGET="${CMAKE_TARGET};test_raft;" - fi - + if [[ ${CMAKE_TARGET} != "" ]] || [[ ${INSTALL_TARGET} != "" ]]; then echo "-- Compiling targets: ${CMAKE_TARGET}, verbose=${VERBOSE_FLAG}" cmake --build "${LIBRAFT_BUILD_DIR}" ${VERBOSE_FLAG} -j${PARALLEL_LEVEL} --target ${CMAKE_TARGET} ${INSTALL_TARGET} fi diff --git a/ci/checks/style.sh b/ci/checks/style.sh index 2ce8b446b8..fb5a64fdac 100644 --- a/ci/checks/style.sh +++ b/ci/checks/style.sh @@ -43,6 +43,7 @@ fi # Check for a consistent #include syntax HASH_INCLUDE=`python cpp/scripts/include_checker.py \ + cpp/bench \ cpp/include \ cpp/test \ 2>&1` diff --git a/ci/gpu/build.sh b/ci/gpu/build.sh index afc6056b42..1affaef0b1 100644 --- a/ci/gpu/build.sh +++ b/ci/gpu/build.sh @@ -96,9 +96,9 @@ export LD_LIBRARY_PATH=$CONDA_PREFIX/lib:$LD_LIBRARY_PATH gpuci_logger "Build C++ and Python targets" # These should link against the existing shared libs if hasArg --skip-tests; then - "$WORKSPACE/build.sh" pyraft libraft -v --nogtest -else "$WORKSPACE/build.sh" pyraft libraft -v +else + "$WORKSPACE/build.sh" pyraft libraft tests bench -v fi gpuci_logger "sccache stats" diff --git a/conda/recipes/libraft_distance/build.sh b/conda/recipes/libraft_distance/build.sh index 7523263f01..062a5219db 100644 --- a/conda/recipes/libraft_distance/build.sh +++ b/conda/recipes/libraft_distance/build.sh @@ -1,3 +1,4 @@ #!/usr/bin/env bash +# Copyright (c) 2022, NVIDIA CORPORATION. -./build.sh libraft -v --allgpuarch --compile-dist --nogtest +./build.sh libraft -v --allgpuarch --compile-dist diff --git a/conda/recipes/libraft_headers/build.sh b/conda/recipes/libraft_headers/build.sh index ca6d9b4960..876f46cdfe 100644 --- a/conda/recipes/libraft_headers/build.sh +++ b/conda/recipes/libraft_headers/build.sh @@ -1,3 +1,4 @@ #!/usr/bin/env bash +# Copyright (c) 2022, NVIDIA CORPORATION. 
-./build.sh libraft -v --allgpuarch --nogtest +./build.sh libraft -v --allgpuarch diff --git a/conda/recipes/libraft_nn/build.sh b/conda/recipes/libraft_nn/build.sh index 5c60cd2fa1..4f6ffbca25 100644 --- a/conda/recipes/libraft_nn/build.sh +++ b/conda/recipes/libraft_nn/build.sh @@ -1,3 +1,4 @@ #!/usr/bin/env bash +# Copyright (c) 2022, NVIDIA CORPORATION. -./build.sh libraft -v --allgpuarch --compile-nn --nogtest +./build.sh libraft -v --allgpuarch --compile-nn diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index c13ee03a33..c68be5e619 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -41,6 +41,7 @@ set(CMAKE_EXPORT_COMPILE_COMMANDS ON) # - User Options ------------------------------------------------------------ option(BUILD_TESTS "Build raft unit-tests" ON) +option(BUILD_BENCH "Build raft C++ benchmark tests" ON) option(CUDA_ENABLE_KERNELINFO "Enable kernel resource usage info" OFF) option(CUDA_ENABLE_LINEINFO "Enable the -lineinfo option for nvcc (useful for cuda-memcheck / profiler)" OFF) option(CUDA_STATIC_RUNTIME "Statically link the CUDA runtime" OFF) @@ -58,6 +59,7 @@ include(CMakeDependentOption) cmake_dependent_option(RAFT_USE_FAISS_STATIC "Build and statically link the FAISS library for nearest neighbors search on GPU" ON RAFT_COMPILE_LIBRARIES OFF) message(VERBOSE "RAFT: Build RAFT unit-tests: ${BUILD_TESTS}") +message(VERBOSE "RAFT: Building raft C++ benchmarks: ${BUILD_BENCH}") message(VERBOSE "RAFT: Enable detection of conda environment for dependencies: ${DETECT_CONDA_ENV}") message(VERBOSE "RAFT: Disable depreaction warnings " ${DISABLE_DEPRECATION_WARNINGS}) message(VERBOSE "RAFT: Disable OpenMP: ${DISABLE_OPENMP}") @@ -122,6 +124,10 @@ if(BUILD_TESTS) include(cmake/thirdparty/get_ucx.cmake) endif() +if(BUILD_BENCH) + include(cmake/thirdparty/get_gbench.cmake) +endif() + ############################################################################## # - raft --------------------------------------------------------------------- @@ 
-411,6 +417,13 @@ if(BUILD_TESTS) include(test/CMakeLists.txt) endif() +############################################################################## +# - build benchmark executable ----------------------------------------------- + +if(BUILD_BENCH) + include(bench/CMakeLists.txt) +endif() + ############################################################################## # - doxygen targets ---------------------------------------------------------- diff --git a/cpp/bench/CMakeLists.txt b/cpp/bench/CMakeLists.txt new file mode 100644 index 0000000000..9f0a6096d9 --- /dev/null +++ b/cpp/bench/CMakeLists.txt @@ -0,0 +1,60 @@ +#============================================================================= +# Copyright (c) 2022, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+#============================================================================= + +set(RAFT_CPP_BENCH_TARGET "bench_raft") + +# (please keep the filenames in alphabetical order) +add_executable(${RAFT_CPP_BENCH_TARGET} + bench/linalg/reduce.cu + bench/main.cpp +) + +set_target_properties(${RAFT_CPP_BENCH_TARGET} + PROPERTIES BUILD_RPATH "\$ORIGIN" + # set target compile options + CXX_STANDARD 17 + CXX_STANDARD_REQUIRED ON + CUDA_STANDARD 17 + CUDA_STANDARD_REQUIRED ON + POSITION_INDEPENDENT_CODE ON + INTERFACE_POSITION_INDEPENDENT_CODE ON + INSTALL_RPATH "\$ORIGIN/../../../lib" +) + +target_compile_options(${RAFT_CPP_BENCH_TARGET} + PRIVATE "$<$:${RAFT_CXX_FLAGS}>" + "$<$:${RAFT_CUDA_FLAGS}>" +) + +target_include_directories(${RAFT_CPP_BENCH_TARGET} + PUBLIC "$" +) + +target_link_libraries(${RAFT_CPP_BENCH_TARGET} + PRIVATE + raft::raft + faiss::faiss + benchmark::benchmark + $ + $ +) + +install( + TARGETS ${RAFT_CPP_BENCH_TARGET} + COMPONENT testing + DESTINATION bin/libraft/gbench + EXCLUDE_FROM_ALL +) diff --git a/cpp/bench/common/benchmark.hpp b/cpp/bench/common/benchmark.hpp new file mode 100644 index 0000000000..93814ead44 --- /dev/null +++ b/cpp/bench/common/benchmark.hpp @@ -0,0 +1,262 @@ +/* + * Copyright (c) 2022, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#pragma once + +#include + +#include +#include + +#include + +#include +#include +#include +#include +#include + +namespace raft::bench { + +/** + * RAII way to temporarily set the pooling memory allocator in rmm. + * This may be useful for benchmarking functions that do some memory allocations. + */ +struct using_pool_memory_res { + private: + rmm::mr::device_memory_resource* orig_res_; + rmm::mr::cuda_memory_resource cuda_res_; + rmm::mr::pool_memory_resource pool_res_; + + public: + using_pool_memory_res(size_t initial_size, size_t max_size) + : orig_res_(rmm::mr::get_current_device_resource()), + pool_res_(&cuda_res_, initial_size, max_size) + { + rmm::mr::set_current_device_resource(&pool_res_); + } + + using_pool_memory_res() : using_pool_memory_res(size_t(1) << size_t(30), size_t(16) << size_t(30)) + { + } + + ~using_pool_memory_res() { rmm::mr::set_current_device_resource(orig_res_); } +}; + +/** + * RAII way of timing cuda calls. This has been shamelessly copied from the + * cudf codebase via cuml codebase. So, credit for this class goes to cudf developers. + */ +struct cuda_event_timer { + private: + ::benchmark::State* state_; + rmm::cuda_stream_view stream_; + cudaEvent_t start_; + cudaEvent_t stop_; + + public: + /** + * @param state the benchmark::State whose timer we are going to update. + * @param stream CUDA stream we are measuring time on. + */ + cuda_event_timer(::benchmark::State& state, rmm::cuda_stream_view stream) + : state_(&state), stream_(stream) + { + RAFT_CUDA_TRY(cudaEventCreate(&start_)); + RAFT_CUDA_TRY(cudaEventCreate(&stop_)); + raft::interruptible::synchronize(stream_); + RAFT_CUDA_TRY(cudaEventRecord(start_, stream_)); + } + cuda_event_timer() = delete; + + /** + * @brief The dtor stops the timer and performs a synchronization. Time of + * the benchmark::State object provided to the ctor will be set to the + * value given by `cudaEventElapsedTime()`. 
+ */ + ~cuda_event_timer() + { + RAFT_CUDA_TRY_NO_THROW(cudaEventRecord(stop_, stream_)); + raft::interruptible::synchronize(stop_); + float milliseconds = 0.0f; + RAFT_CUDA_TRY_NO_THROW(cudaEventElapsedTime(&milliseconds, start_, stop_)); + state_->SetIterationTime(milliseconds / 1000.f); + RAFT_CUDA_TRY_NO_THROW(cudaEventDestroy(start_)); + RAFT_CUDA_TRY_NO_THROW(cudaEventDestroy(stop_)); + } +}; + +/** Main fixture to be inherited and used by all other c++ benchmarks */ +class fixture { + private: + rmm::cuda_stream stream_owner_{}; + rmm::device_buffer scratch_buf_; + + public: + rmm::cuda_stream_view stream; + + fixture() : stream{stream_owner_.view()} + { + int l2_cache_size = 0; + int device_id = 0; + RAFT_CUDA_TRY(cudaGetDevice(&device_id)); + RAFT_CUDA_TRY(cudaDeviceGetAttribute(&l2_cache_size, cudaDevAttrL2CacheSize, device_id)); + scratch_buf_ = rmm::device_buffer(l2_cache_size, stream); + } + + // every benchmark should be overriding this + virtual void run_benchmark(::benchmark::State& state) = 0; + virtual void generate_metrics(::benchmark::State& state) {} + + /** + * The helper to be used inside `run_benchmark`, to loop over the state and record time using the + * cuda_event_timer. + */ + template + void loop_on_state(::benchmark::State& state, Lambda benchmark_func, bool flush_L2 = true) + { + for (auto _ : state) { + if (flush_L2) { + RAFT_CUDA_TRY(cudaMemsetAsync(scratch_buf_.data(), 0, scratch_buf_.size(), stream)); + } + cuda_event_timer timer(state, stream); + benchmark_func(); + } + } +}; + +namespace internal { + +template +class Fixture : public ::benchmark::Fixture { + using State = ::benchmark::State; + + public: + explicit Fixture(const std::string name, const Params&... params) + : ::benchmark::Fixture(), params_(params...) + { + SetName(name.c_str()); + } + Fixture() = delete; + + void SetUp(const State& state) override + { + fixture_ = + std::apply([](const Params&... 
ps) { return std::make_unique(ps...); }, params_); + } + void TearDown(const State& state) override { fixture_.reset(); } + void SetUp(State& st) override { SetUp(const_cast(st)); } + void TearDown(State& st) override { TearDown(const_cast(st)); } + + private: + std::unique_ptr fixture_; + std::tuple params_; + + protected: + void BenchmarkCase(State& state) override + { + fixture_->run_benchmark(state); + fixture_->generate_metrics(state); + } +}; // class Fixture + +/** + * A helper struct to create a fixture for every combination of input vectors. + * Use with care, this can blow up quickly! + */ +template +struct cartesian_registrar { + template + static void run(const std::string case_name, + const std::vector&... params, + const Fixed&... fixed); +}; + +template +struct cartesian_registrar { + template + static void run(const std::string case_name, const Fixed&... fixed) + { + auto* b = ::benchmark::internal::RegisterBenchmarkInternal( + new Fixture(case_name, fixed...)); + b->UseManualTime(); + b->Unit(benchmark::kMillisecond); + } +}; + +template +struct cartesian_registrar { + template + static void run(const std::string case_name, + const std::vector& param, + const std::vector&... params, + const Fixed&... fixed) + { + int param_len = param.size(); + for (int i = 0; i < param_len; i++) { + cartesian_registrar::run( + case_name + "/" + std::to_string(i), params..., fixed..., param[i]); + } + } +}; + +template +struct registrar { + /** + * Register a fixture `Class` named `testClass` for every combination of input `params`. + * + * @param test_class + * A string representation of the `Class` name. + * @param test_name + * Optional test name. Leave empty, if you don't need it. + * @param params + * Zero or more vectors of parameters. + * The generated test cases are a cartesian product of these vectors. + * Use with care, this can blow up quickly! 
+ */ + template + registrar(const std::string& test_class, + const std::string& test_name, + const std::vector&... params) + { + std::stringstream name_stream; + name_stream << test_class; + if (!test_name.empty()) { name_stream << "/" << test_name; } + cartesian_registrar::run(name_stream.str(), params...); + } +}; + +}; // namespace internal + +/** + * This is the entry point macro for all benchmarks. This needs to be called + * for the set of benchmarks to be registered so that the main harness inside + * google bench can find these benchmarks and run them. + * + * @param TestClass child class of `raft::bench::fixture` which contains + * the logic to generate the dataset and run training on it + * for a given algo. Ideally, one such struct is needed for + * every algo to be benchmarked + * @param test_name a unique string to identify these tests at the end of run + * This is optional and if you choose not to use this, pass an + * empty string + * @param params... zero or more lists of params upon which to benchmark. + */ +#define RAFT_BENCH_REGISTER(TestClass, ...) \ + static raft::bench::internal::registrar BENCHMARK_PRIVATE_NAME(registrar)( \ + #TestClass, __VA_ARGS__) + +} // namespace raft::bench diff --git a/cpp/bench/linalg/reduce.cu b/cpp/bench/linalg/reduce.cu new file mode 100644 index 0000000000..018086a689 --- /dev/null +++ b/cpp/bench/linalg/reduce.cu @@ -0,0 +1,62 @@ +/* + * Copyright (c) 2022, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include + +#include + +namespace raft::bench::linalg { + +struct input_size { + int rows, cols; + bool along_rows; +}; + +template +struct reduce : public fixture { + reduce(bool along_rows, const input_size& p) + : input_size(p), along_rows(along_rows), in(p.rows * p.cols, stream), out(p.rows, stream) + { + } + + void run_benchmark(::benchmark::State& state) override + { + loop_on_state(state, [this]() { + raft::linalg::reduce( + out.data(), in.data(), input_size.cols, input_size.rows, T(0.f), true, along_rows, stream); + }); + } + + private: + bool along_rows; + input_size input_size; + rmm::device_uvector in, out; +}; // struct reduce + +const std::vector kInputSizes{{8 * 1024, 1024}, + {1024, 8 * 1024}, + {8 * 1024, 8 * 1024}, + {32 * 1024, 1024}, + {1024, 32 * 1024}, + {32 * 1024, 32 * 1024}}; + +const std::vector kAlongRows{false, true}; + +RAFT_BENCH_REGISTER(reduce, "", kAlongRows, kInputSizes); +RAFT_BENCH_REGISTER(reduce, "", kAlongRows, kInputSizes); + +} // namespace raft::bench::linalg diff --git a/cpp/bench/main.cpp b/cpp/bench/main.cpp new file mode 100644 index 0000000000..3162422e8e --- /dev/null +++ b/cpp/bench/main.cpp @@ -0,0 +1,19 @@ +/* + * Copyright (c) 2022, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include // NOLINT + +BENCHMARK_MAIN(); diff --git a/cpp/cmake/thirdparty/get_gbench.cmake b/cpp/cmake/thirdparty/get_gbench.cmake new file mode 100644 index 0000000000..a3d5678f74 --- /dev/null +++ b/cpp/cmake/thirdparty/get_gbench.cmake @@ -0,0 +1,43 @@ +#============================================================================= +# Copyright (c) 2022, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +#============================================================================= + +function(find_and_configure_gbench) + + set(oneValueArgs VERSION PINNED_TAG) + cmake_parse_arguments(PKG "${options}" "${oneValueArgs}" + "${multiValueArgs}" ${ARGN} ) + + rapids_cpm_find(benchmark ${PKG_VERSION} + GLOBAL_TARGETS benchmark::benchmark + CPM_ARGS + GIT_REPOSITORY https://github.com/google/benchmark.git + GIT_TAG ${PKG_PINNED_TAG} + OPTIONS + "BENCHMARK_ENABLE_GTEST_TESTS OFF" + "BENCHMARK_ENABLE_TESTING OFF" + "BENCHMARK_ENABLE_INSTALL OFF" + "CMAKE_BUILD_TYPE Release" + "CMAKE_INSTALL_LIBDIR lib" + ) + + if(NOT TARGET benchmark::benchmark) + add_library(benchmark::benchmark ALIAS benchmark) + endif() + +endfunction() + +find_and_configure_gbench(VERSION 1.5.3 + PINNED_TAG c05843a9f622db08ad59804c190f98879b76beba)