From 09c9955cce226e8c973773359c778a0ce48f2f61 Mon Sep 17 00:00:00 2001
From: Ben Frederickson
Date: Mon, 8 May 2023 12:08:30 -0700
Subject: [PATCH] Re-use memory pool between benchmark runs

Don't create a new memory pool for each benchmark; instead, re-use the
pool across runs. This significantly speeds up the benchmarks that use
the CUDA memory pool. As an example, running
`time ./cpp/build/MATRIX_BENCH --benchmark_filter=SelectK/float/uint32_t.*/0/`,
which covers 9 different selection algorithms, goes from `0m36.317s` on
branch-23.06 to `0m10.038s` with this change.
---
 cpp/bench/prims/common/benchmark.hpp | 13 ++++++++++++-
 cpp/bench/prims/matrix/select_k.cu   |  4 ++--
 cpp/bench/prims/neighbors/knn.cuh    |  5 ++---
 3 files changed, 16 insertions(+), 6 deletions(-)

diff --git a/cpp/bench/prims/common/benchmark.hpp b/cpp/bench/prims/common/benchmark.hpp
index 4b6e1ba286..1e783eb338 100644
--- a/cpp/bench/prims/common/benchmark.hpp
+++ b/cpp/bench/prims/common/benchmark.hpp
@@ -113,8 +113,19 @@ class fixture {
   raft::device_resources handle;
   rmm::cuda_stream_view stream;
 
-  fixture() : stream{handle.get_stream()}
+  fixture(bool use_pool_memory_resource = false) : stream{handle.get_stream()}
   {
+    // Cache memory pool between test runs, since it is expensive to create.
+    // This speeds up the time required to run the select_k bench by over 3x.
+    // This is part of the fixture class here so that the pool will get cleaned
+    // up, rather than outliving the benchmarks that require it.
+    static std::unique_ptr<using_pool_memory_res> memory_pool;
+    if (use_pool_memory_resource) {
+      if (!memory_pool) { memory_pool.reset(new using_pool_memory_res()); }
+    } else if (memory_pool) {
+      memory_pool.reset();
+    }
+
     int l2_cache_size = 0;
     int device_id = 0;
     RAFT_CUDA_TRY(cudaGetDevice(&device_id));
diff --git a/cpp/bench/prims/matrix/select_k.cu b/cpp/bench/prims/matrix/select_k.cu
index 8e75280029..d0bc993cc1 100644
--- a/cpp/bench/prims/matrix/select_k.cu
+++ b/cpp/bench/prims/matrix/select_k.cu
@@ -42,7 +42,8 @@ using namespace raft::bench;  // NOLINT
 template
 struct selection : public fixture {
   explicit selection(const select::params& p)
-    : params_(p),
+    : fixture(true),
+      params_(p),
       in_dists_(p.batch_size * p.len, stream),
       in_ids_(p.batch_size * p.len, stream),
       out_dists_(p.batch_size * p.k, stream),
@@ -72,7 +73,6 @@ struct selection : public fixture {
   void run_benchmark(::benchmark::State& state) override  // NOLINT
   {
     device_resources handle{stream};
-    using_pool_memory_res res;
     try {
       std::ostringstream label_stream;
       label_stream << params_.batch_size << "#" << params_.len << "#" << params_.k;
diff --git a/cpp/bench/prims/neighbors/knn.cuh b/cpp/bench/prims/neighbors/knn.cuh
index afb3bf9da3..8239fa4f89 100644
--- a/cpp/bench/prims/neighbors/knn.cuh
+++ b/cpp/bench/prims/neighbors/knn.cuh
@@ -222,7 +222,8 @@ struct brute_force_knn {
 template
 struct knn : public fixture {
   explicit knn(const params& p, const TransferStrategy& strategy, const Scope& scope)
-    : params_(p),
+    : fixture(true),
+      params_(p),
       strategy_(strategy),
       scope_(scope),
       dev_mem_res_(strategy == TransferStrategy::MANAGED),
@@ -274,8 +275,6 @@
         "device (TransferStrategy::NO_COPY)");
     }
 
-    using_pool_memory_res default_resource;
-
     try {
       std::ostringstream label_stream;
       label_stream << params_ << "#" << strategy_ << "#" << scope_;
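
Note: the core of this patch is the function-local `static` in the fixture
constructor, which lazily creates the pool once and then shares it across
every benchmark that opts in. Below is a minimal standalone sketch of that
caching pattern for readers skimming the diff; `PoolResource` and `Fixture`
are hypothetical stand-ins for illustration, not the rmm/RAFT types used
above.

```cpp
#include <iostream>
#include <memory>

// Stand-in for an expensive-to-construct resource, e.g. a pooled device
// memory allocator (hypothetical type, not the rmm/RAFT one).
struct PoolResource {
  PoolResource() { std::cout << "creating pool\n"; }
  ~PoolResource() { std::cout << "destroying pool\n"; }
};

// Simplified benchmark fixture: opting in to the pool re-uses a single
// cached instance instead of building a new one per benchmark.
struct Fixture {
  explicit Fixture(bool use_pool = false)
  {
    // Function-local static: created at most once, shared by every fixture
    // that asks for it, and destroyed automatically at program exit.
    static std::unique_ptr<PoolResource> pool;
    if (use_pool) {
      if (!pool) { pool = std::make_unique<PoolResource>(); }
    } else if (pool) {
      pool.reset();  // fixtures that don't want the pool tear it down
    }
  }
};

int main()
{
  Fixture a(true);   // "creating pool" printed once
  Fixture b(true);   // re-uses the cached pool, no new allocation
  Fixture c(false);  // "destroying pool": released when no longer wanted
}
```

A fixture constructed without the pool releases the cached instance, which
mirrors the clean-up behaviour described in the comment added to
`benchmark.hpp` above.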