From 09c9955cce226e8c973773359c778a0ce48f2f61 Mon Sep 17 00:00:00 2001
From: Ben Frederickson
Date: Mon, 8 May 2023 12:08:30 -0700
Subject: [PATCH] Re-use memory pool between benchmark runs

Don't create a new memory pool for each benchmark; instead, re-use the
pool across runs. This significantly speeds up the benchmarks that use
the CUDA memory pool. As an example, running
`time ./cpp/build/MATRIX_BENCH --benchmark_filter=SelectK/float/uint32_t.*/0/`,
which covers 9 different selection algorithms, goes from `0m36.317s` on
branch-23.06 to `0m10.038s` with this change.
---
 cpp/bench/prims/common/benchmark.hpp | 13 ++++++++++++-
 cpp/bench/prims/matrix/select_k.cu   |  4 ++--
 cpp/bench/prims/neighbors/knn.cuh    |  5 ++---
 3 files changed, 16 insertions(+), 6 deletions(-)

diff --git a/cpp/bench/prims/common/benchmark.hpp b/cpp/bench/prims/common/benchmark.hpp
index 4b6e1ba286..1e783eb338 100644
--- a/cpp/bench/prims/common/benchmark.hpp
+++ b/cpp/bench/prims/common/benchmark.hpp
@@ -113,8 +113,19 @@ class fixture {
   raft::device_resources handle;
   rmm::cuda_stream_view stream;
 
-  fixture() : stream{handle.get_stream()}
+  fixture(bool use_pool_memory_resource = false) : stream{handle.get_stream()}
   {
+    // Cache memory pool between test runs, since it is expensive to create.
+    // This speeds up the time required to run the select_k bench by over 3x.
+    // This is part of the fixture class here so that the pool will get cleaned
+    // up, rather than outliving the benchmarks that require it.
+    static std::unique_ptr<using_pool_memory_res> memory_pool;
+    if (use_pool_memory_resource) {
+      if (!memory_pool) { memory_pool.reset(new using_pool_memory_res()); }
+    } else if (memory_pool) {
+      memory_pool.reset();
+    }
+
     int l2_cache_size = 0;
     int device_id = 0;
     RAFT_CUDA_TRY(cudaGetDevice(&device_id));
diff --git a/cpp/bench/prims/matrix/select_k.cu b/cpp/bench/prims/matrix/select_k.cu
index 8e75280029..d0bc993cc1 100644
--- a/cpp/bench/prims/matrix/select_k.cu
+++ b/cpp/bench/prims/matrix/select_k.cu
@@ -42,7 +42,8 @@ using namespace raft::bench;  // NOLINT
 template
 struct selection : public fixture {
   explicit selection(const select::params& p)
-    : params_(p),
+    : fixture(true),
+      params_(p),
       in_dists_(p.batch_size * p.len, stream),
       in_ids_(p.batch_size * p.len, stream),
       out_dists_(p.batch_size * p.k, stream),
@@ -72,7 +73,6 @@ struct selection : public fixture {
   void run_benchmark(::benchmark::State& state) override  // NOLINT
   {
     device_resources handle{stream};
-    using_pool_memory_res res;
     try {
       std::ostringstream label_stream;
       label_stream << params_.batch_size << "#" << params_.len << "#" << params_.k;
diff --git a/cpp/bench/prims/neighbors/knn.cuh b/cpp/bench/prims/neighbors/knn.cuh
index afb3bf9da3..8239fa4f89 100644
--- a/cpp/bench/prims/neighbors/knn.cuh
+++ b/cpp/bench/prims/neighbors/knn.cuh
@@ -222,7 +222,8 @@ struct brute_force_knn {
 template
 struct knn : public fixture {
   explicit knn(const params& p, const TransferStrategy& strategy, const Scope& scope)
-    : params_(p),
+    : fixture(true),
+      params_(p),
       strategy_(strategy),
       scope_(scope),
       dev_mem_res_(strategy == TransferStrategy::MANAGED),
@@ -274,8 +275,6 @@
         "device (TransferStrategy::NO_COPY)");
     }
 
-    using_pool_memory_res default_resource;
-
     try {
       std::ostringstream label_stream;
       label_stream << params_ << "#" << strategy_ << "#" << scope_;
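
Note: the core of this patch is the function-local `static` in the fixture
constructor, which lazily creates the pool once and then shares it across
every benchmark that opts in. Below is a minimal standalone sketch of that
caching pattern for readers skimming the diff; `PoolResource` and `Fixture`
are hypothetical stand-ins for illustration, not the rmm/RAFT types used
above.

```cpp
#include <iostream>
#include <memory>

// Stand-in for an expensive-to-construct resource, e.g. a pooled device
// memory allocator (hypothetical type, not the rmm/RAFT one).
struct PoolResource {
  PoolResource() { std::cout << "creating pool\n"; }
  ~PoolResource() { std::cout << "destroying pool\n"; }
};

// Simplified benchmark fixture: opting in to the pool re-uses a single
// cached instance instead of building a new one per benchmark.
struct Fixture {
  explicit Fixture(bool use_pool = false)
  {
    // Function-local static: created at most once, shared by every fixture
    // that asks for it, and destroyed automatically at program exit.
    static std::unique_ptr<PoolResource> pool;
    if (use_pool) {
      if (!pool) { pool = std::make_unique<PoolResource>(); }
    } else if (pool) {
      pool.reset();  // fixtures that don't want the pool tear it down
    }
  }
};

int main()
{
  Fixture a(true);   // "creating pool" printed once
  Fixture b(true);   // re-uses the cached pool, no new allocation
  Fixture c(false);  // "destroying pool": released when no longer wanted
}
```

A fixture constructed without the pool releases the cached instance, which
mirrors the clean-up behaviour described in the comment added to
`benchmark.hpp` above.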