Skip to content

Commit

Permalink
Re-use memory pool between benchmark runs (#1495)
Browse files Browse the repository at this point in the history
Don't create a new memory pool for each benchmark; re-use the existing pool instead.

This significantly speeds up the benchmarks that use a CUDA memory pool. For example, running
`time ./cpp/build/MATRIX_BENCH --benchmark_filter=SelectK/float/uint32_t.*/0/`, which runs benchmarks for 9 different selection algorithms, now takes `10.038s` with this change, down from `36.317s` on branch-23.06.

Authors:
  - Ben Frederickson (https://github.com/benfred)

Approvers:
  - Divye Gala (https://github.com/divyegala)

URL: #1495
  • Loading branch information
benfred authored May 9, 2023
1 parent 576b22f commit 8816910
Show file tree
Hide file tree
Showing 3 changed files with 16 additions and 6 deletions.
13 changes: 12 additions & 1 deletion cpp/bench/prims/common/benchmark.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -113,8 +113,19 @@ class fixture {
raft::device_resources handle;
rmm::cuda_stream_view stream;

fixture() : stream{handle.get_stream()}
fixture(bool use_pool_memory_resource = false) : stream{handle.get_stream()}
{
// Cache memory pool between test runs, since it is expensive to create.
// This speeds up the time required to run the select_k bench by over 3x.
// This is part of the fixture class here so that the pool will get cleaned
// up, rather than outliving the benchmarks that require it.
static std::unique_ptr<using_pool_memory_res> memory_pool;
if (use_pool_memory_resource) {
if (!memory_pool) { memory_pool.reset(new using_pool_memory_res()); }
} else if (memory_pool) {
memory_pool.reset();
}

int l2_cache_size = 0;
int device_id = 0;
RAFT_CUDA_TRY(cudaGetDevice(&device_id));
Expand Down
4 changes: 2 additions & 2 deletions cpp/bench/prims/matrix/select_k.cu
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,8 @@ using namespace raft::bench; // NOLINT
template <typename KeyT, typename IdxT, select::Algo Algo>
struct selection : public fixture {
explicit selection(const select::params& p)
: params_(p),
: fixture(true),
params_(p),
in_dists_(p.batch_size * p.len, stream),
in_ids_(p.batch_size * p.len, stream),
out_dists_(p.batch_size * p.k, stream),
Expand Down Expand Up @@ -72,7 +73,6 @@ struct selection : public fixture {
void run_benchmark(::benchmark::State& state) override // NOLINT
{
device_resources handle{stream};
using_pool_memory_res res;
try {
std::ostringstream label_stream;
label_stream << params_.batch_size << "#" << params_.len << "#" << params_.k;
Expand Down
5 changes: 2 additions & 3 deletions cpp/bench/prims/neighbors/knn.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -222,7 +222,8 @@ struct brute_force_knn {
template <typename ValT, typename IdxT, typename ImplT>
struct knn : public fixture {
explicit knn(const params& p, const TransferStrategy& strategy, const Scope& scope)
: params_(p),
: fixture(true),
params_(p),
strategy_(strategy),
scope_(scope),
dev_mem_res_(strategy == TransferStrategy::MANAGED),
Expand Down Expand Up @@ -274,8 +275,6 @@ struct knn : public fixture {
"device (TransferStrategy::NO_COPY)");
}

using_pool_memory_res default_resource;

try {
std::ostringstream label_stream;
label_stream << params_ << "#" << strategy_ << "#" << scope_;
Expand Down

0 comments on commit 8816910

Please sign in to comment.