Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add peak memory usage tracking to cuIO benchmarks #7770

Merged
merged 14 commits into from
Jul 19, 2021
Merged
Show file tree
Hide file tree
Changes from 7 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
160 changes: 160 additions & 0 deletions cpp/benchmarks/common/memory_tracking_resource.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,160 @@
/*
* Copyright (c) 2021, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#pragma once

#include <rmm/mr/device/device_memory_resource.hpp>

#include <algorithm>
#include <atomic>
#include <cstddef>
#include <utility>

namespace cudf {

/**
 * @brief Resource that uses `Upstream` to allocate memory and tracks the current and peak memory
 * allocated through this resource.
 *
 * An instance of this resource can be constructed with an existing, upstream resource in order to
 * satisfy allocation requests and track memory use.
 *
 * The usage counters are updated atomically, so concurrent allocations/deallocations from
 * multiple threads are tracked correctly.
 *
 * @tparam Upstream Type of the upstream resource used for allocation/deallocation.
 */
template <typename Upstream>
class memory_tracking_resource final : public rmm::mr::device_memory_resource {
 public:
  /**
   * @brief Construct a new tracking resource adaptor using `upstream` to satisfy allocation
   * requests and tracking information about each allocation/free to the members
   * current_allocated_size_ and max_allocated_size_.
   *
   * @throws `rmm::logic_error` if `upstream == nullptr`
   *
   * @param upstream The resource used for allocating/deallocating device memory
   */
  memory_tracking_resource(Upstream* upstream) : upstream_{upstream}
  {
    RMM_EXPECTS(nullptr != upstream, "Unexpected null upstream resource pointer.");
  }

  memory_tracking_resource()                                = delete;
  ~memory_tracking_resource()                               = default;
  memory_tracking_resource(memory_tracking_resource const&) = delete;
  memory_tracking_resource& operator=(memory_tracking_resource const&) = delete;
  // Move operations are written out explicitly because std::atomic members are not movable,
  // which would otherwise implicitly delete the defaulted versions.
  memory_tracking_resource(memory_tracking_resource&& other) noexcept
    : current_allocated_size_{other.current_allocated_size_.load()},
      max_allocated_size_{other.max_allocated_size_.load()},
      upstream_{other.upstream_}
  {
  }
  memory_tracking_resource& operator=(memory_tracking_resource&& other) noexcept
  {
    current_allocated_size_ = other.current_allocated_size_.load();
    max_allocated_size_     = other.max_allocated_size_.load();
    upstream_               = other.upstream_;
    return *this;
  }

  /**
   * @brief Return pointer to the upstream resource.
   *
   * @return Upstream* Pointer to the upstream resource.
   */
  Upstream* get_upstream() const noexcept { return upstream_; }

  /**
   * @brief Checks whether the upstream resource supports streams.
   *
   * @return true The upstream resource supports streams
   * @return false The upstream resource does not support streams.
   */
  bool supports_streams() const noexcept override { return upstream_->supports_streams(); }

  /**
   * @brief Query whether the resource supports the get_mem_info API.
   *
   * @return bool true if the upstream resource supports get_mem_info, false otherwise.
   */
  bool supports_get_mem_info() const noexcept override
  {
    return upstream_->supports_get_mem_info();
  }

  /// Peak number of bytes simultaneously allocated through this resource so far.
  std::size_t max_allocated_size() const noexcept { return max_allocated_size_.load(); }
  /// Number of bytes currently allocated through this resource.
  std::size_t current_allocated_size() const noexcept { return current_allocated_size_.load(); }

 private:
  /**
   * @brief Allocates memory of size at least `bytes` using the upstream resource and updates the
   * size of memory in use.
   *
   * If the upstream allocation is successful updates the current total memory and peak memory
   * allocated with this resource
   *
   * The returned pointer has at least 256B alignment.
   *
   * @param bytes The size, in bytes, of the allocation
   * @param stream Stream on which to perform the allocation
   * @return void* Pointer to the newly allocated memory
   */
  void* do_allocate(std::size_t bytes, rmm::cuda_stream_view stream) override
  {
    auto const p = upstream_->allocate(bytes, stream);
    // Counters are only updated if the upstream allocation succeeded (did not throw).
    auto const current = current_allocated_size_.fetch_add(bytes) + bytes;
    // Atomic "fetch_max": retry until max_allocated_size_ is at least `current` or another
    // thread has already recorded a larger peak.
    auto observed_max = max_allocated_size_.load();
    while (observed_max < current &&
           !max_allocated_size_.compare_exchange_weak(observed_max, current)) {
    }
    return p;
  }

  /**
   * @brief Free allocation of size `bytes` pointed to by `p` and update the tracked sizes.
   *
   * Updates the current total memory allocated with this resource
   *
   * @param p Pointer to be deallocated
   * @param bytes Size of the allocation
   * @param stream Stream on which to perform the deallocation
   */
  void do_deallocate(void* p, std::size_t bytes, rmm::cuda_stream_view stream) override
  {
    current_allocated_size_.fetch_sub(bytes);
    upstream_->deallocate(p, bytes, stream);
  }

  /**
   * @brief Compare the upstream resource to another.
   *
   * @param other The other resource to compare to
   * @return true If the two resources are equivalent
   * @return false If the two resources are not equal
   */
  bool do_is_equal(device_memory_resource const& other) const noexcept override
  {
    if (this == &other)
      return true;
    else {
      // If `other` is also a tracking resource, compare the upstreams instead so that two
      // adaptors over equivalent upstreams compare equal.
      memory_tracking_resource<Upstream> const* cast =
        dynamic_cast<memory_tracking_resource<Upstream> const*>(&other);
      if (cast != nullptr)
        return upstream_->is_equal(*cast->get_upstream());
      else
        return upstream_->is_equal(other);
    }
  }

  /**
   * @brief Get free and available memory from upstream resource.
   *
   * @param stream Stream on which to get the mem info.
   * @return std::pair containing free_size and total_size of memory
   */
  std::pair<std::size_t, std::size_t> do_get_mem_info(rmm::cuda_stream_view stream) const override
  {
    return upstream_->get_mem_info(stream);
  }

  // Atomic so that allocations/deallocations from multiple threads do not race or lose
  // peak-size updates.
  std::atomic<std::size_t> current_allocated_size_{0};
  std::atomic<std::size_t> max_allocated_size_{0};

  Upstream* upstream_;  ///< The upstream resource used for satisfying
                        ///< allocation requests
};

}  // namespace cudf
4 changes: 3 additions & 1 deletion cpp/benchmarks/fixture/benchmark_fixture.hpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2019-2020, NVIDIA CORPORATION.
* Copyright (c) 2019-2021, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand All @@ -14,6 +14,8 @@
* limitations under the License.
*/

#pragma once
devavret marked this conversation as resolved.
Show resolved Hide resolved

#include <benchmark/benchmark.h>
#include <rmm/mr/device/cuda_memory_resource.hpp>
#include <rmm/mr/device/owning_wrapper.hpp>
Expand Down
16 changes: 15 additions & 1 deletion cpp/benchmarks/io/csv/csv_reader_benchmark.cpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2020, NVIDIA CORPORATION.
* Copyright (c) 2020-2021, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand All @@ -17,6 +17,7 @@
#include <benchmark/benchmark.h>

#include <benchmarks/common/generate_benchmark_input.hpp>
#include <benchmarks/common/memory_tracking_resource.hpp>
#include <benchmarks/fixture/benchmark_fixture.hpp>
#include <benchmarks/io/cuio_benchmark_common.hpp>
#include <benchmarks/synchronization/synchronization.hpp>
Expand Down Expand Up @@ -51,12 +52,18 @@ void BM_csv_read_varying_input(benchmark::State& state)
cudf_io::csv_reader_options const read_options =
cudf_io::csv_reader_options::builder(source_sink.make_source_info());

rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource();
cudf::memory_tracking_resource<rmm::mr::device_memory_resource> tracking_mr(mr);

rmm::mr::set_current_device_resource(&tracking_mr);
for (auto _ : state) {
cuda_event_timer raii(state, true); // flush_l2_cache = true, stream = 0
cudf_io::read_csv(read_options);
}
rmm::mr::set_current_device_resource(mr);

state.SetBytesProcessed(data_size * state.iterations());
state.counters["peak_memory_usage"] = tracking_mr.max_allocated_size();
}

void BM_csv_read_varying_options(benchmark::State& state)
Expand Down Expand Up @@ -94,6 +101,10 @@ void BM_csv_read_varying_options(benchmark::State& state)

size_t const chunk_size = csv_data.size() / num_chunks;
cudf::size_type const chunk_row_cnt = view.num_rows() / num_chunks;
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource();
cudf::memory_tracking_resource<rmm::mr::device_memory_resource> tracking_mr(mr);

rmm::mr::set_current_device_resource(&tracking_mr);
for (auto _ : state) {
cuda_event_timer raii(state, true); // flush_l2_cache = true, stream = 0
for (int32_t chunk = 0; chunk < num_chunks; ++chunk) {
Expand Down Expand Up @@ -124,8 +135,11 @@ void BM_csv_read_varying_options(benchmark::State& state)
cudf_io::read_csv(read_options);
}
}
rmm::mr::set_current_device_resource(mr);

auto const data_processed = data_size * cols_to_read.size() / view.num_columns();
state.SetBytesProcessed(data_processed * state.iterations());
state.counters["peak_memory_usage"] = tracking_mr.max_allocated_size();
}

#define CSV_RD_BM_INPUTS_DEFINE(name, type_or_group, src_type) \
Expand Down
15 changes: 14 additions & 1 deletion cpp/benchmarks/io/csv/csv_writer_benchmark.cpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2020, NVIDIA CORPORATION.
* Copyright (c) 2020-2021, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand All @@ -17,6 +17,7 @@
#include <benchmark/benchmark.h>

#include <benchmarks/common/generate_benchmark_input.hpp>
#include <benchmarks/common/memory_tracking_resource.hpp>
#include <benchmarks/fixture/benchmark_fixture.hpp>
#include <benchmarks/io/cuio_benchmark_common.hpp>
#include <benchmarks/synchronization/synchronization.hpp>
Expand All @@ -42,6 +43,10 @@ void BM_csv_write_varying_inout(benchmark::State& state)
auto const view = tbl->view();

cuio_source_sink_pair source_sink(sink_type);
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource();
cudf::memory_tracking_resource<rmm::mr::device_memory_resource> tracking_mr(mr);

rmm::mr::set_current_device_resource(&tracking_mr);
for (auto _ : state) {
cuda_event_timer raii(state, true); // flush_l2_cache = true, stream = 0
cudf_io::csv_writer_options options =
Expand All @@ -50,8 +55,10 @@ void BM_csv_write_varying_inout(benchmark::State& state)
.rows_per_chunk(1 << 14); // TODO: remove once default is sensible
cudf_io::write_csv(options);
}
rmm::mr::set_current_device_resource(mr);

state.SetBytesProcessed(data_size * state.iterations());
state.counters["peak_memory_usage"] = tracking_mr.max_allocated_size();
}

void BM_csv_write_varying_options(benchmark::State& state)
Expand All @@ -69,6 +76,10 @@ void BM_csv_write_varying_options(benchmark::State& state)

std::string const na_per(na_per_len, '#');
std::vector<char> csv_data;
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource();
cudf::memory_tracking_resource<rmm::mr::device_memory_resource> tracking_mr(mr);

rmm::mr::set_current_device_resource(&tracking_mr);
for (auto _ : state) {
cuda_event_timer raii(state, true); // flush_l2_cache = true, stream = 0
cudf_io::csv_writer_options options =
Expand All @@ -78,8 +89,10 @@ void BM_csv_write_varying_options(benchmark::State& state)
.rows_per_chunk(rows_per_chunk);
cudf_io::write_csv(options);
}
rmm::mr::set_current_device_resource(mr);

state.SetBytesProcessed(data_size * state.iterations());
state.counters["peak_memory_usage"] = tracking_mr.max_allocated_size();
}

#define CSV_WR_BM_INOUTS_DEFINE(name, type_or_group, sink_type) \
Expand Down
16 changes: 15 additions & 1 deletion cpp/benchmarks/io/orc/orc_reader_benchmark.cpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2020, NVIDIA CORPORATION.
* Copyright (c) 2020-2021, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand All @@ -17,6 +17,7 @@
#include <benchmark/benchmark.h>

#include <benchmarks/common/generate_benchmark_input.hpp>
#include <benchmarks/common/memory_tracking_resource.hpp>
#include <benchmarks/fixture/benchmark_fixture.hpp>
#include <benchmarks/io/cuio_benchmark_common.hpp>
#include <benchmarks/synchronization/synchronization.hpp>
Expand Down Expand Up @@ -58,12 +59,18 @@ void BM_orc_read_varying_input(benchmark::State& state)
cudf_io::orc_reader_options read_opts =
cudf_io::orc_reader_options::builder(source_sink.make_source_info());

rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource();
cudf::memory_tracking_resource<rmm::mr::device_memory_resource> tracking_mr(mr);

rmm::mr::set_current_device_resource(&tracking_mr);
for (auto _ : state) {
cuda_event_timer raii(state, true); // flush_l2_cache = true, stream = 0
cudf_io::read_orc(read_opts);
}
rmm::mr::set_current_device_resource(mr);

state.SetBytesProcessed(data_size * state.iterations());
state.counters["peak_memory_usage"] = tracking_mr.max_allocated_size();
}

std::vector<std::string> get_col_names(std::vector<char> const& orc_data)
Expand Down Expand Up @@ -112,6 +119,10 @@ void BM_orc_read_varying_options(benchmark::State& state)

auto const num_stripes = data_size / (64 << 20);
cudf::size_type const chunk_row_cnt = view.num_rows() / num_chunks;
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource();
cudf::memory_tracking_resource<rmm::mr::device_memory_resource> tracking_mr(mr);

rmm::mr::set_current_device_resource(&tracking_mr);
for (auto _ : state) {
cuda_event_timer raii(state, true); // flush_l2_cache = true, stream = 0

Expand Down Expand Up @@ -141,8 +152,11 @@ void BM_orc_read_varying_options(benchmark::State& state)

CUDF_EXPECTS(rows_read == view.num_rows(), "Benchmark did not read the entire table");
}
rmm::mr::set_current_device_resource(mr);

auto const data_processed = data_size * cols_to_read.size() / view.num_columns();
state.SetBytesProcessed(data_processed * state.iterations());
state.counters["peak_memory_usage"] = tracking_mr.max_allocated_size();
}

#define ORC_RD_BM_INPUTS_DEFINE(name, type_or_group, src_type) \
Expand Down
Loading