From c34b32b248309382d1329adc97fe7c9d3e55eefe Mon Sep 17 00:00:00 2001 From: Devavret Makkar Date: Wed, 31 Mar 2021 00:51:01 +0530 Subject: [PATCH 01/10] Initial memory tracking resource And sample use in parquet writer bench --- cpp/benchmarks/fixture/benchmark_fixture.hpp | 149 +++++++++++++++++- .../io/parquet/parquet_writer_benchmark.cpp | 3 + 2 files changed, 151 insertions(+), 1 deletion(-) diff --git a/cpp/benchmarks/fixture/benchmark_fixture.hpp b/cpp/benchmarks/fixture/benchmark_fixture.hpp index dd1bbcba0b4..a330ccf26d9 100644 --- a/cpp/benchmarks/fixture/benchmark_fixture.hpp +++ b/cpp/benchmarks/fixture/benchmark_fixture.hpp @@ -30,8 +30,150 @@ inline auto make_pool() { return rmm::mr::make_owning_wrapper(make_cuda()); } + +template +class memory_tracking_resource final : public rmm::mr::device_memory_resource { + public: + /** + * @brief Construct a new tracking resource adaptor using `upstream` to satisfy + * allocation requests and tracking information about each allocation/free to + * the members current_allocated_size_ and max_allocated_size_. + * + * @throws `rmm::logic_error` if `upstream == nullptr` + * + * @param upstream The resource used for allocating/deallocating device memory + */ + memory_tracking_resource(Upstream* upstream) : upstream_{upstream} + { + RMM_EXPECTS(nullptr != upstream, "Unexpected null upstream resource pointer."); + } + + memory_tracking_resource() = delete; + ~memory_tracking_resource() = default; + memory_tracking_resource(memory_tracking_resource const&) = delete; + memory_tracking_resource(memory_tracking_resource&&) = default; + memory_tracking_resource& operator=(memory_tracking_resource const&) = delete; + memory_tracking_resource& operator=(memory_tracking_resource&&) = default; + + /** + * @brief Return pointer to the upstream resource. + * + * @return Upstream* Pointer to the upstream resource. + */ + Upstream* get_upstream() const noexcept { return upstream_; } + + /** + * @brief Checks whether the upstream resource supports streams. + * + * @return true The upstream resource supports streams + * @return false The upstream resource does not support streams. + */ + bool supports_streams() const noexcept override { return upstream_->supports_streams(); } + + /** + * @brief Query whether the resource supports the get_mem_info API. + * + * @return bool true if the upstream resource supports get_mem_info, false otherwise. + */ + bool supports_get_mem_info() const noexcept override + { + return upstream_->supports_get_mem_info(); + } + + size_t max_allocated_size() const noexcept { return max_allocated_size_; } + size_t current_allocated_size() const noexcept { return current_allocated_size_; } + + private: + /** + * @brief Allocates memory of size at least `bytes` using the upstream + * resource and logs the allocation. + * + * If the upstream allocation is successful updates the current total memory and peak memory + * allocated with this resource + * + * The returned pointer has at least 256B alignment. + * + * @throws `rmm::bad_alloc` if the requested allocation could not be fulfilled + * by the upstream resource. 
+ * + * @param bytes The size, in bytes, of the allocation + * @param stream Stream on which to perform the allocation + * @return void* Pointer to the newly allocated memory + */ + void* do_allocate(std::size_t bytes, rmm::cuda_stream_view stream) override + { + auto const p = upstream_->allocate(bytes, stream); + current_allocated_size_ += bytes; + max_allocated_size_ = std::max(current_allocated_size_, max_allocated_size_); + return p; + } + + /** + * @brief Free allocation of size `bytes` pointed to by `p` and log the + * deallocation. + * + * Updates the current total memory and peak memory allocated with this resource + * + * @throws Nothing. + * + * @param p Pointer to be deallocated + * @param bytes Size of the allocation + * @param stream Stream on which to perform the deallocation + */ + void do_deallocate(void* p, std::size_t bytes, rmm::cuda_stream_view stream) override + { + current_allocated_size_ -= bytes; + upstream_->deallocate(p, bytes, stream); + } + + /** + * @brief Compare the upstream resource to another. + * + * @throws Nothing. + * + * @param other The other resource to compare to + * @return true If the two resources are equivalent + * @return false If the two resources are not equal + */ + bool do_is_equal(device_memory_resource const& other) const noexcept override + { + if (this == &other) + return true; + else { + memory_tracking_resource const* cast = + dynamic_cast const*>(&other); + if (cast != nullptr) + return upstream_->is_equal(*cast->get_upstream()); + else + return upstream_->is_equal(other); + } + } + + /** + * @brief Get free and available memory from upstream resource. + * + * @throws `rmm::cuda_error` if unable to retrieve memory info. + * + * @param stream Stream on which to get the mem info. + * @return std::pair contaiing free_size and total_size of memory + */ + std::pair do_get_mem_info(rmm::cuda_stream_view stream) const override + { + return upstream_->get_mem_info(stream); + } + + size_t current_allocated_size_ = 0; + size_t max_allocated_size_ = 0; + + Upstream* upstream_; ///< The upstream resource used for satisfying + ///< allocation requests +}; + } // namespace +using memory_tracking_pool_resource_type = + memory_tracking_resource; + /** * @brief Google Benchmark fixture for libcudf benchmarks * @@ -67,7 +209,10 @@ class benchmark : public ::benchmark::Fixture { public: virtual void SetUp(const ::benchmark::State& state) { - mr = make_pool(); + auto pool = make_pool(); + mr = std::make_shared( + memory_tracking_pool_resource_type(pool.get())); + pool_mr = pool; rmm::mr::set_current_device_resource(mr.get()); // set default resource to pool } @@ -75,6 +220,7 @@ class benchmark : public ::benchmark::Fixture { { // reset default resource to the initial resource rmm::mr::set_current_device_resource(nullptr); + pool_mr.reset(); mr.reset(); } @@ -86,6 +232,7 @@ class benchmark : public ::benchmark::Fixture { } std::shared_ptr mr; + std::shared_ptr pool_mr; }; } // namespace cudf diff --git a/cpp/benchmarks/io/parquet/parquet_writer_benchmark.cpp b/cpp/benchmarks/io/parquet/parquet_writer_benchmark.cpp index d17e7b126c7..923af428b8f 100644 --- a/cpp/benchmarks/io/parquet/parquet_writer_benchmark.cpp +++ b/cpp/benchmarks/io/parquet/parquet_writer_benchmark.cpp @@ -59,6 +59,9 @@ void BM_parq_write_varying_inout(benchmark::State& state) } state.SetBytesProcessed(data_size * state.iterations()); + auto mr = + dynamic_cast(rmm::mr::get_current_device_resource()); + state.counters["peak mem"] = mr->max_allocated_size(); } void 
BM_parq_write_varying_options(benchmark::State& state) From b9147f432014e0cc3f23e5b782e6c6409e929299 Mon Sep 17 00:00:00 2001 From: Devavret Makkar Date: Wed, 31 Mar 2021 03:34:55 +0530 Subject: [PATCH 02/10] Localize the memory usage calculator to just the API being benchmarked Separate out memory_tracking_resource into its own header and remove association with benchmark fixture --- .../common/memory_tracking_resource.hpp | 169 ++++++++++++++++++ cpp/benchmarks/fixture/benchmark_fixture.hpp | 151 +--------------- .../io/parquet/parquet_writer_benchmark.cpp | 16 +- 3 files changed, 185 insertions(+), 151 deletions(-) create mode 100644 cpp/benchmarks/common/memory_tracking_resource.hpp diff --git a/cpp/benchmarks/common/memory_tracking_resource.hpp b/cpp/benchmarks/common/memory_tracking_resource.hpp new file mode 100644 index 00000000000..0a1cc6d175d --- /dev/null +++ b/cpp/benchmarks/common/memory_tracking_resource.hpp @@ -0,0 +1,169 @@ +/* + * Copyright (c) 2021, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include + +namespace cudf { + +/** + * @brief Resource that uses `Upstream` to allocate memory and tracks the current and peak memory + * allocated using this resource + * + * An instance of this resource can be constructed with an existing, upstream resource in order to + * satisfy allocation requests and track memory use. + * + * @tparam Upstream Type of the upstream resource used for allocation/deallocation. + */ +template +class memory_tracking_resource final : public rmm::mr::device_memory_resource { + public: + /** + * @brief Construct a new tracking resource adaptor using `upstream` to satisfy allocation + * requests and tracking information about each allocation/free to the members + * current_allocated_size_ and max_allocated_size_. + * + * @throws `rmm::logic_error` if `upstream == nullptr` + * + * @param upstream The resource used for allocating/deallocating device memory + */ + memory_tracking_resource(Upstream* upstream) : upstream_{upstream} + { + RMM_EXPECTS(nullptr != upstream, "Unexpected null upstream resource pointer."); + } + + memory_tracking_resource() = delete; + ~memory_tracking_resource() = default; + memory_tracking_resource(memory_tracking_resource const&) = delete; + memory_tracking_resource(memory_tracking_resource&&) = default; + memory_tracking_resource& operator=(memory_tracking_resource const&) = delete; + memory_tracking_resource& operator=(memory_tracking_resource&&) = default; + + /** + * @brief Return pointer to the upstream resource. + * + * @return Upstream* Pointer to the upstream resource. + */ + Upstream* get_upstream() const noexcept { return upstream_; } + + /** + * @brief Checks whether the upstream resource supports streams. + * + * @return true The upstream resource supports streams + * @return false The upstream resource does not support streams. 
+ */ + bool supports_streams() const noexcept override { return upstream_->supports_streams(); } + + /** + * @brief Query whether the resource supports the get_mem_info API. + * + * @return bool true if the upstream resource supports get_mem_info, false otherwise. + */ + bool supports_get_mem_info() const noexcept override + { + return upstream_->supports_get_mem_info(); + } + + size_t max_allocated_size() const noexcept { return max_allocated_size_; } + size_t current_allocated_size() const noexcept { return current_allocated_size_; } + + private: + /** + * @brief Allocates memory of size at least `bytes` using the upstream resource and updates the + * size of memory in use. + * + * If the upstream allocation is successful updates the current total memory and peak memory + * allocated with this resource + * + * The returned pointer has at least 256B alignment. + * + * @throws `rmm::bad_alloc` if the requested allocation could not be fulfilled + * by the upstream resource. + * + * @param bytes The size, in bytes, of the allocation + * @param stream Stream on which to perform the allocation + * @return void* Pointer to the newly allocated memory + */ + void* do_allocate(std::size_t bytes, rmm::cuda_stream_view stream) override + { + auto const p = upstream_->allocate(bytes, stream); + current_allocated_size_ += bytes; + max_allocated_size_ = std::max(current_allocated_size_, max_allocated_size_); + return p; + } + + /** + * @brief Free allocation of size `bytes` pointed to by `p` and log the deallocation. + * + * Updates the current total memory and peak memory allocated with this resource + * + * @throws Nothing. + * + * @param p Pointer to be deallocated + * @param bytes Size of the allocation + * @param stream Stream on which to perform the deallocation + */ + void do_deallocate(void* p, std::size_t bytes, rmm::cuda_stream_view stream) override + { + current_allocated_size_ -= bytes; + upstream_->deallocate(p, bytes, stream); + } + + /** + * @brief Compare the upstream resource to another. + * + * @throws Nothing. + * + * @param other The other resource to compare to + * @return true If the two resources are equivalent + * @return false If the two resources are not equal + */ + bool do_is_equal(device_memory_resource const& other) const noexcept override + { + if (this == &other) + return true; + else { + memory_tracking_resource const* cast = + dynamic_cast const*>(&other); + if (cast != nullptr) + return upstream_->is_equal(*cast->get_upstream()); + else + return upstream_->is_equal(other); + } + } + + /** + * @brief Get free and available memory from upstream resource. + * + * @throws `rmm::cuda_error` if unable to retrieve memory info. + * + * @param stream Stream on which to get the mem info. + * @return std::pair contaiing free_size and total_size of memory + */ + std::pair do_get_mem_info(rmm::cuda_stream_view stream) const override + { + return upstream_->get_mem_info(stream); + } + + size_t current_allocated_size_ = 0; + size_t max_allocated_size_ = 0; + + Upstream* upstream_; ///< The upstream resource used for satisfying + ///< allocation requests +}; + +} // namespace cudf diff --git a/cpp/benchmarks/fixture/benchmark_fixture.hpp b/cpp/benchmarks/fixture/benchmark_fixture.hpp index a330ccf26d9..4a84b479198 100644 --- a/cpp/benchmarks/fixture/benchmark_fixture.hpp +++ b/cpp/benchmarks/fixture/benchmark_fixture.hpp @@ -14,6 +14,8 @@ * limitations under the License. 
*/ +#pragma once + #include #include #include @@ -30,150 +32,8 @@ inline auto make_pool() { return rmm::mr::make_owning_wrapper(make_cuda()); } - -template -class memory_tracking_resource final : public rmm::mr::device_memory_resource { - public: - /** - * @brief Construct a new tracking resource adaptor using `upstream` to satisfy - * allocation requests and tracking information about each allocation/free to - * the members current_allocated_size_ and max_allocated_size_. - * - * @throws `rmm::logic_error` if `upstream == nullptr` - * - * @param upstream The resource used for allocating/deallocating device memory - */ - memory_tracking_resource(Upstream* upstream) : upstream_{upstream} - { - RMM_EXPECTS(nullptr != upstream, "Unexpected null upstream resource pointer."); - } - - memory_tracking_resource() = delete; - ~memory_tracking_resource() = default; - memory_tracking_resource(memory_tracking_resource const&) = delete; - memory_tracking_resource(memory_tracking_resource&&) = default; - memory_tracking_resource& operator=(memory_tracking_resource const&) = delete; - memory_tracking_resource& operator=(memory_tracking_resource&&) = default; - - /** - * @brief Return pointer to the upstream resource. - * - * @return Upstream* Pointer to the upstream resource. - */ - Upstream* get_upstream() const noexcept { return upstream_; } - - /** - * @brief Checks whether the upstream resource supports streams. - * - * @return true The upstream resource supports streams - * @return false The upstream resource does not support streams. - */ - bool supports_streams() const noexcept override { return upstream_->supports_streams(); } - - /** - * @brief Query whether the resource supports the get_mem_info API. - * - * @return bool true if the upstream resource supports get_mem_info, false otherwise. - */ - bool supports_get_mem_info() const noexcept override - { - return upstream_->supports_get_mem_info(); - } - - size_t max_allocated_size() const noexcept { return max_allocated_size_; } - size_t current_allocated_size() const noexcept { return current_allocated_size_; } - - private: - /** - * @brief Allocates memory of size at least `bytes` using the upstream - * resource and logs the allocation. - * - * If the upstream allocation is successful updates the current total memory and peak memory - * allocated with this resource - * - * The returned pointer has at least 256B alignment. - * - * @throws `rmm::bad_alloc` if the requested allocation could not be fulfilled - * by the upstream resource. - * - * @param bytes The size, in bytes, of the allocation - * @param stream Stream on which to perform the allocation - * @return void* Pointer to the newly allocated memory - */ - void* do_allocate(std::size_t bytes, rmm::cuda_stream_view stream) override - { - auto const p = upstream_->allocate(bytes, stream); - current_allocated_size_ += bytes; - max_allocated_size_ = std::max(current_allocated_size_, max_allocated_size_); - return p; - } - - /** - * @brief Free allocation of size `bytes` pointed to by `p` and log the - * deallocation. - * - * Updates the current total memory and peak memory allocated with this resource - * - * @throws Nothing. 
- * - * @param p Pointer to be deallocated - * @param bytes Size of the allocation - * @param stream Stream on which to perform the deallocation - */ - void do_deallocate(void* p, std::size_t bytes, rmm::cuda_stream_view stream) override - { - current_allocated_size_ -= bytes; - upstream_->deallocate(p, bytes, stream); - } - - /** - * @brief Compare the upstream resource to another. - * - * @throws Nothing. - * - * @param other The other resource to compare to - * @return true If the two resources are equivalent - * @return false If the two resources are not equal - */ - bool do_is_equal(device_memory_resource const& other) const noexcept override - { - if (this == &other) - return true; - else { - memory_tracking_resource const* cast = - dynamic_cast const*>(&other); - if (cast != nullptr) - return upstream_->is_equal(*cast->get_upstream()); - else - return upstream_->is_equal(other); - } - } - - /** - * @brief Get free and available memory from upstream resource. - * - * @throws `rmm::cuda_error` if unable to retrieve memory info. - * - * @param stream Stream on which to get the mem info. - * @return std::pair contaiing free_size and total_size of memory - */ - std::pair do_get_mem_info(rmm::cuda_stream_view stream) const override - { - return upstream_->get_mem_info(stream); - } - - size_t current_allocated_size_ = 0; - size_t max_allocated_size_ = 0; - - Upstream* upstream_; ///< The upstream resource used for satisfying - ///< allocation requests -}; - } // namespace -using memory_tracking_pool_resource_type = - memory_tracking_resource; - /** * @brief Google Benchmark fixture for libcudf benchmarks * @@ -209,10 +69,7 @@ class benchmark : public ::benchmark::Fixture { public: virtual void SetUp(const ::benchmark::State& state) { - auto pool = make_pool(); - mr = std::make_shared( - memory_tracking_pool_resource_type(pool.get())); - pool_mr = pool; + mr = make_pool(); rmm::mr::set_current_device_resource(mr.get()); // set default resource to pool } @@ -220,7 +77,6 @@ class benchmark : public ::benchmark::Fixture { { // reset default resource to the initial resource rmm::mr::set_current_device_resource(nullptr); - pool_mr.reset(); mr.reset(); } @@ -232,7 +88,6 @@ class benchmark : public ::benchmark::Fixture { } std::shared_ptr mr; - std::shared_ptr pool_mr; }; } // namespace cudf diff --git a/cpp/benchmarks/io/parquet/parquet_writer_benchmark.cpp b/cpp/benchmarks/io/parquet/parquet_writer_benchmark.cpp index 923af428b8f..83ff0021516 100644 --- a/cpp/benchmarks/io/parquet/parquet_writer_benchmark.cpp +++ b/cpp/benchmarks/io/parquet/parquet_writer_benchmark.cpp @@ -17,6 +17,7 @@ #include #include +#include #include #include #include @@ -50,6 +51,10 @@ void BM_parq_write_varying_inout(benchmark::State& state) auto const view = tbl->view(); cuio_source_sink_pair source_sink(sink_type); + rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource(); + cudf::memory_tracking_resource tracking_mr(mr); + + rmm::mr::set_current_device_resource(&tracking_mr); for (auto _ : state) { cuda_event_timer raii(state, true); // flush_l2_cache = true, stream = 0 cudf_io::parquet_writer_options opts = @@ -57,11 +62,10 @@ void BM_parq_write_varying_inout(benchmark::State& state) .compression(compression); cudf_io::write_parquet(opts); } + rmm::mr::set_current_device_resource(mr); state.SetBytesProcessed(data_size * state.iterations()); - auto mr = - dynamic_cast(rmm::mr::get_current_device_resource()); - state.counters["peak mem"] = mr->max_allocated_size(); + 
state.counters["peak_memory_usage"] = tracking_mr.max_allocated_size(); } void BM_parq_write_varying_options(benchmark::State& state) @@ -79,6 +83,10 @@ void BM_parq_write_varying_options(benchmark::State& state) auto const view = tbl->view(); cuio_source_sink_pair source_sink(io_type::FILEPATH); + rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource(); + cudf::memory_tracking_resource tracking_mr(mr); + + rmm::mr::set_current_device_resource(&tracking_mr); for (auto _ : state) { cuda_event_timer raii(state, true); // flush_l2_cache = true, stream = 0 cudf_io::parquet_writer_options const options = @@ -88,8 +96,10 @@ void BM_parq_write_varying_options(benchmark::State& state) .column_chunks_file_path(file_path); cudf_io::write_parquet(options); } + rmm::mr::set_current_device_resource(mr); state.SetBytesProcessed(data_size * state.iterations()); + state.counters["peak_memory_usage"] = tracking_mr.max_allocated_size(); } #define PARQ_WR_BM_INOUTS_DEFINE(name, type_or_group, sink_type) \ From c49d60b4437e563e606a09dfe6a3d694b2900fbe Mon Sep 17 00:00:00 2001 From: Devavret Makkar Date: Wed, 31 Mar 2021 04:30:43 +0530 Subject: [PATCH 03/10] Update cpp/benchmarks/common/memory_tracking_resource.hpp Co-authored-by: David <45795991+davidwendt@users.noreply.github.com> --- cpp/benchmarks/common/memory_tracking_resource.hpp | 2 -- 1 file changed, 2 deletions(-) diff --git a/cpp/benchmarks/common/memory_tracking_resource.hpp b/cpp/benchmarks/common/memory_tracking_resource.hpp index 0a1cc6d175d..2482c0b5d0f 100644 --- a/cpp/benchmarks/common/memory_tracking_resource.hpp +++ b/cpp/benchmarks/common/memory_tracking_resource.hpp @@ -111,8 +111,6 @@ class memory_tracking_resource final : public rmm::mr::device_memory_resource { * * Updates the current total memory and peak memory allocated with this resource * - * @throws Nothing. - * * @param p Pointer to be deallocated * @param bytes Size of the allocation * @param stream Stream on which to perform the deallocation From 4664f9a5295cf2bc190fcca0b5ac0eff8ef35f32 Mon Sep 17 00:00:00 2001 From: Devavret Makkar Date: Wed, 31 Mar 2021 14:37:20 +0530 Subject: [PATCH 04/10] Expand mem tracking to all cuIO benchmarks --- cpp/benchmarks/fixture/benchmark_fixture.hpp | 2 +- cpp/benchmarks/io/csv/csv_reader_benchmark.cpp | 16 +++++++++++++++- cpp/benchmarks/io/csv/csv_writer_benchmark.cpp | 15 ++++++++++++++- cpp/benchmarks/io/orc/orc_reader_benchmark.cpp | 16 +++++++++++++++- cpp/benchmarks/io/orc/orc_writer_benchmark.cpp | 15 ++++++++++++++- .../io/parquet/parquet_reader_benchmark.cpp | 16 +++++++++++++++- .../io/parquet/parquet_writer_benchmark.cpp | 2 +- 7 files changed, 75 insertions(+), 7 deletions(-) diff --git a/cpp/benchmarks/fixture/benchmark_fixture.hpp b/cpp/benchmarks/fixture/benchmark_fixture.hpp index 4a84b479198..7ca2300543d 100644 --- a/cpp/benchmarks/fixture/benchmark_fixture.hpp +++ b/cpp/benchmarks/fixture/benchmark_fixture.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2020, NVIDIA CORPORATION. + * Copyright (c) 2019-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/cpp/benchmarks/io/csv/csv_reader_benchmark.cpp b/cpp/benchmarks/io/csv/csv_reader_benchmark.cpp index a3ee1a3f333..b3b4996a934 100644 --- a/cpp/benchmarks/io/csv/csv_reader_benchmark.cpp +++ b/cpp/benchmarks/io/csv/csv_reader_benchmark.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, NVIDIA CORPORATION. 
+ * Copyright (c) 2020-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -17,6 +17,7 @@ #include #include +#include #include #include #include @@ -51,12 +52,18 @@ void BM_csv_read_varying_input(benchmark::State& state) cudf_io::csv_reader_options const read_options = cudf_io::csv_reader_options::builder(source_sink.make_source_info()); + rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource(); + cudf::memory_tracking_resource tracking_mr(mr); + + rmm::mr::set_current_device_resource(&tracking_mr); for (auto _ : state) { cuda_event_timer raii(state, true); // flush_l2_cache = true, stream = 0 cudf_io::read_csv(read_options); } + rmm::mr::set_current_device_resource(mr); state.SetBytesProcessed(data_size * state.iterations()); + state.counters["peak_memory_usage"] = tracking_mr.max_allocated_size(); } void BM_csv_read_varying_options(benchmark::State& state) @@ -94,6 +101,10 @@ void BM_csv_read_varying_options(benchmark::State& state) size_t const chunk_size = csv_data.size() / num_chunks; cudf::size_type const chunk_row_cnt = view.num_rows() / num_chunks; + rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource(); + cudf::memory_tracking_resource tracking_mr(mr); + + rmm::mr::set_current_device_resource(&tracking_mr); for (auto _ : state) { cuda_event_timer raii(state, true); // flush_l2_cache = true, stream = 0 for (int32_t chunk = 0; chunk < num_chunks; ++chunk) { @@ -124,8 +135,11 @@ void BM_csv_read_varying_options(benchmark::State& state) cudf_io::read_csv(read_options); } } + rmm::mr::set_current_device_resource(mr); + auto const data_processed = data_size * cols_to_read.size() / view.num_columns(); state.SetBytesProcessed(data_processed * state.iterations()); + state.counters["peak_memory_usage"] = tracking_mr.max_allocated_size(); } #define CSV_RD_BM_INPUTS_DEFINE(name, type_or_group, src_type) \ diff --git a/cpp/benchmarks/io/csv/csv_writer_benchmark.cpp b/cpp/benchmarks/io/csv/csv_writer_benchmark.cpp index fcb9155e646..026b852ebd5 100644 --- a/cpp/benchmarks/io/csv/csv_writer_benchmark.cpp +++ b/cpp/benchmarks/io/csv/csv_writer_benchmark.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, NVIDIA CORPORATION. + * Copyright (c) 2020-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -17,6 +17,7 @@ #include #include +#include #include #include #include @@ -42,6 +43,10 @@ void BM_csv_write_varying_inout(benchmark::State& state) auto const view = tbl->view(); cuio_source_sink_pair source_sink(sink_type); + rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource(); + cudf::memory_tracking_resource tracking_mr(mr); + + rmm::mr::set_current_device_resource(&tracking_mr); for (auto _ : state) { cuda_event_timer raii(state, true); // flush_l2_cache = true, stream = 0 cudf_io::csv_writer_options options = @@ -50,8 +55,10 @@ void BM_csv_write_varying_inout(benchmark::State& state) .rows_per_chunk(1 << 14); // TODO: remove once default is sensible cudf_io::write_csv(options); } + rmm::mr::set_current_device_resource(mr); state.SetBytesProcessed(data_size * state.iterations()); + state.counters["peak_memory_usage"] = tracking_mr.max_allocated_size(); } void BM_csv_write_varying_options(benchmark::State& state) @@ -69,6 +76,10 @@ void BM_csv_write_varying_options(benchmark::State& state) std::string const na_per(na_per_len, '#'); std::vector csv_data; + rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource(); + cudf::memory_tracking_resource tracking_mr(mr); + + rmm::mr::set_current_device_resource(&tracking_mr); for (auto _ : state) { cuda_event_timer raii(state, true); // flush_l2_cache = true, stream = 0 cudf_io::csv_writer_options options = @@ -78,8 +89,10 @@ void BM_csv_write_varying_options(benchmark::State& state) .rows_per_chunk(rows_per_chunk); cudf_io::write_csv(options); } + rmm::mr::set_current_device_resource(mr); state.SetBytesProcessed(data_size * state.iterations()); + state.counters["peak_memory_usage"] = tracking_mr.max_allocated_size(); } #define CSV_WR_BM_INOUTS_DEFINE(name, type_or_group, sink_type) \ diff --git a/cpp/benchmarks/io/orc/orc_reader_benchmark.cpp b/cpp/benchmarks/io/orc/orc_reader_benchmark.cpp index d38747b934f..51823085132 100644 --- a/cpp/benchmarks/io/orc/orc_reader_benchmark.cpp +++ b/cpp/benchmarks/io/orc/orc_reader_benchmark.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, NVIDIA CORPORATION. + * Copyright (c) 2020-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -17,6 +17,7 @@ #include #include +#include #include #include #include @@ -58,12 +59,18 @@ void BM_orc_read_varying_input(benchmark::State& state) cudf_io::orc_reader_options read_opts = cudf_io::orc_reader_options::builder(source_sink.make_source_info()); + rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource(); + cudf::memory_tracking_resource tracking_mr(mr); + + rmm::mr::set_current_device_resource(&tracking_mr); for (auto _ : state) { cuda_event_timer raii(state, true); // flush_l2_cache = true, stream = 0 cudf_io::read_orc(read_opts); } + rmm::mr::set_current_device_resource(mr); state.SetBytesProcessed(data_size * state.iterations()); + state.counters["peak_memory_usage"] = tracking_mr.max_allocated_size(); } std::vector get_col_names(std::vector const& orc_data) @@ -112,6 +119,10 @@ void BM_orc_read_varying_options(benchmark::State& state) auto const num_stripes = data_size / (64 << 20); cudf::size_type const chunk_row_cnt = view.num_rows() / num_chunks; + rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource(); + cudf::memory_tracking_resource tracking_mr(mr); + + rmm::mr::set_current_device_resource(&tracking_mr); for (auto _ : state) { cuda_event_timer raii(state, true); // flush_l2_cache = true, stream = 0 @@ -141,8 +152,11 @@ void BM_orc_read_varying_options(benchmark::State& state) CUDF_EXPECTS(rows_read == view.num_rows(), "Benchmark did not read the entire table"); } + rmm::mr::set_current_device_resource(mr); + auto const data_processed = data_size * cols_to_read.size() / view.num_columns(); state.SetBytesProcessed(data_processed * state.iterations()); + state.counters["peak_memory_usage"] = tracking_mr.max_allocated_size(); } #define ORC_RD_BM_INPUTS_DEFINE(name, type_or_group, src_type) \ diff --git a/cpp/benchmarks/io/orc/orc_writer_benchmark.cpp b/cpp/benchmarks/io/orc/orc_writer_benchmark.cpp index bddfc3dfaa2..fa20ea8e96a 100644 --- a/cpp/benchmarks/io/orc/orc_writer_benchmark.cpp +++ b/cpp/benchmarks/io/orc/orc_writer_benchmark.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, NVIDIA CORPORATION. + * Copyright (c) 2020-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -17,6 +17,7 @@ #include #include +#include #include #include #include @@ -50,6 +51,10 @@ void BM_orc_write_varying_inout(benchmark::State& state) auto const view = tbl->view(); cuio_source_sink_pair source_sink(sink_type); + rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource(); + cudf::memory_tracking_resource tracking_mr(mr); + + rmm::mr::set_current_device_resource(&tracking_mr); for (auto _ : state) { cuda_event_timer raii(state, true); // flush_l2_cache = true, stream = 0 cudf_io::orc_writer_options options = @@ -57,8 +62,10 @@ void BM_orc_write_varying_inout(benchmark::State& state) .compression(compression); cudf_io::write_orc(options); } + rmm::mr::set_current_device_resource(mr); state.SetBytesProcessed(data_size * state.iterations()); + state.counters["peak_memory_usage"] = tracking_mr.max_allocated_size(); } void BM_orc_write_varying_options(benchmark::State& state) @@ -75,6 +82,10 @@ void BM_orc_write_varying_options(benchmark::State& state) auto const view = tbl->view(); cuio_source_sink_pair source_sink(io_type::FILEPATH); + rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource(); + cudf::memory_tracking_resource tracking_mr(mr); + + rmm::mr::set_current_device_resource(&tracking_mr); for (auto _ : state) { cuda_event_timer raii(state, true); // flush_l2_cache = true, stream = 0 cudf_io::orc_writer_options const options = @@ -83,8 +94,10 @@ void BM_orc_write_varying_options(benchmark::State& state) .enable_statistics(enable_stats); cudf_io::write_orc(options); } + rmm::mr::set_current_device_resource(mr); state.SetBytesProcessed(data_size * state.iterations()); + state.counters["peak_memory_usage"] = tracking_mr.max_allocated_size(); } #define ORC_WR_BM_INOUTS_DEFINE(name, type_or_group, sink_type) \ diff --git a/cpp/benchmarks/io/parquet/parquet_reader_benchmark.cpp b/cpp/benchmarks/io/parquet/parquet_reader_benchmark.cpp index 8fc8b29d19d..41afa4c8637 100644 --- a/cpp/benchmarks/io/parquet/parquet_reader_benchmark.cpp +++ b/cpp/benchmarks/io/parquet/parquet_reader_benchmark.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, NVIDIA CORPORATION. + * Copyright (c) 2020-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -17,6 +17,7 @@ #include #include +#include #include #include #include @@ -58,12 +59,18 @@ void BM_parq_read_varying_input(benchmark::State& state) cudf_io::parquet_reader_options read_opts = cudf_io::parquet_reader_options::builder(source_sink.make_source_info()); + rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource(); + cudf::memory_tracking_resource tracking_mr(mr); + + rmm::mr::set_current_device_resource(&tracking_mr); for (auto _ : state) { cuda_event_timer const raii(state, true); // flush_l2_cache = true, stream = 0 cudf_io::read_parquet(read_opts); } + rmm::mr::set_current_device_resource(mr); state.SetBytesProcessed(data_size * state.iterations()); + state.counters["peak_memory_usage"] = tracking_mr.max_allocated_size(); } std::vector get_col_names(std::vector const& parquet_data) @@ -112,6 +119,10 @@ void BM_parq_read_varying_options(benchmark::State& state) auto const num_row_groups = data_size / (128 << 20); cudf::size_type const chunk_row_cnt = view.num_rows() / num_chunks; + rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource(); + cudf::memory_tracking_resource tracking_mr(mr); + + rmm::mr::set_current_device_resource(&tracking_mr); for (auto _ : state) { cuda_event_timer raii(state, true); // flush_l2_cache = true, stream = 0 @@ -141,8 +152,11 @@ void BM_parq_read_varying_options(benchmark::State& state) CUDF_EXPECTS(rows_read == view.num_rows(), "Benchmark did not read the entire table"); } + rmm::mr::set_current_device_resource(mr); + auto const data_processed = data_size * cols_to_read.size() / view.num_columns(); state.SetBytesProcessed(data_processed * state.iterations()); + state.counters["peak_memory_usage"] = tracking_mr.max_allocated_size(); } #define PARQ_RD_BM_INPUTS_DEFINE(name, type_or_group, src_type) \ diff --git a/cpp/benchmarks/io/parquet/parquet_writer_benchmark.cpp b/cpp/benchmarks/io/parquet/parquet_writer_benchmark.cpp index 83ff0021516..ec8da1b6e4d 100644 --- a/cpp/benchmarks/io/parquet/parquet_writer_benchmark.cpp +++ b/cpp/benchmarks/io/parquet/parquet_writer_benchmark.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, NVIDIA CORPORATION. + * Copyright (c) 2020-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. From 5c90e76a5a46b3e1477bd886b99eaae1d431fd4f Mon Sep 17 00:00:00 2001 From: Devavret Makkar Date: Wed, 31 Mar 2021 14:38:40 +0530 Subject: [PATCH 05/10] Update docs --- cpp/benchmarks/common/memory_tracking_resource.hpp | 7 ------- 1 file changed, 7 deletions(-) diff --git a/cpp/benchmarks/common/memory_tracking_resource.hpp b/cpp/benchmarks/common/memory_tracking_resource.hpp index 2482c0b5d0f..cbdb47bde73 100644 --- a/cpp/benchmarks/common/memory_tracking_resource.hpp +++ b/cpp/benchmarks/common/memory_tracking_resource.hpp @@ -91,9 +91,6 @@ class memory_tracking_resource final : public rmm::mr::device_memory_resource { * * The returned pointer has at least 256B alignment. * - * @throws `rmm::bad_alloc` if the requested allocation could not be fulfilled - * by the upstream resource. - * * @param bytes The size, in bytes, of the allocation * @param stream Stream on which to perform the allocation * @return void* Pointer to the newly allocated memory @@ -124,8 +121,6 @@ class memory_tracking_resource final : public rmm::mr::device_memory_resource { /** * @brief Compare the upstream resource to another. * - * @throws Nothing. 
- * * @param other The other resource to compare to * @return true If the two resources are equivalent * @return false If the two resources are not equal @@ -147,8 +142,6 @@ class memory_tracking_resource final : public rmm::mr::device_memory_resource { /** * @brief Get free and available memory from upstream resource. * - * @throws `rmm::cuda_error` if unable to retrieve memory info. - * * @param stream Stream on which to get the mem info. * @return std::pair contaiing free_size and total_size of memory */ From 30a921cf0dee6605363b3c13a1b65d9dee00d041 Mon Sep 17 00:00:00 2001 From: Devavret Makkar Date: Fri, 2 Jul 2021 00:25:51 +0530 Subject: [PATCH 06/10] Move to new statistics resource --- .../common/memory_tracking_resource.hpp | 160 ------------------ .../io/csv/csv_reader_benchmark.cpp | 15 +- .../io/csv/csv_writer_benchmark.cpp | 15 +- .../io/orc/orc_reader_benchmark.cpp | 15 +- .../io/orc/orc_writer_benchmark.cpp | 15 +- .../io/parquet/parquet_reader_benchmark.cpp | 15 +- .../io/parquet/parquet_writer_benchmark.cpp | 17 +- .../parquet_writer_chunks_benchmark.cpp | 14 ++ 8 files changed, 64 insertions(+), 202 deletions(-) delete mode 100644 cpp/benchmarks/common/memory_tracking_resource.hpp diff --git a/cpp/benchmarks/common/memory_tracking_resource.hpp b/cpp/benchmarks/common/memory_tracking_resource.hpp deleted file mode 100644 index cbdb47bde73..00000000000 --- a/cpp/benchmarks/common/memory_tracking_resource.hpp +++ /dev/null @@ -1,160 +0,0 @@ -/* - * Copyright (c) 2021, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#pragma once - -#include - -namespace cudf { - -/** - * @brief Resource that uses `Upstream` to allocate memory and tracks the current and peak memory - * allocated using this resource - * - * An instance of this resource can be constructed with an existing, upstream resource in order to - * satisfy allocation requests and track memory use. - * - * @tparam Upstream Type of the upstream resource used for allocation/deallocation. - */ -template -class memory_tracking_resource final : public rmm::mr::device_memory_resource { - public: - /** - * @brief Construct a new tracking resource adaptor using `upstream` to satisfy allocation - * requests and tracking information about each allocation/free to the members - * current_allocated_size_ and max_allocated_size_. 
- * - * @throws `rmm::logic_error` if `upstream == nullptr` - * - * @param upstream The resource used for allocating/deallocating device memory - */ - memory_tracking_resource(Upstream* upstream) : upstream_{upstream} - { - RMM_EXPECTS(nullptr != upstream, "Unexpected null upstream resource pointer."); - } - - memory_tracking_resource() = delete; - ~memory_tracking_resource() = default; - memory_tracking_resource(memory_tracking_resource const&) = delete; - memory_tracking_resource(memory_tracking_resource&&) = default; - memory_tracking_resource& operator=(memory_tracking_resource const&) = delete; - memory_tracking_resource& operator=(memory_tracking_resource&&) = default; - - /** - * @brief Return pointer to the upstream resource. - * - * @return Upstream* Pointer to the upstream resource. - */ - Upstream* get_upstream() const noexcept { return upstream_; } - - /** - * @brief Checks whether the upstream resource supports streams. - * - * @return true The upstream resource supports streams - * @return false The upstream resource does not support streams. - */ - bool supports_streams() const noexcept override { return upstream_->supports_streams(); } - - /** - * @brief Query whether the resource supports the get_mem_info API. - * - * @return bool true if the upstream resource supports get_mem_info, false otherwise. - */ - bool supports_get_mem_info() const noexcept override - { - return upstream_->supports_get_mem_info(); - } - - size_t max_allocated_size() const noexcept { return max_allocated_size_; } - size_t current_allocated_size() const noexcept { return current_allocated_size_; } - - private: - /** - * @brief Allocates memory of size at least `bytes` using the upstream resource and updates the - * size of memory in use. - * - * If the upstream allocation is successful updates the current total memory and peak memory - * allocated with this resource - * - * The returned pointer has at least 256B alignment. - * - * @param bytes The size, in bytes, of the allocation - * @param stream Stream on which to perform the allocation - * @return void* Pointer to the newly allocated memory - */ - void* do_allocate(std::size_t bytes, rmm::cuda_stream_view stream) override - { - auto const p = upstream_->allocate(bytes, stream); - current_allocated_size_ += bytes; - max_allocated_size_ = std::max(current_allocated_size_, max_allocated_size_); - return p; - } - - /** - * @brief Free allocation of size `bytes` pointed to by `p` and log the deallocation. - * - * Updates the current total memory and peak memory allocated with this resource - * - * @param p Pointer to be deallocated - * @param bytes Size of the allocation - * @param stream Stream on which to perform the deallocation - */ - void do_deallocate(void* p, std::size_t bytes, rmm::cuda_stream_view stream) override - { - current_allocated_size_ -= bytes; - upstream_->deallocate(p, bytes, stream); - } - - /** - * @brief Compare the upstream resource to another. - * - * @param other The other resource to compare to - * @return true If the two resources are equivalent - * @return false If the two resources are not equal - */ - bool do_is_equal(device_memory_resource const& other) const noexcept override - { - if (this == &other) - return true; - else { - memory_tracking_resource const* cast = - dynamic_cast const*>(&other); - if (cast != nullptr) - return upstream_->is_equal(*cast->get_upstream()); - else - return upstream_->is_equal(other); - } - } - - /** - * @brief Get free and available memory from upstream resource. 
- * - * @param stream Stream on which to get the mem info. - * @return std::pair contaiing free_size and total_size of memory - */ - std::pair do_get_mem_info(rmm::cuda_stream_view stream) const override - { - return upstream_->get_mem_info(stream); - } - - size_t current_allocated_size_ = 0; - size_t max_allocated_size_ = 0; - - Upstream* upstream_; ///< The upstream resource used for satisfying - ///< allocation requests -}; - -} // namespace cudf diff --git a/cpp/benchmarks/io/csv/csv_reader_benchmark.cpp b/cpp/benchmarks/io/csv/csv_reader_benchmark.cpp index b3b4996a934..8169d4c060e 100644 --- a/cpp/benchmarks/io/csv/csv_reader_benchmark.cpp +++ b/cpp/benchmarks/io/csv/csv_reader_benchmark.cpp @@ -17,13 +17,14 @@ #include #include -#include #include #include #include #include +#include + // to enable, run cmake with -DBUILD_BENCHMARKS=ON constexpr size_t data_size = 256 << 20; @@ -53,9 +54,9 @@ void BM_csv_read_varying_input(benchmark::State& state) cudf_io::csv_reader_options::builder(source_sink.make_source_info()); rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource(); - cudf::memory_tracking_resource tracking_mr(mr); + auto statistics_mr = rmm::mr::make_statistics_adaptor(mr); - rmm::mr::set_current_device_resource(&tracking_mr); + rmm::mr::set_current_device_resource(&statistics_mr); for (auto _ : state) { cuda_event_timer raii(state, true); // flush_l2_cache = true, stream = 0 cudf_io::read_csv(read_options); @@ -63,7 +64,7 @@ void BM_csv_read_varying_input(benchmark::State& state) rmm::mr::set_current_device_resource(mr); state.SetBytesProcessed(data_size * state.iterations()); - state.counters["peak_memory_usage"] = tracking_mr.max_allocated_size(); + state.counters["peak_memory_usage"] = statistics_mr.get_bytes_counter().peak; } void BM_csv_read_varying_options(benchmark::State& state) @@ -102,9 +103,9 @@ void BM_csv_read_varying_options(benchmark::State& state) size_t const chunk_size = csv_data.size() / num_chunks; cudf::size_type const chunk_row_cnt = view.num_rows() / num_chunks; rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource(); - cudf::memory_tracking_resource tracking_mr(mr); + auto statistics_mr = rmm::mr::make_statistics_adaptor(mr); - rmm::mr::set_current_device_resource(&tracking_mr); + rmm::mr::set_current_device_resource(&statistics_mr); for (auto _ : state) { cuda_event_timer raii(state, true); // flush_l2_cache = true, stream = 0 for (int32_t chunk = 0; chunk < num_chunks; ++chunk) { @@ -139,7 +140,7 @@ void BM_csv_read_varying_options(benchmark::State& state) auto const data_processed = data_size * cols_to_read.size() / view.num_columns(); state.SetBytesProcessed(data_processed * state.iterations()); - state.counters["peak_memory_usage"] = tracking_mr.max_allocated_size(); + state.counters["peak_memory_usage"] = statistics_mr.get_bytes_counter().peak; } #define CSV_RD_BM_INPUTS_DEFINE(name, type_or_group, src_type) \ diff --git a/cpp/benchmarks/io/csv/csv_writer_benchmark.cpp b/cpp/benchmarks/io/csv/csv_writer_benchmark.cpp index 026b852ebd5..42e12c2d265 100644 --- a/cpp/benchmarks/io/csv/csv_writer_benchmark.cpp +++ b/cpp/benchmarks/io/csv/csv_writer_benchmark.cpp @@ -17,13 +17,14 @@ #include #include -#include #include #include #include #include +#include + // to enable, run cmake with -DBUILD_BENCHMARKS=ON constexpr size_t data_size = 256 << 20; @@ -44,9 +45,9 @@ void BM_csv_write_varying_inout(benchmark::State& state) cuio_source_sink_pair source_sink(sink_type); rmm::mr::device_memory_resource* mr = 
rmm::mr::get_current_device_resource(); - cudf::memory_tracking_resource tracking_mr(mr); + auto statistics_mr = rmm::mr::make_statistics_adaptor(mr); - rmm::mr::set_current_device_resource(&tracking_mr); + rmm::mr::set_current_device_resource(&statistics_mr); for (auto _ : state) { cuda_event_timer raii(state, true); // flush_l2_cache = true, stream = 0 cudf_io::csv_writer_options options = @@ -58,7 +59,7 @@ void BM_csv_write_varying_inout(benchmark::State& state) rmm::mr::set_current_device_resource(mr); state.SetBytesProcessed(data_size * state.iterations()); - state.counters["peak_memory_usage"] = tracking_mr.max_allocated_size(); + state.counters["peak_memory_usage"] = statistics_mr.get_bytes_counter().peak; } void BM_csv_write_varying_options(benchmark::State& state) @@ -77,9 +78,9 @@ void BM_csv_write_varying_options(benchmark::State& state) std::string const na_per(na_per_len, '#'); std::vector csv_data; rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource(); - cudf::memory_tracking_resource tracking_mr(mr); + auto statistics_mr = rmm::mr::make_statistics_adaptor(mr); - rmm::mr::set_current_device_resource(&tracking_mr); + rmm::mr::set_current_device_resource(&statistics_mr); for (auto _ : state) { cuda_event_timer raii(state, true); // flush_l2_cache = true, stream = 0 cudf_io::csv_writer_options options = @@ -92,7 +93,7 @@ void BM_csv_write_varying_options(benchmark::State& state) rmm::mr::set_current_device_resource(mr); state.SetBytesProcessed(data_size * state.iterations()); - state.counters["peak_memory_usage"] = tracking_mr.max_allocated_size(); + state.counters["peak_memory_usage"] = statistics_mr.get_bytes_counter().peak; } #define CSV_WR_BM_INOUTS_DEFINE(name, type_or_group, sink_type) \ diff --git a/cpp/benchmarks/io/orc/orc_reader_benchmark.cpp b/cpp/benchmarks/io/orc/orc_reader_benchmark.cpp index 79ce6bed3ce..77d929aecaa 100644 --- a/cpp/benchmarks/io/orc/orc_reader_benchmark.cpp +++ b/cpp/benchmarks/io/orc/orc_reader_benchmark.cpp @@ -17,13 +17,14 @@ #include #include -#include #include #include #include #include +#include + // to enable, run cmake with -DBUILD_BENCHMARKS=ON constexpr int64_t data_size = 512 << 20; @@ -60,9 +61,9 @@ void BM_orc_read_varying_input(benchmark::State& state) cudf_io::orc_reader_options::builder(source_sink.make_source_info()); rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource(); - cudf::memory_tracking_resource tracking_mr(mr); + auto statistics_mr = rmm::mr::make_statistics_adaptor(mr); - rmm::mr::set_current_device_resource(&tracking_mr); + rmm::mr::set_current_device_resource(&statistics_mr); for (auto _ : state) { cuda_event_timer raii(state, true); // flush_l2_cache = true, stream = 0 cudf_io::read_orc(read_opts); @@ -70,7 +71,7 @@ void BM_orc_read_varying_input(benchmark::State& state) rmm::mr::set_current_device_resource(mr); state.SetBytesProcessed(data_size * state.iterations()); - state.counters["peak_memory_usage"] = tracking_mr.max_allocated_size(); + state.counters["peak_memory_usage"] = statistics_mr.get_bytes_counter().peak; } std::vector get_col_names(std::vector const& orc_data) @@ -118,9 +119,9 @@ void BM_orc_read_varying_options(benchmark::State& state) auto const num_stripes = data_size / (64 << 20); cudf::size_type const chunk_row_cnt = view.num_rows() / num_chunks; rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource(); - cudf::memory_tracking_resource tracking_mr(mr); + auto statistics_mr = rmm::mr::make_statistics_adaptor(mr); - 
rmm::mr::set_current_device_resource(&tracking_mr);
+  rmm::mr::set_current_device_resource(&statistics_mr);
   for (auto _ : state) {
     cuda_event_timer raii(state, true);  // flush_l2_cache = true, stream = 0
@@ -154,7 +155,7 @@ void BM_orc_read_varying_options(benchmark::State& state)
   auto const data_processed = data_size * cols_to_read.size() / view.num_columns();
   state.SetBytesProcessed(data_processed * state.iterations());
-  state.counters["peak_memory_usage"] = tracking_mr.max_allocated_size();
+  state.counters["peak_memory_usage"] = statistics_mr.get_bytes_counter().peak;
 }
 #define ORC_RD_BM_INPUTS_DEFINE(name, type_or_group, src_type) \
diff --git a/cpp/benchmarks/io/orc/orc_writer_benchmark.cpp b/cpp/benchmarks/io/orc/orc_writer_benchmark.cpp
index fa20ea8e96a..2a5fe7941c8 100644
--- a/cpp/benchmarks/io/orc/orc_writer_benchmark.cpp
+++ b/cpp/benchmarks/io/orc/orc_writer_benchmark.cpp
@@ -17,13 +17,14 @@
 #include
 #include
-#include
 #include
 #include
 #include
 #include
+#include
+
 // to enable, run cmake with -DBUILD_BENCHMARKS=ON
 constexpr int64_t data_size = 512 << 20;
@@ -52,9 +53,9 @@ void BM_orc_write_varying_inout(benchmark::State& state)
   cuio_source_sink_pair source_sink(sink_type);
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource();
-  cudf::memory_tracking_resource tracking_mr(mr);
+  auto statistics_mr = rmm::mr::make_statistics_adaptor(mr);
-  rmm::mr::set_current_device_resource(&tracking_mr);
+  rmm::mr::set_current_device_resource(&statistics_mr);
   for (auto _ : state) {
     cuda_event_timer raii(state, true);  // flush_l2_cache = true, stream = 0
     cudf_io::orc_writer_options options =
@@ -65,7 +66,7 @@ void BM_orc_write_varying_inout(benchmark::State& state)
   rmm::mr::set_current_device_resource(mr);
   state.SetBytesProcessed(data_size * state.iterations());
-  state.counters["peak_memory_usage"] = tracking_mr.max_allocated_size();
+  state.counters["peak_memory_usage"] = statistics_mr.get_bytes_counter().peak;
 }
 void BM_orc_write_varying_options(benchmark::State& state)
@@ -83,9 +84,9 @@ void BM_orc_write_varying_options(benchmark::State& state)
   cuio_source_sink_pair source_sink(io_type::FILEPATH);
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource();
-  cudf::memory_tracking_resource tracking_mr(mr);
+  auto statistics_mr = rmm::mr::make_statistics_adaptor(mr);
-  rmm::mr::set_current_device_resource(&tracking_mr);
+  rmm::mr::set_current_device_resource(&statistics_mr);
   for (auto _ : state) {
     cuda_event_timer raii(state, true);  // flush_l2_cache = true, stream = 0
     cudf_io::orc_writer_options const options =
@@ -97,7 +98,7 @@ void BM_orc_write_varying_options(benchmark::State& state)
   rmm::mr::set_current_device_resource(mr);
   state.SetBytesProcessed(data_size * state.iterations());
-  state.counters["peak_memory_usage"] = tracking_mr.max_allocated_size();
+  state.counters["peak_memory_usage"] = statistics_mr.get_bytes_counter().peak;
 }
 #define ORC_WR_BM_INOUTS_DEFINE(name, type_or_group, sink_type) \
diff --git a/cpp/benchmarks/io/parquet/parquet_reader_benchmark.cpp b/cpp/benchmarks/io/parquet/parquet_reader_benchmark.cpp
index 41afa4c8637..41b073db36c 100644
--- a/cpp/benchmarks/io/parquet/parquet_reader_benchmark.cpp
+++ b/cpp/benchmarks/io/parquet/parquet_reader_benchmark.cpp
@@ -17,13 +17,14 @@
 #include
 #include
-#include
 #include
 #include
 #include
 #include
+#include
+
 // to enable, run cmake with -DBUILD_BENCHMARKS=ON
 constexpr size_t data_size = 512 << 20;
@@ -60,9 +61,9 @@ void BM_parq_read_varying_input(benchmark::State& state)
     cudf_io::parquet_reader_options::builder(source_sink.make_source_info());
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource();
-  cudf::memory_tracking_resource tracking_mr(mr);
+  auto statistics_mr = rmm::mr::make_statistics_adaptor(mr);
-  rmm::mr::set_current_device_resource(&tracking_mr);
+  rmm::mr::set_current_device_resource(&statistics_mr);
   for (auto _ : state) {
     cuda_event_timer const raii(state, true);  // flush_l2_cache = true, stream = 0
     cudf_io::read_parquet(read_opts);
@@ -70,7 +71,7 @@ void BM_parq_read_varying_input(benchmark::State& state)
   rmm::mr::set_current_device_resource(mr);
   state.SetBytesProcessed(data_size * state.iterations());
-  state.counters["peak_memory_usage"] = tracking_mr.max_allocated_size();
+  state.counters["peak_memory_usage"] = statistics_mr.get_bytes_counter().peak;
 }
 std::vector get_col_names(std::vector const& parquet_data)
@@ -120,9 +121,9 @@ void BM_parq_read_varying_options(benchmark::State& state)
   auto const num_row_groups = data_size / (128 << 20);
   cudf::size_type const chunk_row_cnt = view.num_rows() / num_chunks;
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource();
-  cudf::memory_tracking_resource tracking_mr(mr);
+  auto statistics_mr = rmm::mr::make_statistics_adaptor(mr);
-  rmm::mr::set_current_device_resource(&tracking_mr);
+  rmm::mr::set_current_device_resource(&statistics_mr);
   for (auto _ : state) {
     cuda_event_timer raii(state, true);  // flush_l2_cache = true, stream = 0
@@ -156,7 +157,7 @@ void BM_parq_read_varying_options(benchmark::State& state)
   auto const data_processed = data_size * cols_to_read.size() / view.num_columns();
   state.SetBytesProcessed(data_processed * state.iterations());
-  state.counters["peak_memory_usage"] = tracking_mr.max_allocated_size();
+  state.counters["peak_memory_usage"] = statistics_mr.get_bytes_counter().peak;
 }
 #define PARQ_RD_BM_INPUTS_DEFINE(name, type_or_group, src_type) \
diff --git a/cpp/benchmarks/io/parquet/parquet_writer_benchmark.cpp b/cpp/benchmarks/io/parquet/parquet_writer_benchmark.cpp
index ec8da1b6e4d..02b2bbec3b0 100644
--- a/cpp/benchmarks/io/parquet/parquet_writer_benchmark.cpp
+++ b/cpp/benchmarks/io/parquet/parquet_writer_benchmark.cpp
@@ -17,13 +17,16 @@
 #include
 #include
-#include
 #include
 #include
 #include
 #include
+#include
+
+#include
+
 // to enable, run cmake with -DBUILD_BENCHMARKS=ON
 constexpr size_t data_size = 512 << 20;
@@ -52,9 +55,9 @@ void BM_parq_write_varying_inout(benchmark::State& state)
   cuio_source_sink_pair source_sink(sink_type);
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource();
-  cudf::memory_tracking_resource tracking_mr(mr);
+  auto statistics_mr = rmm::mr::make_statistics_adaptor(mr);
-  rmm::mr::set_current_device_resource(&tracking_mr);
+  rmm::mr::set_current_device_resource(&statistics_mr);
   for (auto _ : state) {
     cuda_event_timer raii(state, true);  // flush_l2_cache = true, stream = 0
     cudf_io::parquet_writer_options opts =
@@ -65,7 +68,7 @@ void BM_parq_write_varying_inout(benchmark::State& state)
   rmm::mr::set_current_device_resource(mr);
   state.SetBytesProcessed(data_size * state.iterations());
-  state.counters["peak_memory_usage"] = tracking_mr.max_allocated_size();
+  state.counters["peak_memory_usage"] = statistics_mr.get_bytes_counter().peak;
 }
 void BM_parq_write_varying_options(benchmark::State& state)
@@ -84,9 +87,9 @@ void BM_parq_write_varying_options(benchmark::State& state)
   cuio_source_sink_pair source_sink(io_type::FILEPATH);
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource();
-  cudf::memory_tracking_resource tracking_mr(mr);
+  auto statistics_mr = rmm::mr::make_statistics_adaptor(mr);
-  rmm::mr::set_current_device_resource(&tracking_mr);
+  rmm::mr::set_current_device_resource(&statistics_mr);
   for (auto _ : state) {
     cuda_event_timer raii(state, true);  // flush_l2_cache = true, stream = 0
     cudf_io::parquet_writer_options const options =
@@ -99,7 +102,7 @@ void BM_parq_write_varying_options(benchmark::State& state)
   rmm::mr::set_current_device_resource(mr);
   state.SetBytesProcessed(data_size * state.iterations());
-  state.counters["peak_memory_usage"] = tracking_mr.max_allocated_size();
+  state.counters["peak_memory_usage"] = statistics_mr.get_bytes_counter().peak;
 }
 #define PARQ_WR_BM_INOUTS_DEFINE(name, type_or_group, sink_type) \
diff --git a/cpp/benchmarks/io/parquet/parquet_writer_chunks_benchmark.cpp b/cpp/benchmarks/io/parquet/parquet_writer_chunks_benchmark.cpp
index b38dda4d17e..f30cfa64768 100644
--- a/cpp/benchmarks/io/parquet/parquet_writer_chunks_benchmark.cpp
+++ b/cpp/benchmarks/io/parquet/parquet_writer_chunks_benchmark.cpp
@@ -29,6 +29,8 @@
 #include
+#include
+
 // to enable, run cmake with -DBUILD_BENCHMARKS=ON
 constexpr int64_t data_size = 512 << 20;
@@ -47,14 +49,20 @@ void PQ_write(benchmark::State& state)
   auto tbl = create_random_table({cudf::type_id::INT32}, num_cols, table_size_bytes{data_size});
   cudf::table_view view = tbl->view();
+  rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource();
+  auto statistics_mr = rmm::mr::make_statistics_adaptor(mr);
+
+  rmm::mr::set_current_device_resource(&statistics_mr);
   for (auto _ : state) {
     cuda_event_timer raii(state, true);  // flush_l2_cache = true, stream = 0
     cudf_io::parquet_writer_options opts =
       cudf_io::parquet_writer_options::builder(cudf_io::sink_info(), view);
     cudf_io::write_parquet(opts);
   }
+  rmm::mr::set_current_device_resource(mr);
   state.SetBytesProcessed(static_cast(state.iterations()) * state.range(0));
+  state.counters["peak_memory_usage"] = statistics_mr.get_bytes_counter().peak;
 }
 void PQ_write_chunked(benchmark::State& state)
@@ -68,6 +76,10 @@ void PQ_write_chunked(benchmark::State& state)
       {cudf::type_id::INT32}, num_cols, table_size_bytes{size_t(data_size / num_tables)}));
   }
+  rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource();
+  auto statistics_mr = rmm::mr::make_statistics_adaptor(mr);
+
+  rmm::mr::set_current_device_resource(&statistics_mr);
   for (auto _ : state) {
     cuda_event_timer raii(state, true);  // flush_l2_cache = true, stream = 0
     cudf_io::chunked_parquet_writer_options opts =
@@ -78,8 +90,10 @@ void PQ_write_chunked(benchmark::State& state)
     });
     writer.close();
   }
+  rmm::mr::set_current_device_resource(mr);
   state.SetBytesProcessed(static_cast(state.iterations()) * state.range(0));
+  state.counters["peak_memory_usage"] = statistics_mr.get_bytes_counter().peak;
 }
 #define PWBM_BENCHMARK_DEFINE(name, size, num_columns) \

From 8d85fa9bf214255ecee8bf620e0546c480a32074 Mon Sep 17 00:00:00 2001
From: Devavret Makkar
Date: Sat, 10 Jul 2021 01:26:57 +0530
Subject: [PATCH 07/10] Proposed RAII statistics resource wrapper

---
 cpp/benchmarks/fixture/benchmark_fixture.hpp | 19 +++++++++++++++++++
 .../io/parquet/parquet_writer_benchmark.cpp | 12 ++----------
 2 files changed, 21 insertions(+), 10 deletions(-)

diff --git a/cpp/benchmarks/fixture/benchmark_fixture.hpp b/cpp/benchmarks/fixture/benchmark_fixture.hpp
index 7ca2300543d..8bfd9bfdfaa 100644
--- a/cpp/benchmarks/fixture/benchmark_fixture.hpp
+++ b/cpp/benchmarks/fixture/benchmark_fixture.hpp
@@ -21,6 +21,7 @@
 #include
 #include
 #include
+#include
 namespace cudf {
@@ -90,4 +91,22 @@ class benchmark : public ::benchmark::Fixture {
   std::shared_ptr mr;
 };
+class memory_stats_logger {
+ public:
+  memory_stats_logger()
+    : existing_mr(rmm::mr::get_current_device_resource()),
+      statistics_mr(rmm::mr::make_statistics_adaptor(existing_mr))
+  {
+    rmm::mr::set_current_device_resource(&statistics_mr);
+  }
+
+  ~memory_stats_logger() { rmm::mr::set_current_device_resource(existing_mr); }
+
+  size_t peak_memory_usage() { return statistics_mr.get_bytes_counter().peak; }
+
+ private:
+  rmm::mr::device_memory_resource* existing_mr;
+  rmm::mr::statistics_resource_adaptor statistics_mr;
+};
+
 } // namespace cudf
diff --git a/cpp/benchmarks/io/parquet/parquet_writer_benchmark.cpp b/cpp/benchmarks/io/parquet/parquet_writer_benchmark.cpp
index 02b2bbec3b0..4d278a47f79 100644
--- a/cpp/benchmarks/io/parquet/parquet_writer_benchmark.cpp
+++ b/cpp/benchmarks/io/parquet/parquet_writer_benchmark.cpp
@@ -23,10 +23,6 @@
 #include
-#include
-
-#include
-
 // to enable, run cmake with -DBUILD_BENCHMARKS=ON
 constexpr size_t data_size = 512 << 20;
@@ -54,10 +50,7 @@ void BM_parq_write_varying_inout(benchmark::State& state)
   auto const view = tbl->view();
   cuio_source_sink_pair source_sink(sink_type);
-  rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource();
-  auto statistics_mr = rmm::mr::make_statistics_adaptor(mr);
-
-  rmm::mr::set_current_device_resource(&statistics_mr);
+  auto mem_stats_logger = cudf::memory_stats_logger();
   for (auto _ : state) {
     cuda_event_timer raii(state, true);  // flush_l2_cache = true, stream = 0
     cudf_io::parquet_writer_options opts =
@@ -65,10 +58,9 @@ void BM_parq_write_varying_inout(benchmark::State& state)
       .compression(compression);
     cudf_io::write_parquet(opts);
   }
-  rmm::mr::set_current_device_resource(mr);
   state.SetBytesProcessed(data_size * state.iterations());
-  state.counters["peak_memory_usage"] = statistics_mr.get_bytes_counter().peak;
+  state.counters["peak_memory_usage"] = mem_stats_logger.peak_memory_usage();
 }
 void BM_parq_write_varying_options(benchmark::State& state)

From 804cb80e33c2c23789c092ad7b72afadcc725757 Mon Sep 17 00:00:00 2001
From: Devavret Makkar
Date: Wed, 14 Jul 2021 01:32:17 +0530
Subject: [PATCH 08/10] Change all io benchmarks to use RAII stats logger

---
 cpp/benchmarks/io/csv/csv_reader_benchmark.cpp | 16 ++++------------
 cpp/benchmarks/io/csv/csv_writer_benchmark.cpp | 16 ++++------------
 cpp/benchmarks/io/orc/orc_reader_benchmark.cpp | 16 ++++------------
 cpp/benchmarks/io/orc/orc_writer_benchmark.cpp | 16 ++++------------
 .../io/parquet/parquet_reader_benchmark.cpp | 16 ++++------------
 .../io/parquet/parquet_writer_benchmark.cpp | 8 ++------
 .../parquet/parquet_writer_chunks_benchmark.cpp | 16 ++++------------
 7 files changed, 26 insertions(+), 78 deletions(-)

diff --git a/cpp/benchmarks/io/csv/csv_reader_benchmark.cpp b/cpp/benchmarks/io/csv/csv_reader_benchmark.cpp
index 8169d4c060e..b796b284d0a 100644
--- a/cpp/benchmarks/io/csv/csv_reader_benchmark.cpp
+++ b/cpp/benchmarks/io/csv/csv_reader_benchmark.cpp
@@ -53,18 +53,14 @@ void BM_csv_read_varying_input(benchmark::State& state)
   cudf_io::csv_reader_options const read_options =
     cudf_io::csv_reader_options::builder(source_sink.make_source_info());
-  rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource();
-  auto statistics_mr = rmm::mr::make_statistics_adaptor(mr);
-
-  rmm::mr::set_current_device_resource(&statistics_mr);
+  auto mem_stats_logger = cudf::memory_stats_logger();
   for (auto _ : state) {
     cuda_event_timer raii(state, true);  // flush_l2_cache = true, stream = 0
     cudf_io::read_csv(read_options);
   }
-  rmm::mr::set_current_device_resource(mr);
   state.SetBytesProcessed(data_size * state.iterations());
-  state.counters["peak_memory_usage"] = statistics_mr.get_bytes_counter().peak;
+  state.counters["peak_memory_usage"] = mem_stats_logger.peak_memory_usage();
 }
 void BM_csv_read_varying_options(benchmark::State& state)
@@ -102,10 +98,7 @@ void BM_csv_read_varying_options(benchmark::State& state)
   size_t const chunk_size = csv_data.size() / num_chunks;
   cudf::size_type const chunk_row_cnt = view.num_rows() / num_chunks;
-  rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource();
-  auto statistics_mr = rmm::mr::make_statistics_adaptor(mr);
-
-  rmm::mr::set_current_device_resource(&statistics_mr);
+  auto mem_stats_logger = cudf::memory_stats_logger();
   for (auto _ : state) {
     cuda_event_timer raii(state, true);  // flush_l2_cache = true, stream = 0
     for (int32_t chunk = 0; chunk < num_chunks; ++chunk) {
@@ -136,11 +129,10 @@ void BM_csv_read_varying_options(benchmark::State& state)
       cudf_io::read_csv(read_options);
     }
   }
-  rmm::mr::set_current_device_resource(mr);
   auto const data_processed = data_size * cols_to_read.size() / view.num_columns();
   state.SetBytesProcessed(data_processed * state.iterations());
-  state.counters["peak_memory_usage"] = statistics_mr.get_bytes_counter().peak;
+  state.counters["peak_memory_usage"] = mem_stats_logger.peak_memory_usage();
 }
 #define CSV_RD_BM_INPUTS_DEFINE(name, type_or_group, src_type) \
diff --git a/cpp/benchmarks/io/csv/csv_writer_benchmark.cpp b/cpp/benchmarks/io/csv/csv_writer_benchmark.cpp
index 42e12c2d265..c25b56f88dd 100644
--- a/cpp/benchmarks/io/csv/csv_writer_benchmark.cpp
+++ b/cpp/benchmarks/io/csv/csv_writer_benchmark.cpp
@@ -44,10 +44,7 @@ void BM_csv_write_varying_inout(benchmark::State& state)
   auto const view = tbl->view();
   cuio_source_sink_pair source_sink(sink_type);
-  rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource();
-  auto statistics_mr = rmm::mr::make_statistics_adaptor(mr);
-
-  rmm::mr::set_current_device_resource(&statistics_mr);
+  auto mem_stats_logger = cudf::memory_stats_logger();
   for (auto _ : state) {
     cuda_event_timer raii(state, true);  // flush_l2_cache = true, stream = 0
     cudf_io::csv_writer_options options =
@@ -56,10 +53,9 @@ void BM_csv_write_varying_inout(benchmark::State& state)
       .rows_per_chunk(1 << 14);  // TODO: remove once default is sensible
     cudf_io::write_csv(options);
   }
-  rmm::mr::set_current_device_resource(mr);
   state.SetBytesProcessed(data_size * state.iterations());
-  state.counters["peak_memory_usage"] = statistics_mr.get_bytes_counter().peak;
+  state.counters["peak_memory_usage"] = mem_stats_logger.peak_memory_usage();
 }
 void BM_csv_write_varying_options(benchmark::State& state)
@@ -77,10 +73,7 @@ void BM_csv_write_varying_options(benchmark::State& state)
   std::string const na_per(na_per_len, '#');
   std::vector csv_data;
-  rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource();
-  auto statistics_mr = rmm::mr::make_statistics_adaptor(mr);
-
-  rmm::mr::set_current_device_resource(&statistics_mr);
+  auto mem_stats_logger = cudf::memory_stats_logger();
   for (auto _ : state) {
     cuda_event_timer raii(state, true);  // flush_l2_cache = true, stream = 0
     cudf_io::csv_writer_options options =
@@ -90,10 +83,9 @@ void BM_csv_write_varying_options(benchmark::State& state)
       .rows_per_chunk(rows_per_chunk);
     cudf_io::write_csv(options);
   }
-  rmm::mr::set_current_device_resource(mr);
   state.SetBytesProcessed(data_size * state.iterations());
-  state.counters["peak_memory_usage"] = statistics_mr.get_bytes_counter().peak;
+  state.counters["peak_memory_usage"] = mem_stats_logger.peak_memory_usage();
 }
 #define CSV_WR_BM_INOUTS_DEFINE(name, type_or_group, sink_type) \
diff --git a/cpp/benchmarks/io/orc/orc_reader_benchmark.cpp b/cpp/benchmarks/io/orc/orc_reader_benchmark.cpp
index 77d929aecaa..03f5832e500 100644
--- a/cpp/benchmarks/io/orc/orc_reader_benchmark.cpp
+++ b/cpp/benchmarks/io/orc/orc_reader_benchmark.cpp
@@ -60,18 +60,14 @@ void BM_orc_read_varying_input(benchmark::State& state)
   cudf_io::orc_reader_options read_opts =
     cudf_io::orc_reader_options::builder(source_sink.make_source_info());
-  rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource();
-  auto statistics_mr = rmm::mr::make_statistics_adaptor(mr);
-
-  rmm::mr::set_current_device_resource(&statistics_mr);
+  auto mem_stats_logger = cudf::memory_stats_logger();
   for (auto _ : state) {
     cuda_event_timer raii(state, true);  // flush_l2_cache = true, stream = 0
     cudf_io::read_orc(read_opts);
   }
-  rmm::mr::set_current_device_resource(mr);
   state.SetBytesProcessed(data_size * state.iterations());
-  state.counters["peak_memory_usage"] = statistics_mr.get_bytes_counter().peak;
+  state.counters["peak_memory_usage"] = mem_stats_logger.peak_memory_usage();
 }
 std::vector get_col_names(std::vector const& orc_data)
@@ -118,10 +114,7 @@ void BM_orc_read_varying_options(benchmark::State& state)
   auto const num_stripes = data_size / (64 << 20);
   cudf::size_type const chunk_row_cnt = view.num_rows() / num_chunks;
-  rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource();
-  auto statistics_mr = rmm::mr::make_statistics_adaptor(mr);
-
-  rmm::mr::set_current_device_resource(&statistics_mr);
+  auto mem_stats_logger = cudf::memory_stats_logger();
   for (auto _ : state) {
     cuda_event_timer raii(state, true);  // flush_l2_cache = true, stream = 0
@@ -151,11 +144,10 @@ void BM_orc_read_varying_options(benchmark::State& state)
     CUDF_EXPECTS(rows_read == view.num_rows(), "Benchmark did not read the entire table");
   }
-  rmm::mr::set_current_device_resource(mr);
   auto const data_processed = data_size * cols_to_read.size() / view.num_columns();
   state.SetBytesProcessed(data_processed * state.iterations());
-  state.counters["peak_memory_usage"] = statistics_mr.get_bytes_counter().peak;
+  state.counters["peak_memory_usage"] = mem_stats_logger.peak_memory_usage();
 }
 #define ORC_RD_BM_INPUTS_DEFINE(name, type_or_group, src_type) \
diff --git a/cpp/benchmarks/io/orc/orc_writer_benchmark.cpp b/cpp/benchmarks/io/orc/orc_writer_benchmark.cpp
index 2a5fe7941c8..fbd560071bd 100644
--- a/cpp/benchmarks/io/orc/orc_writer_benchmark.cpp
+++ b/cpp/benchmarks/io/orc/orc_writer_benchmark.cpp
@@ -52,10 +52,7 @@ void BM_orc_write_varying_inout(benchmark::State& state)
   auto const view = tbl->view();
   cuio_source_sink_pair source_sink(sink_type);
-  rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource();
-  auto statistics_mr = rmm::mr::make_statistics_adaptor(mr);
-
-  rmm::mr::set_current_device_resource(&statistics_mr);
+  auto mem_stats_logger = cudf::memory_stats_logger();
   for (auto _ : state) {
     cuda_event_timer raii(state, true);  // flush_l2_cache = true, stream = 0
     cudf_io::orc_writer_options options =
@@ -63,10 +60,9 @@ void BM_orc_write_varying_inout(benchmark::State& state)
      .compression(compression);
     cudf_io::write_orc(options);
   }
-  rmm::mr::set_current_device_resource(mr);
   state.SetBytesProcessed(data_size * state.iterations());
-  state.counters["peak_memory_usage"] = statistics_mr.get_bytes_counter().peak;
+  state.counters["peak_memory_usage"] = mem_stats_logger.peak_memory_usage();
 }
 void BM_orc_write_varying_options(benchmark::State& state)
@@ -83,10 +79,7 @@ void BM_orc_write_varying_options(benchmark::State& state)
   auto const view = tbl->view();
   cuio_source_sink_pair source_sink(io_type::FILEPATH);
-  rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource();
-  auto statistics_mr = rmm::mr::make_statistics_adaptor(mr);
-
-  rmm::mr::set_current_device_resource(&statistics_mr);
+  auto mem_stats_logger = cudf::memory_stats_logger();
   for (auto _ : state) {
     cuda_event_timer raii(state, true);  // flush_l2_cache = true, stream = 0
     cudf_io::orc_writer_options const options =
@@ -95,10 +88,9 @@ void BM_orc_write_varying_options(benchmark::State& state)
      .enable_statistics(enable_stats);
     cudf_io::write_orc(options);
   }
-  rmm::mr::set_current_device_resource(mr);
   state.SetBytesProcessed(data_size * state.iterations());
-  state.counters["peak_memory_usage"] = statistics_mr.get_bytes_counter().peak;
+  state.counters["peak_memory_usage"] = mem_stats_logger.peak_memory_usage();
 }
 #define ORC_WR_BM_INOUTS_DEFINE(name, type_or_group, sink_type) \
diff --git a/cpp/benchmarks/io/parquet/parquet_reader_benchmark.cpp b/cpp/benchmarks/io/parquet/parquet_reader_benchmark.cpp
index 41b073db36c..42c8b1dc641 100644
--- a/cpp/benchmarks/io/parquet/parquet_reader_benchmark.cpp
+++ b/cpp/benchmarks/io/parquet/parquet_reader_benchmark.cpp
@@ -60,18 +60,14 @@ void BM_parq_read_varying_input(benchmark::State& state)
   cudf_io::parquet_reader_options read_opts =
     cudf_io::parquet_reader_options::builder(source_sink.make_source_info());
-  rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource();
-  auto statistics_mr = rmm::mr::make_statistics_adaptor(mr);
-
-  rmm::mr::set_current_device_resource(&statistics_mr);
+  auto mem_stats_logger = cudf::memory_stats_logger();
   for (auto _ : state) {
     cuda_event_timer const raii(state, true);  // flush_l2_cache = true, stream = 0
     cudf_io::read_parquet(read_opts);
   }
-  rmm::mr::set_current_device_resource(mr);
   state.SetBytesProcessed(data_size * state.iterations());
-  state.counters["peak_memory_usage"] = statistics_mr.get_bytes_counter().peak;
+  state.counters["peak_memory_usage"] = mem_stats_logger.peak_memory_usage();
 }
 std::vector get_col_names(std::vector const& parquet_data)
@@ -120,10 +116,7 @@ void BM_parq_read_varying_options(benchmark::State& state)
   auto const num_row_groups = data_size / (128 << 20);
   cudf::size_type const chunk_row_cnt = view.num_rows() / num_chunks;
-  rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource();
-  auto statistics_mr = rmm::mr::make_statistics_adaptor(mr);
-
-  rmm::mr::set_current_device_resource(&statistics_mr);
+  auto mem_stats_logger = cudf::memory_stats_logger();
   for (auto _ : state) {
     cuda_event_timer raii(state, true);  // flush_l2_cache = true, stream = 0
@@ -153,11 +146,10 @@ void BM_parq_read_varying_options(benchmark::State& state)
     CUDF_EXPECTS(rows_read == view.num_rows(), "Benchmark did not read the entire table");
   }
-  rmm::mr::set_current_device_resource(mr);
   auto const data_processed = data_size * cols_to_read.size() / view.num_columns();
   state.SetBytesProcessed(data_processed * state.iterations());
-  state.counters["peak_memory_usage"] = statistics_mr.get_bytes_counter().peak;
+  state.counters["peak_memory_usage"] = mem_stats_logger.peak_memory_usage();
 }
 #define PARQ_RD_BM_INPUTS_DEFINE(name, type_or_group, src_type) \
diff --git a/cpp/benchmarks/io/parquet/parquet_writer_benchmark.cpp b/cpp/benchmarks/io/parquet/parquet_writer_benchmark.cpp
index 4d278a47f79..b4c11179c35 100644
--- a/cpp/benchmarks/io/parquet/parquet_writer_benchmark.cpp
+++ b/cpp/benchmarks/io/parquet/parquet_writer_benchmark.cpp
@@ -78,10 +78,7 @@ void BM_parq_write_varying_options(benchmark::State& state)
   auto const view = tbl->view();
   cuio_source_sink_pair source_sink(io_type::FILEPATH);
-  rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource();
-  auto statistics_mr = rmm::mr::make_statistics_adaptor(mr);
-
-  rmm::mr::set_current_device_resource(&statistics_mr);
+  auto mem_stats_logger = cudf::memory_stats_logger();
   for (auto _ : state) {
     cuda_event_timer raii(state, true);  // flush_l2_cache = true, stream = 0
     cudf_io::parquet_writer_options const options =
@@ -91,10 +88,9 @@ void BM_parq_write_varying_options(benchmark::State& state)
      .column_chunks_file_path(file_path);
     cudf_io::write_parquet(options);
   }
-  rmm::mr::set_current_device_resource(mr);
   state.SetBytesProcessed(data_size * state.iterations());
-  state.counters["peak_memory_usage"] = statistics_mr.get_bytes_counter().peak;
+  state.counters["peak_memory_usage"] = mem_stats_logger.peak_memory_usage();
 }
 #define PARQ_WR_BM_INOUTS_DEFINE(name, type_or_group, sink_type) \
diff --git a/cpp/benchmarks/io/parquet/parquet_writer_chunks_benchmark.cpp b/cpp/benchmarks/io/parquet/parquet_writer_chunks_benchmark.cpp
index f30cfa64768..90758088a4f 100644
--- a/cpp/benchmarks/io/parquet/parquet_writer_chunks_benchmark.cpp
+++ b/cpp/benchmarks/io/parquet/parquet_writer_chunks_benchmark.cpp
@@ -49,20 +49,16 @@ void PQ_write(benchmark::State& state)
   auto tbl = create_random_table({cudf::type_id::INT32}, num_cols, table_size_bytes{data_size});
   cudf::table_view view = tbl->view();
-  rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource();
-  auto statistics_mr = rmm::mr::make_statistics_adaptor(mr);
-
-  rmm::mr::set_current_device_resource(&statistics_mr);
+  auto mem_stats_logger = cudf::memory_stats_logger();
   for (auto _ : state) {
     cuda_event_timer raii(state, true);  // flush_l2_cache = true, stream = 0
     cudf_io::parquet_writer_options opts =
       cudf_io::parquet_writer_options::builder(cudf_io::sink_info(), view);
     cudf_io::write_parquet(opts);
   }
-  rmm::mr::set_current_device_resource(mr);
   state.SetBytesProcessed(static_cast(state.iterations()) * state.range(0));
-  state.counters["peak_memory_usage"] = statistics_mr.get_bytes_counter().peak;
+  state.counters["peak_memory_usage"] = mem_stats_logger.peak_memory_usage();
 }
 void PQ_write_chunked(benchmark::State& state)
@@ -76,10 +72,7 @@ void PQ_write_chunked(benchmark::State& state)
       {cudf::type_id::INT32}, num_cols, table_size_bytes{size_t(data_size / num_tables)}));
   }
-  rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource();
-  auto statistics_mr = rmm::mr::make_statistics_adaptor(mr);
-
-  rmm::mr::set_current_device_resource(&statistics_mr);
+  auto mem_stats_logger = cudf::memory_stats_logger();
   for (auto _ : state) {
     cuda_event_timer raii(state, true);  // flush_l2_cache = true, stream = 0
     cudf_io::chunked_parquet_writer_options opts =
@@ -90,10 +83,9 @@ void PQ_write_chunked(benchmark::State& state)
     });
     writer.close();
   }
-  rmm::mr::set_current_device_resource(mr);
   state.SetBytesProcessed(static_cast(state.iterations()) * state.range(0));
-  state.counters["peak_memory_usage"] = statistics_mr.get_bytes_counter().peak;
+  state.counters["peak_memory_usage"] = mem_stats_logger.peak_memory_usage();
 }
 #define PWBM_BENCHMARK_DEFINE(name, size, num_columns) \

From 4662f6e2855989f8645cab72adf66062a85d590c Mon Sep 17 00:00:00 2001
From: Devavret Makkar
Date: Fri, 16 Jul 2021 23:16:49 +0530
Subject: [PATCH 09/10] Remove extra header

---
 cpp/benchmarks/io/csv/csv_reader_benchmark.cpp | 2 --
 cpp/benchmarks/io/csv/csv_writer_benchmark.cpp | 2 --
 cpp/benchmarks/io/orc/orc_reader_benchmark.cpp | 2 --
 cpp/benchmarks/io/orc/orc_writer_benchmark.cpp | 2 --
 cpp/benchmarks/io/parquet/parquet_reader_benchmark.cpp | 2 --
 cpp/benchmarks/io/parquet/parquet_writer_chunks_benchmark.cpp | 2 --
 6 files changed, 12 deletions(-)

diff --git a/cpp/benchmarks/io/csv/csv_reader_benchmark.cpp b/cpp/benchmarks/io/csv/csv_reader_benchmark.cpp
index b796b284d0a..3f5549a3148 100644
--- a/cpp/benchmarks/io/csv/csv_reader_benchmark.cpp
+++ b/cpp/benchmarks/io/csv/csv_reader_benchmark.cpp
@@ -23,8 +23,6 @@
 #include
-#include
-
 // to enable, run cmake with -DBUILD_BENCHMARKS=ON
 constexpr size_t data_size = 256 << 20;
diff --git a/cpp/benchmarks/io/csv/csv_writer_benchmark.cpp b/cpp/benchmarks/io/csv/csv_writer_benchmark.cpp
index c25b56f88dd..fdd7c63eece 100644
--- a/cpp/benchmarks/io/csv/csv_writer_benchmark.cpp
+++ b/cpp/benchmarks/io/csv/csv_writer_benchmark.cpp
@@ -23,8 +23,6 @@
 #include
-#include
-
 // to enable, run cmake with -DBUILD_BENCHMARKS=ON
 constexpr size_t data_size = 256 << 20;
diff --git a/cpp/benchmarks/io/orc/orc_reader_benchmark.cpp b/cpp/benchmarks/io/orc/orc_reader_benchmark.cpp
index 03f5832e500..549605fbaee 100644
--- a/cpp/benchmarks/io/orc/orc_reader_benchmark.cpp
+++ b/cpp/benchmarks/io/orc/orc_reader_benchmark.cpp
@@ -23,8 +23,6 @@
 #include
-#include
-
 // to enable, run cmake with -DBUILD_BENCHMARKS=ON
 constexpr int64_t data_size = 512 << 20;
diff --git a/cpp/benchmarks/io/orc/orc_writer_benchmark.cpp b/cpp/benchmarks/io/orc/orc_writer_benchmark.cpp
index fbd560071bd..de5dd2c7b9d 100644
--- a/cpp/benchmarks/io/orc/orc_writer_benchmark.cpp
+++ b/cpp/benchmarks/io/orc/orc_writer_benchmark.cpp
@@ -23,8 +23,6 @@
 #include
-#include
-
 // to enable, run cmake with -DBUILD_BENCHMARKS=ON
 constexpr int64_t data_size = 512 << 20;
diff --git a/cpp/benchmarks/io/parquet/parquet_reader_benchmark.cpp b/cpp/benchmarks/io/parquet/parquet_reader_benchmark.cpp
index 42c8b1dc641..045aa0e043b 100644
--- a/cpp/benchmarks/io/parquet/parquet_reader_benchmark.cpp
+++ b/cpp/benchmarks/io/parquet/parquet_reader_benchmark.cpp
@@ -23,8 +23,6 @@
 #include
-#include
-
 // to enable, run cmake with -DBUILD_BENCHMARKS=ON
 constexpr size_t data_size = 512 << 20;
diff --git a/cpp/benchmarks/io/parquet/parquet_writer_chunks_benchmark.cpp b/cpp/benchmarks/io/parquet/parquet_writer_chunks_benchmark.cpp
index 90758088a4f..0041af80a15 100644
--- a/cpp/benchmarks/io/parquet/parquet_writer_chunks_benchmark.cpp
+++ b/cpp/benchmarks/io/parquet/parquet_writer_chunks_benchmark.cpp
@@ -29,8 +29,6 @@
 #include
-#include
-
 // to enable, run cmake with -DBUILD_BENCHMARKS=ON
 constexpr int64_t data_size = 512 << 20;

From baa41504b203072b1656c334730000cd7e443b7e Mon Sep 17 00:00:00 2001
From: Devavret Makkar
Date: Sat, 17 Jul 2021 01:46:46 +0530
Subject: [PATCH 10/10] Update cpp/benchmarks/fixture/benchmark_fixture.hpp

Co-authored-by: Vukasin Milovanovic
---
 cpp/benchmarks/fixture/benchmark_fixture.hpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/cpp/benchmarks/fixture/benchmark_fixture.hpp b/cpp/benchmarks/fixture/benchmark_fixture.hpp
index 8bfd9bfdfaa..8476a137c12 100644
--- a/cpp/benchmarks/fixture/benchmark_fixture.hpp
+++ b/cpp/benchmarks/fixture/benchmark_fixture.hpp
@@ -102,7 +102,7 @@ class memory_stats_logger {
   ~memory_stats_logger() { rmm::mr::set_current_device_resource(existing_mr); }
-  size_t peak_memory_usage() { return statistics_mr.get_bytes_counter().peak; }
+  size_t peak_memory_usage() const noexcept { return statistics_mr.get_bytes_counter().peak; }
  private:
   rmm::mr::device_memory_resource* existing_mr;
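
For reference, a minimal sketch of how a benchmark consumes the memory_stats_logger introduced in patches 07-10. This is illustrative only and not part of the patch series; the benchmark name, the measured operation, and the fixture include path are assumptions.

#include <benchmark/benchmark.h>

#include <benchmarks/fixture/benchmark_fixture.hpp>  // assumed path of the fixture header

static void BM_peak_memory_example(benchmark::State& state)
{
  // Constructing the logger wraps the current device resource in a
  // statistics_resource_adaptor and makes the adaptor the current resource;
  // the destructor restores the original resource (RAII).
  auto mem_stats_logger = cudf::memory_stats_logger();

  for (auto _ : state) {
    // ... run the libcudf operation being measured; any device allocations it
    // makes go through the adaptor and update its byte counters ...
  }

  // Report the high-water mark of device memory allocated while the logger was alive.
  state.counters["peak_memory_usage"] = mem_stats_logger.peak_memory_usage();
}
BENCHMARK(BM_peak_memory_example);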