Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add peak memory usage tracking to cuIO benchmarks #7770

Merged
merged 14 commits into from
Jul 19, 2021
Merged
Show file tree
Hide file tree
Changes from 7 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
160 changes: 160 additions & 0 deletions cpp/benchmarks/common/memory_tracking_resource.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,160 @@
/*
* Copyright (c) 2021, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#pragma once

#include <rmm/mr/device/device_memory_resource.hpp>

#include <algorithm>
#include <atomic>
#include <cstddef>
#include <utility>

namespace cudf {

/**
 * @brief Resource that uses `Upstream` to allocate memory and tracks the current and peak memory
 * allocated through this resource.
 *
 * An instance of this resource can be constructed with an existing, upstream resource in order to
 * satisfy allocation requests and track memory use.
 *
 * The usage counters are updated atomically, so concurrent allocations/deallocations from
 * multiple threads are tracked correctly.
 *
 * @tparam Upstream Type of the upstream resource used for allocation/deallocation.
 */
template <typename Upstream>
class memory_tracking_resource final : public rmm::mr::device_memory_resource {
 public:
  /**
   * @brief Construct a new tracking resource adaptor using `upstream` to satisfy allocation
   * requests and tracking information about each allocation/free to the members
   * current_allocated_size_ and max_allocated_size_.
   *
   * @throws `rmm::logic_error` if `upstream == nullptr`
   *
   * @param upstream The resource used for allocating/deallocating device memory
   */
  memory_tracking_resource(Upstream* upstream) : upstream_{upstream}
  {
    RMM_EXPECTS(nullptr != upstream, "Unexpected null upstream resource pointer.");
  }

  memory_tracking_resource()                                = delete;
  ~memory_tracking_resource()                               = default;
  memory_tracking_resource(memory_tracking_resource const&) = delete;
  memory_tracking_resource& operator=(memory_tracking_resource const&) = delete;
  // Move operations are written out explicitly because std::atomic members are not movable,
  // which would otherwise implicitly delete the defaulted versions.
  memory_tracking_resource(memory_tracking_resource&& other) noexcept
    : current_allocated_size_{other.current_allocated_size_.load()},
      max_allocated_size_{other.max_allocated_size_.load()},
      upstream_{other.upstream_}
  {
  }
  memory_tracking_resource& operator=(memory_tracking_resource&& other) noexcept
  {
    current_allocated_size_ = other.current_allocated_size_.load();
    max_allocated_size_     = other.max_allocated_size_.load();
    upstream_               = other.upstream_;
    return *this;
  }

  /**
   * @brief Return pointer to the upstream resource.
   *
   * @return Upstream* Pointer to the upstream resource.
   */
  Upstream* get_upstream() const noexcept { return upstream_; }

  /**
   * @brief Checks whether the upstream resource supports streams.
   *
   * @return true The upstream resource supports streams
   * @return false The upstream resource does not support streams.
   */
  bool supports_streams() const noexcept override { return upstream_->supports_streams(); }

  /**
   * @brief Query whether the resource supports the get_mem_info API.
   *
   * @return bool true if the upstream resource supports get_mem_info, false otherwise.
   */
  bool supports_get_mem_info() const noexcept override
  {
    return upstream_->supports_get_mem_info();
  }

  /// Peak number of bytes simultaneously allocated through this resource so far.
  std::size_t max_allocated_size() const noexcept { return max_allocated_size_.load(); }
  /// Number of bytes currently allocated through this resource.
  std::size_t current_allocated_size() const noexcept { return current_allocated_size_.load(); }

 private:
  /**
   * @brief Allocates memory of size at least `bytes` using the upstream resource and updates the
   * size of memory in use.
   *
   * If the upstream allocation is successful updates the current total memory and peak memory
   * allocated with this resource
   *
   * The returned pointer has at least 256B alignment.
   *
   * @param bytes The size, in bytes, of the allocation
   * @param stream Stream on which to perform the allocation
   * @return void* Pointer to the newly allocated memory
   */
  void* do_allocate(std::size_t bytes, rmm::cuda_stream_view stream) override
  {
    auto const p = upstream_->allocate(bytes, stream);
    // Counters are only updated if the upstream allocation succeeded (did not throw).
    auto const current = current_allocated_size_.fetch_add(bytes) + bytes;
    // Atomic "fetch_max": retry until max_allocated_size_ is at least `current` or another
    // thread has already recorded a larger peak.
    auto observed_max = max_allocated_size_.load();
    while (observed_max < current &&
           !max_allocated_size_.compare_exchange_weak(observed_max, current)) {
    }
    return p;
  }

  /**
   * @brief Free allocation of size `bytes` pointed to by `p` and update the tracked sizes.
   *
   * Updates the current total memory allocated with this resource
   *
   * @param p Pointer to be deallocated
   * @param bytes Size of the allocation
   * @param stream Stream on which to perform the deallocation
   */
  void do_deallocate(void* p, std::size_t bytes, rmm::cuda_stream_view stream) override
  {
    current_allocated_size_.fetch_sub(bytes);
    upstream_->deallocate(p, bytes, stream);
  }

  /**
   * @brief Compare the upstream resource to another.
   *
   * @param other The other resource to compare to
   * @return true If the two resources are equivalent
   * @return false If the two resources are not equal
   */
  bool do_is_equal(device_memory_resource const& other) const noexcept override
  {
    if (this == &other)
      return true;
    else {
      // If `other` is also a tracking resource, compare the upstreams instead so that two
      // adaptors over equivalent upstreams compare equal.
      memory_tracking_resource<Upstream> const* cast =
        dynamic_cast<memory_tracking_resource<Upstream> const*>(&other);
      if (cast != nullptr)
        return upstream_->is_equal(*cast->get_upstream());
      else
        return upstream_->is_equal(other);
    }
  }

  /**
   * @brief Get free and available memory from upstream resource.
   *
   * @param stream Stream on which to get the mem info.
   * @return std::pair containing free_size and total_size of memory
   */
  std::pair<std::size_t, std::size_t> do_get_mem_info(rmm::cuda_stream_view stream) const override
  {
    return upstream_->get_mem_info(stream);
  }

  // Atomic so that allocations/deallocations from multiple threads do not race or lose
  // peak-size updates.
  std::atomic<std::size_t> current_allocated_size_{0};
  std::atomic<std::size_t> max_allocated_size_{0};

  Upstream* upstream_;  ///< The upstream resource used for satisfying
                        ///< allocation requests
};

}  // namespace cudf
4 changes: 3 additions & 1 deletion cpp/benchmarks/fixture/benchmark_fixture.hpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2019-2020, NVIDIA CORPORATION.
* Copyright (c) 2019-2021, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand All @@ -14,6 +14,8 @@
* limitations under the License.
*/

#pragma once
devavret marked this conversation as resolved.
Show resolved Hide resolved

#include <benchmark/benchmark.h>
#include <rmm/mr/device/cuda_memory_resource.hpp>
#include <rmm/mr/device/owning_wrapper.hpp>
Expand Down
16 changes: 15 additions & 1 deletion cpp/benchmarks/io/csv/csv_reader_benchmark.cpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2020, NVIDIA CORPORATION.
* Copyright (c) 2020-2021, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand All @@ -17,6 +17,7 @@
#include <benchmark/benchmark.h>

#include <benchmarks/common/generate_benchmark_input.hpp>
#include <benchmarks/common/memory_tracking_resource.hpp>
#include <benchmarks/fixture/benchmark_fixture.hpp>
#include <benchmarks/io/cuio_benchmark_common.hpp>
#include <benchmarks/synchronization/synchronization.hpp>
Expand Down Expand Up @@ -51,12 +52,18 @@ void BM_csv_read_varying_input(benchmark::State& state)
cudf_io::csv_reader_options const read_options =
cudf_io::csv_reader_options::builder(source_sink.make_source_info());

rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource();
cudf::memory_tracking_resource<rmm::mr::device_memory_resource> tracking_mr(mr);

rmm::mr::set_current_device_resource(&tracking_mr);
for (auto _ : state) {
cuda_event_timer raii(state, true); // flush_l2_cache = true, stream = 0
cudf_io::read_csv(read_options);
}
rmm::mr::set_current_device_resource(mr);

state.SetBytesProcessed(data_size * state.iterations());
state.counters["peak_memory_usage"] = tracking_mr.max_allocated_size();
}

void BM_csv_read_varying_options(benchmark::State& state)
Expand Down Expand Up @@ -94,6 +101,10 @@ void BM_csv_read_varying_options(benchmark::State& state)

size_t const chunk_size = csv_data.size() / num_chunks;
cudf::size_type const chunk_row_cnt = view.num_rows() / num_chunks;
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource();
cudf::memory_tracking_resource<rmm::mr::device_memory_resource> tracking_mr(mr);

rmm::mr::set_current_device_resource(&tracking_mr);
for (auto _ : state) {
cuda_event_timer raii(state, true); // flush_l2_cache = true, stream = 0
for (int32_t chunk = 0; chunk < num_chunks; ++chunk) {
Expand Down Expand Up @@ -124,8 +135,11 @@ void BM_csv_read_varying_options(benchmark::State& state)
cudf_io::read_csv(read_options);
}
}
rmm::mr::set_current_device_resource(mr);

auto const data_processed = data_size * cols_to_read.size() / view.num_columns();
state.SetBytesProcessed(data_processed * state.iterations());
state.counters["peak_memory_usage"] = tracking_mr.max_allocated_size();
}

#define CSV_RD_BM_INPUTS_DEFINE(name, type_or_group, src_type) \
Expand Down
15 changes: 14 additions & 1 deletion cpp/benchmarks/io/csv/csv_writer_benchmark.cpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2020, NVIDIA CORPORATION.
* Copyright (c) 2020-2021, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand All @@ -17,6 +17,7 @@
#include <benchmark/benchmark.h>

#include <benchmarks/common/generate_benchmark_input.hpp>
#include <benchmarks/common/memory_tracking_resource.hpp>
#include <benchmarks/fixture/benchmark_fixture.hpp>
#include <benchmarks/io/cuio_benchmark_common.hpp>
#include <benchmarks/synchronization/synchronization.hpp>
Expand All @@ -42,6 +43,10 @@ void BM_csv_write_varying_inout(benchmark::State& state)
auto const view = tbl->view();

cuio_source_sink_pair source_sink(sink_type);
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource();
cudf::memory_tracking_resource<rmm::mr::device_memory_resource> tracking_mr(mr);

rmm::mr::set_current_device_resource(&tracking_mr);
for (auto _ : state) {
cuda_event_timer raii(state, true); // flush_l2_cache = true, stream = 0
cudf_io::csv_writer_options options =
Expand All @@ -50,8 +55,10 @@ void BM_csv_write_varying_inout(benchmark::State& state)
.rows_per_chunk(1 << 14); // TODO: remove once default is sensible
cudf_io::write_csv(options);
}
rmm::mr::set_current_device_resource(mr);

state.SetBytesProcessed(data_size * state.iterations());
state.counters["peak_memory_usage"] = tracking_mr.max_allocated_size();
}

void BM_csv_write_varying_options(benchmark::State& state)
Expand All @@ -69,6 +76,10 @@ void BM_csv_write_varying_options(benchmark::State& state)

std::string const na_per(na_per_len, '#');
std::vector<char> csv_data;
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource();
cudf::memory_tracking_resource<rmm::mr::device_memory_resource> tracking_mr(mr);

rmm::mr::set_current_device_resource(&tracking_mr);
for (auto _ : state) {
cuda_event_timer raii(state, true); // flush_l2_cache = true, stream = 0
cudf_io::csv_writer_options options =
Expand All @@ -78,8 +89,10 @@ void BM_csv_write_varying_options(benchmark::State& state)
.rows_per_chunk(rows_per_chunk);
cudf_io::write_csv(options);
}
rmm::mr::set_current_device_resource(mr);

state.SetBytesProcessed(data_size * state.iterations());
state.counters["peak_memory_usage"] = tracking_mr.max_allocated_size();
}

#define CSV_WR_BM_INOUTS_DEFINE(name, type_or_group, sink_type) \
Expand Down
16 changes: 15 additions & 1 deletion cpp/benchmarks/io/orc/orc_reader_benchmark.cpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2020, NVIDIA CORPORATION.
* Copyright (c) 2020-2021, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand All @@ -17,6 +17,7 @@
#include <benchmark/benchmark.h>

#include <benchmarks/common/generate_benchmark_input.hpp>
#include <benchmarks/common/memory_tracking_resource.hpp>
#include <benchmarks/fixture/benchmark_fixture.hpp>
#include <benchmarks/io/cuio_benchmark_common.hpp>
#include <benchmarks/synchronization/synchronization.hpp>
Expand Down Expand Up @@ -58,12 +59,18 @@ void BM_orc_read_varying_input(benchmark::State& state)
cudf_io::orc_reader_options read_opts =
cudf_io::orc_reader_options::builder(source_sink.make_source_info());

rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource();
cudf::memory_tracking_resource<rmm::mr::device_memory_resource> tracking_mr(mr);

rmm::mr::set_current_device_resource(&tracking_mr);
for (auto _ : state) {
cuda_event_timer raii(state, true); // flush_l2_cache = true, stream = 0
cudf_io::read_orc(read_opts);
}
rmm::mr::set_current_device_resource(mr);

state.SetBytesProcessed(data_size * state.iterations());
state.counters["peak_memory_usage"] = tracking_mr.max_allocated_size();
}

std::vector<std::string> get_col_names(std::vector<char> const& orc_data)
Expand Down Expand Up @@ -112,6 +119,10 @@ void BM_orc_read_varying_options(benchmark::State& state)

auto const num_stripes = data_size / (64 << 20);
cudf::size_type const chunk_row_cnt = view.num_rows() / num_chunks;
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource();
cudf::memory_tracking_resource<rmm::mr::device_memory_resource> tracking_mr(mr);

rmm::mr::set_current_device_resource(&tracking_mr);
for (auto _ : state) {
cuda_event_timer raii(state, true); // flush_l2_cache = true, stream = 0

Expand Down Expand Up @@ -141,8 +152,11 @@ void BM_orc_read_varying_options(benchmark::State& state)

CUDF_EXPECTS(rows_read == view.num_rows(), "Benchmark did not read the entire table");
}
rmm::mr::set_current_device_resource(mr);

auto const data_processed = data_size * cols_to_read.size() / view.num_columns();
state.SetBytesProcessed(data_processed * state.iterations());
state.counters["peak_memory_usage"] = tracking_mr.max_allocated_size();
}

#define ORC_RD_BM_INPUTS_DEFINE(name, type_or_group, src_type) \
Expand Down
Loading