From fdf4901f26637f3d66615c65cf5da670df20a55d Mon Sep 17 00:00:00 2001 From: Devavret Makkar Date: Mon, 19 Jul 2021 14:21:03 +0530 Subject: [PATCH] Add peak memory usage tracking to cuIO benchmarks (#7770) Uses `rmm::mr::statistics_resource_adaptor` to track peak memory usage in cuIO benchmarks. Authors: - Devavret Makkar (https://github.com/devavret) Approvers: - David Wendt (https://github.com/davidwendt) - Mark Harris (https://github.com/harrism) - Vukasin Milovanovic (https://github.com/vuule) URL: https://github.com/rapidsai/cudf/pull/7770 --- cpp/benchmarks/fixture/benchmark_fixture.hpp | 23 ++++++++++++++++++- .../io/csv/csv_reader_benchmark.cpp | 7 +++++- .../io/csv/csv_writer_benchmark.cpp | 6 ++++- .../io/orc/orc_reader_benchmark.cpp | 7 +++++- .../io/orc/orc_writer_benchmark.cpp | 6 ++++- .../io/parquet/parquet_reader_benchmark.cpp | 7 +++++- .../io/parquet/parquet_writer_benchmark.cpp | 6 ++++- .../parquet_writer_chunks_benchmark.cpp | 4 ++++ 8 files changed, 59 insertions(+), 7 deletions(-) diff --git a/cpp/benchmarks/fixture/benchmark_fixture.hpp b/cpp/benchmarks/fixture/benchmark_fixture.hpp index dd1bbcba0b4..8476a137c12 100644 --- a/cpp/benchmarks/fixture/benchmark_fixture.hpp +++ b/cpp/benchmarks/fixture/benchmark_fixture.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2020, NVIDIA CORPORATION. + * Copyright (c) 2019-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -14,11 +14,14 @@ * limitations under the License. 
*/ +#pragma once + #include #include #include #include #include +#include namespace cudf { @@ -88,4 +91,22 @@ class benchmark : public ::benchmark::Fixture { std::shared_ptr mr; }; +class memory_stats_logger { + public: + memory_stats_logger() + : existing_mr(rmm::mr::get_current_device_resource()), + statistics_mr(rmm::mr::make_statistics_adaptor(existing_mr)) + { + rmm::mr::set_current_device_resource(&statistics_mr); + } + + ~memory_stats_logger() { rmm::mr::set_current_device_resource(existing_mr); } + + size_t peak_memory_usage() const noexcept { return statistics_mr.get_bytes_counter().peak; } + + private: + rmm::mr::device_memory_resource* existing_mr; + rmm::mr::statistics_resource_adaptor statistics_mr; +}; + } // namespace cudf diff --git a/cpp/benchmarks/io/csv/csv_reader_benchmark.cpp b/cpp/benchmarks/io/csv/csv_reader_benchmark.cpp index a3ee1a3f333..3f5549a3148 100644 --- a/cpp/benchmarks/io/csv/csv_reader_benchmark.cpp +++ b/cpp/benchmarks/io/csv/csv_reader_benchmark.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, NVIDIA CORPORATION. + * Copyright (c) 2020-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -51,12 +51,14 @@ void BM_csv_read_varying_input(benchmark::State& state) cudf_io::csv_reader_options const read_options = cudf_io::csv_reader_options::builder(source_sink.make_source_info()); + auto mem_stats_logger = cudf::memory_stats_logger(); for (auto _ : state) { cuda_event_timer raii(state, true); // flush_l2_cache = true, stream = 0 cudf_io::read_csv(read_options); } state.SetBytesProcessed(data_size * state.iterations()); + state.counters["peak_memory_usage"] = mem_stats_logger.peak_memory_usage(); } void BM_csv_read_varying_options(benchmark::State& state) @@ -94,6 +96,7 @@ void BM_csv_read_varying_options(benchmark::State& state) size_t const chunk_size = csv_data.size() / num_chunks; cudf::size_type const chunk_row_cnt = view.num_rows() / num_chunks; + auto mem_stats_logger = cudf::memory_stats_logger(); for (auto _ : state) { cuda_event_timer raii(state, true); // flush_l2_cache = true, stream = 0 for (int32_t chunk = 0; chunk < num_chunks; ++chunk) { @@ -124,8 +127,10 @@ void BM_csv_read_varying_options(benchmark::State& state) cudf_io::read_csv(read_options); } } + auto const data_processed = data_size * cols_to_read.size() / view.num_columns(); state.SetBytesProcessed(data_processed * state.iterations()); + state.counters["peak_memory_usage"] = mem_stats_logger.peak_memory_usage(); } #define CSV_RD_BM_INPUTS_DEFINE(name, type_or_group, src_type) \ diff --git a/cpp/benchmarks/io/csv/csv_writer_benchmark.cpp b/cpp/benchmarks/io/csv/csv_writer_benchmark.cpp index fcb9155e646..fdd7c63eece 100644 --- a/cpp/benchmarks/io/csv/csv_writer_benchmark.cpp +++ b/cpp/benchmarks/io/csv/csv_writer_benchmark.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, NVIDIA CORPORATION. + * Copyright (c) 2020-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -42,6 +42,7 @@ void BM_csv_write_varying_inout(benchmark::State& state) auto const view = tbl->view(); cuio_source_sink_pair source_sink(sink_type); + auto mem_stats_logger = cudf::memory_stats_logger(); for (auto _ : state) { cuda_event_timer raii(state, true); // flush_l2_cache = true, stream = 0 cudf_io::csv_writer_options options = @@ -52,6 +53,7 @@ void BM_csv_write_varying_inout(benchmark::State& state) } state.SetBytesProcessed(data_size * state.iterations()); + state.counters["peak_memory_usage"] = mem_stats_logger.peak_memory_usage(); } void BM_csv_write_varying_options(benchmark::State& state) @@ -69,6 +71,7 @@ void BM_csv_write_varying_options(benchmark::State& state) std::string const na_per(na_per_len, '#'); std::vector csv_data; + auto mem_stats_logger = cudf::memory_stats_logger(); for (auto _ : state) { cuda_event_timer raii(state, true); // flush_l2_cache = true, stream = 0 cudf_io::csv_writer_options options = @@ -80,6 +83,7 @@ void BM_csv_write_varying_options(benchmark::State& state) } state.SetBytesProcessed(data_size * state.iterations()); + state.counters["peak_memory_usage"] = mem_stats_logger.peak_memory_usage(); } #define CSV_WR_BM_INOUTS_DEFINE(name, type_or_group, sink_type) \ diff --git a/cpp/benchmarks/io/orc/orc_reader_benchmark.cpp b/cpp/benchmarks/io/orc/orc_reader_benchmark.cpp index bc1aef11784..549605fbaee 100644 --- a/cpp/benchmarks/io/orc/orc_reader_benchmark.cpp +++ b/cpp/benchmarks/io/orc/orc_reader_benchmark.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, NVIDIA CORPORATION. + * Copyright (c) 2020-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -58,12 +58,14 @@ void BM_orc_read_varying_input(benchmark::State& state) cudf_io::orc_reader_options read_opts = cudf_io::orc_reader_options::builder(source_sink.make_source_info()); + auto mem_stats_logger = cudf::memory_stats_logger(); for (auto _ : state) { cuda_event_timer raii(state, true); // flush_l2_cache = true, stream = 0 cudf_io::read_orc(read_opts); } state.SetBytesProcessed(data_size * state.iterations()); + state.counters["peak_memory_usage"] = mem_stats_logger.peak_memory_usage(); } std::vector get_col_names(std::vector const& orc_data) @@ -110,6 +112,7 @@ void BM_orc_read_varying_options(benchmark::State& state) auto const num_stripes = data_size / (64 << 20); cudf::size_type const chunk_row_cnt = view.num_rows() / num_chunks; + auto mem_stats_logger = cudf::memory_stats_logger(); for (auto _ : state) { cuda_event_timer raii(state, true); // flush_l2_cache = true, stream = 0 @@ -139,8 +142,10 @@ void BM_orc_read_varying_options(benchmark::State& state) CUDF_EXPECTS(rows_read == view.num_rows(), "Benchmark did not read the entire table"); } + auto const data_processed = data_size * cols_to_read.size() / view.num_columns(); state.SetBytesProcessed(data_processed * state.iterations()); + state.counters["peak_memory_usage"] = mem_stats_logger.peak_memory_usage(); } #define ORC_RD_BM_INPUTS_DEFINE(name, type_or_group, src_type) \ diff --git a/cpp/benchmarks/io/orc/orc_writer_benchmark.cpp b/cpp/benchmarks/io/orc/orc_writer_benchmark.cpp index bddfc3dfaa2..de5dd2c7b9d 100644 --- a/cpp/benchmarks/io/orc/orc_writer_benchmark.cpp +++ b/cpp/benchmarks/io/orc/orc_writer_benchmark.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, NVIDIA CORPORATION. + * Copyright (c) 2020-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -50,6 +50,7 @@ void BM_orc_write_varying_inout(benchmark::State& state) auto const view = tbl->view(); cuio_source_sink_pair source_sink(sink_type); + auto mem_stats_logger = cudf::memory_stats_logger(); for (auto _ : state) { cuda_event_timer raii(state, true); // flush_l2_cache = true, stream = 0 cudf_io::orc_writer_options options = @@ -59,6 +60,7 @@ void BM_orc_write_varying_inout(benchmark::State& state) } state.SetBytesProcessed(data_size * state.iterations()); + state.counters["peak_memory_usage"] = mem_stats_logger.peak_memory_usage(); } void BM_orc_write_varying_options(benchmark::State& state) @@ -75,6 +77,7 @@ void BM_orc_write_varying_options(benchmark::State& state) auto const view = tbl->view(); cuio_source_sink_pair source_sink(io_type::FILEPATH); + auto mem_stats_logger = cudf::memory_stats_logger(); for (auto _ : state) { cuda_event_timer raii(state, true); // flush_l2_cache = true, stream = 0 cudf_io::orc_writer_options const options = @@ -85,6 +88,7 @@ void BM_orc_write_varying_options(benchmark::State& state) } state.SetBytesProcessed(data_size * state.iterations()); + state.counters["peak_memory_usage"] = mem_stats_logger.peak_memory_usage(); } #define ORC_WR_BM_INOUTS_DEFINE(name, type_or_group, sink_type) \ diff --git a/cpp/benchmarks/io/parquet/parquet_reader_benchmark.cpp b/cpp/benchmarks/io/parquet/parquet_reader_benchmark.cpp index 8fc8b29d19d..045aa0e043b 100644 --- a/cpp/benchmarks/io/parquet/parquet_reader_benchmark.cpp +++ b/cpp/benchmarks/io/parquet/parquet_reader_benchmark.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, NVIDIA CORPORATION. + * Copyright (c) 2020-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -58,12 +58,14 @@ void BM_parq_read_varying_input(benchmark::State& state) cudf_io::parquet_reader_options read_opts = cudf_io::parquet_reader_options::builder(source_sink.make_source_info()); + auto mem_stats_logger = cudf::memory_stats_logger(); for (auto _ : state) { cuda_event_timer const raii(state, true); // flush_l2_cache = true, stream = 0 cudf_io::read_parquet(read_opts); } state.SetBytesProcessed(data_size * state.iterations()); + state.counters["peak_memory_usage"] = mem_stats_logger.peak_memory_usage(); } std::vector get_col_names(std::vector const& parquet_data) @@ -112,6 +114,7 @@ void BM_parq_read_varying_options(benchmark::State& state) auto const num_row_groups = data_size / (128 << 20); cudf::size_type const chunk_row_cnt = view.num_rows() / num_chunks; + auto mem_stats_logger = cudf::memory_stats_logger(); for (auto _ : state) { cuda_event_timer raii(state, true); // flush_l2_cache = true, stream = 0 @@ -141,8 +144,10 @@ void BM_parq_read_varying_options(benchmark::State& state) CUDF_EXPECTS(rows_read == view.num_rows(), "Benchmark did not read the entire table"); } + auto const data_processed = data_size * cols_to_read.size() / view.num_columns(); state.SetBytesProcessed(data_processed * state.iterations()); + state.counters["peak_memory_usage"] = mem_stats_logger.peak_memory_usage(); } #define PARQ_RD_BM_INPUTS_DEFINE(name, type_or_group, src_type) \ diff --git a/cpp/benchmarks/io/parquet/parquet_writer_benchmark.cpp b/cpp/benchmarks/io/parquet/parquet_writer_benchmark.cpp index d17e7b126c7..b4c11179c35 100644 --- a/cpp/benchmarks/io/parquet/parquet_writer_benchmark.cpp +++ b/cpp/benchmarks/io/parquet/parquet_writer_benchmark.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, NVIDIA CORPORATION. + * Copyright (c) 2020-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -50,6 +50,7 @@ void BM_parq_write_varying_inout(benchmark::State& state) auto const view = tbl->view(); cuio_source_sink_pair source_sink(sink_type); + auto mem_stats_logger = cudf::memory_stats_logger(); for (auto _ : state) { cuda_event_timer raii(state, true); // flush_l2_cache = true, stream = 0 cudf_io::parquet_writer_options opts = @@ -59,6 +60,7 @@ void BM_parq_write_varying_inout(benchmark::State& state) } state.SetBytesProcessed(data_size * state.iterations()); + state.counters["peak_memory_usage"] = mem_stats_logger.peak_memory_usage(); } void BM_parq_write_varying_options(benchmark::State& state) @@ -76,6 +78,7 @@ void BM_parq_write_varying_options(benchmark::State& state) auto const view = tbl->view(); cuio_source_sink_pair source_sink(io_type::FILEPATH); + auto mem_stats_logger = cudf::memory_stats_logger(); for (auto _ : state) { cuda_event_timer raii(state, true); // flush_l2_cache = true, stream = 0 cudf_io::parquet_writer_options const options = @@ -87,6 +90,7 @@ void BM_parq_write_varying_options(benchmark::State& state) } state.SetBytesProcessed(data_size * state.iterations()); + state.counters["peak_memory_usage"] = mem_stats_logger.peak_memory_usage(); } #define PARQ_WR_BM_INOUTS_DEFINE(name, type_or_group, sink_type) \ diff --git a/cpp/benchmarks/io/parquet/parquet_writer_chunks_benchmark.cpp b/cpp/benchmarks/io/parquet/parquet_writer_chunks_benchmark.cpp index b38dda4d17e..0041af80a15 100644 --- a/cpp/benchmarks/io/parquet/parquet_writer_chunks_benchmark.cpp +++ b/cpp/benchmarks/io/parquet/parquet_writer_chunks_benchmark.cpp @@ -47,6 +47,7 @@ void PQ_write(benchmark::State& state) auto tbl = create_random_table({cudf::type_id::INT32}, num_cols, table_size_bytes{data_size}); cudf::table_view view = tbl->view(); + auto mem_stats_logger = cudf::memory_stats_logger(); for (auto _ : state) { cuda_event_timer raii(state, true); // flush_l2_cache = true, stream = 0 cudf_io::parquet_writer_options opts = @@ -55,6 +56,7 @@ void 
PQ_write(benchmark::State& state) } state.SetBytesProcessed(static_cast(state.iterations()) * state.range(0)); + state.counters["peak_memory_usage"] = mem_stats_logger.peak_memory_usage(); } void PQ_write_chunked(benchmark::State& state) @@ -68,6 +70,7 @@ void PQ_write_chunked(benchmark::State& state) {cudf::type_id::INT32}, num_cols, table_size_bytes{size_t(data_size / num_tables)})); } + auto mem_stats_logger = cudf::memory_stats_logger(); for (auto _ : state) { cuda_event_timer raii(state, true); // flush_l2_cache = true, stream = 0 cudf_io::chunked_parquet_writer_options opts = @@ -80,6 +83,7 @@ void PQ_write_chunked(benchmark::State& state) } state.SetBytesProcessed(static_cast(state.iterations()) * state.range(0)); + state.counters["peak_memory_usage"] = mem_stats_logger.peak_memory_usage(); } #define PWBM_BENCHMARK_DEFINE(name, size, num_columns) \