From 0ed7725416879a824ee5b96292eda2d1048a9ada Mon Sep 17 00:00:00 2001 From: Martin Marenz Date: Wed, 11 Oct 2023 00:06:23 +0200 Subject: [PATCH] Add `bytes_per_second` to shift benchmark (#13950) Adds `bytes_per_second` to `SHIFT_BENCH`. This patch relates to #13735. Authors: - Martin Marenz (https://github.com/Blonck) Approvers: - Karthikeyan (https://github.com/karthikeyann) - Nghia Truong (https://github.com/ttnghia) - Mark Harris (https://github.com/harrism) URL: https://github.com/rapidsai/cudf/pull/13950 --- cpp/benchmarks/copying/shift.cu | 24 +++++++++++++++++++----- 1 file changed, 19 insertions(+), 5 deletions(-) diff --git a/cpp/benchmarks/copying/shift.cu b/cpp/benchmarks/copying/shift.cu index 460100a8fe9..e1169e3bcd6 100644 --- a/cpp/benchmarks/copying/shift.cu +++ b/cpp/benchmarks/copying/shift.cu @@ -56,18 +56,32 @@ static void BM_shift(benchmark::State& state) cudf::size_type size = state.range(0); cudf::size_type offset = size * (static_cast(shift_factor) / 100.0); - auto const input_table = - create_sequence_table({cudf::type_to_id()}, - row_count{size}, - use_validity ? std::optional{1.0} : std::nullopt); + auto constexpr column_type_id = cudf::type_id::INT32; + using column_type = cudf::id_to_type; + + auto const input_table = create_sequence_table( + {column_type_id}, row_count{size}, use_validity ? std::optional{1.0} : std::nullopt); cudf::column_view input{input_table->get_column(0)}; - auto fill = use_validity ? make_scalar() : make_scalar(777); + auto fill = use_validity ? make_scalar() : make_scalar(777); for (auto _ : state) { cuda_event_timer raii(state, true); auto output = cudf::shift(input, offset, *fill); } + + auto const elems_read = (size - offset); + auto const bytes_read = elems_read * sizeof(column_type); + + // If 'use_validity' is false, the fill value is a number, and the entire column + // (excluding the null bitmask) needs to be written. On the other hand, if 'use_validity' + // is true, only the elements that can be shifted are written, along with the full null bitmask. + auto const elems_written = use_validity ? (size - offset) : size; + auto const bytes_written = elems_written * sizeof(column_type); + auto const null_bytes = use_validity ? 2 * cudf::bitmask_allocation_size_bytes(size) : 0; + + state.SetBytesProcessed(static_cast(state.iterations()) * + (bytes_written + bytes_read + null_bytes)); } class Shift : public cudf::benchmark {};