Skip to content

Commit

Permalink
Add gbenchmarks for string substrngs functions
Browse files Browse the repository at this point in the history
  • Loading branch information
davidwendt committed Mar 15, 2021
1 parent 325d5b8 commit 2458329
Show file tree
Hide file tree
Showing 3 changed files with 96 additions and 0 deletions.
1 change: 1 addition & 0 deletions cpp/benchmarks/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -189,4 +189,5 @@ ConfigureBench(STRINGS_BENCH
string/find_benchmark.cpp
string/replace_benchmark.cpp
string/split_benchmark.cpp
string/substring_benchmark.cpp
string/url_decode_benchmark.cpp)
2 changes: 2 additions & 0 deletions cpp/benchmarks/string/string_bench_args.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,8 @@

#include <benchmark/benchmark.h>

#include <cudf/types.hpp>

/**
* @brief Generate row count and row length argument ranges for a string benchmark.
*
Expand Down
93 changes: 93 additions & 0 deletions cpp/benchmarks/string/substring_benchmark.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,93 @@
/*
* Copyright (c) 2021, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include "string_bench_args.hpp"

#include <benchmark/benchmark.h>
#include <benchmarks/common/generate_benchmark_input.hpp>
#include <benchmarks/fixture/benchmark_fixture.hpp>
#include <benchmarks/synchronization/synchronization.hpp>

#include <cudf/scalar/scalar.hpp>
#include <cudf/strings/strings_column_view.hpp>
#include <cudf/strings/substring.hpp>
#include <cudf_test/base_fixture.hpp>
#include <cudf_test/column_wrapper.hpp>

#include <limits>

#include <thrust/iterator/constant_iterator.h>

class StringSubstring : public cudf::benchmark {
};

enum substring_type { position, multi_position, delimiter, multi_delimiter };

static void BM_substring(benchmark::State& state, substring_type rt)
{
cudf::size_type const n_rows{static_cast<cudf::size_type>(state.range(0))};
cudf::size_type const max_str_length{static_cast<cudf::size_type>(state.range(1))};
data_profile table_profile;
table_profile.set_distribution_params(
cudf::type_id::STRING, distribution_id::NORMAL, 0, max_str_length);
auto const table =
create_random_table({cudf::type_id::STRING}, 1, row_count{n_rows}, table_profile);
cudf::strings_column_view input(table->view().column(0));
auto starts_itr = thrust::constant_iterator<cudf::size_type>(1);
auto stops_itr = thrust::constant_iterator<cudf::size_type>(max_str_length / 2);
cudf::test::fixed_width_column_wrapper<int32_t> starts(starts_itr, starts_itr + n_rows);
cudf::test::fixed_width_column_wrapper<int32_t> stops(stops_itr, stops_itr + n_rows);
auto delim_itr = thrust::constant_iterator<std::string>(" ");
cudf::test::strings_column_wrapper delimiters(delim_itr, delim_itr + n_rows);

for (auto _ : state) {
cuda_event_timer raii(state, true, 0);
switch (rt) {
case position: cudf::strings::slice_strings(input, 1, max_str_length / 2); break;
case multi_position: cudf::strings::slice_strings(input, starts, stops); break;
case delimiter: cudf::strings::slice_strings(input, std::string{" "}, 1); break;
case multi_delimiter:
cudf::strings::slice_strings(input, cudf::strings_column_view(delimiters), 1);
break;
}
}

state.SetBytesProcessed(state.iterations() * input.chars_size());
}

static void generate_bench_args(benchmark::internal::Benchmark* b)
{
int const min_rows = 1 << 12;
int const max_rows = 1 << 24;
int const row_mult = 8;
int const min_rowlen = 1 << 5;
int const max_rowlen = 1 << 13;
int const len_mult = 4;
generate_string_bench_args(b, min_rows, max_rows, row_mult, min_rowlen, max_rowlen, len_mult);
}

#define STRINGS_BENCHMARK_DEFINE(name) \
BENCHMARK_DEFINE_F(StringSubstring, name) \
(::benchmark::State & st) { BM_substring(st, substring_type::name); } \
BENCHMARK_REGISTER_F(StringSubstring, name) \
->Apply(generate_bench_args) \
->UseManualTime() \
->Unit(benchmark::kMillisecond);

STRINGS_BENCHMARK_DEFINE(position)
STRINGS_BENCHMARK_DEFINE(multi_position)
STRINGS_BENCHMARK_DEFINE(delimiter)
STRINGS_BENCHMARK_DEFINE(multi_delimiter)

0 comments on commit 2458329

Please sign in to comment.