diff --git a/cpp/benchmarks/CMakeLists.txt b/cpp/benchmarks/CMakeLists.txt index d3de9b39977..8e5ea900efa 100644 --- a/cpp/benchmarks/CMakeLists.txt +++ b/cpp/benchmarks/CMakeLists.txt @@ -360,8 +360,6 @@ ConfigureNVBench( # ################################################################################################## # * strings benchmark ------------------------------------------------------------------- -ConfigureBench(STRINGS_BENCH string/factory.cu) - ConfigureNVBench( STRINGS_NVBENCH string/case.cpp @@ -377,6 +375,7 @@ ConfigureNVBench( string/copy_range.cpp string/count.cpp string/extract.cpp + string/factory.cpp string/filter.cpp string/find.cpp string/find_multiple.cpp diff --git a/cpp/benchmarks/string/factory.cpp b/cpp/benchmarks/string/factory.cpp new file mode 100644 index 00000000000..03870b0ae23 --- /dev/null +++ b/cpp/benchmarks/string/factory.cpp @@ -0,0 +1,60 @@ +/* + * Copyright (c) 2021-2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include + +#include +#include +#include +#include +#include + +#include + +#include + +#include + +/* Benchmark body for cudf::make_strings_column: creates a random STRING column, turns it into d_strings with create_string_vector_from_column, then runs make_strings_column on d_strings inside state.exec. The axis values num_rows, min_width and max_width are read from `state` by name. */ static void bench_factory(nvbench::state& state) +{ + auto const num_rows = static_cast(state.get_int64("num_rows")); + auto const min_width = static_cast(state.get_int64("min_width")); + auto const max_width = static_cast(state.get_int64("max_width")); + + /* NORMAL distribution for STRING data bounded by min_width and max_width (presumably per-row string length limits; confirm against data_profile_builder). */ data_profile const profile = data_profile_builder().distribution( + cudf::type_id::STRING, distribution_id::NORMAL, min_width, max_width); + auto const column = create_random_column(cudf::type_id::STRING, row_count{num_rows}, profile); + auto const sv = cudf::strings_column_view(column->view()); + + /* The input for the measured call is prepared once here, before state.exec. */ auto stream = cudf::get_default_stream(); + auto mr = cudf::get_current_device_resource_ref(); + auto d_strings = cudf::strings::detail::create_string_vector_from_column(sv, stream, mr); + + /* Hand `stream` to nvbench and report chars_size as both the global-memory read count and the write count. */ state.set_cuda_stream(nvbench::make_cuda_stream_view(stream.value())); + auto chars_size = sv.chars_size(stream); + state.add_global_memory_reads(chars_size); + state.add_global_memory_writes(chars_size); + + state.exec(nvbench::exec_tag::sync, [&](nvbench::launch& launch) { + /* NOTE(review): `stream` and `mr` are not passed to this call even though they were used to build d_strings and `stream` was given to nvbench above, so it relies on whatever default arguments make_strings_column declares. Consider passing `stream, mr` explicitly so the measured work is tied to the configured stream. The `launch` parameter is not used in this lambda. The second argument is a string_view constructed from {nullptr, 0}. */ cudf::make_strings_column(d_strings, cudf::string_view{nullptr, 0}); + }); +} + +/* Registers bench_factory under the name "factory" with three int64 axes: min_width, max_width and num_rows. */ NVBENCH_BENCH(bench_factory) + .set_name("factory") + .add_int64_axis("min_width", {0}) + .add_int64_axis("max_width", {32, 64, 128, 256}) + .add_int64_axis("num_rows", {32768, 262144, 2097152}); diff --git a/cpp/benchmarks/string/factory.cu b/cpp/benchmarks/string/factory.cu deleted file mode 100644 index c4e74c4d97e..00000000000 --- a/cpp/benchmarks/string/factory.cu +++ /dev/null @@ -1,92 +0,0 @@ -/* - * Copyright (c) 2021-2024, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "string_bench_args.hpp" - -#include -#include -#include - -#include - -#include -#include -#include - -#include - -#include -#include -#include - -#include - -namespace { -using string_pair = thrust::pair; -struct string_view_to_pair { - __device__ string_pair operator()(thrust::pair const& p) - { - return (p.second) ? string_pair{p.first.data(), p.first.size_bytes()} : string_pair{nullptr, 0}; - } -}; -} // namespace - -class StringsFactory : public cudf::benchmark {}; - -static void BM_factory(benchmark::State& state) -{ - cudf::size_type const n_rows{static_cast(state.range(0))}; - cudf::size_type const max_str_length{static_cast(state.range(1))}; - data_profile const profile = data_profile_builder().distribution( - cudf::type_id::STRING, distribution_id::NORMAL, 0, max_str_length); - auto const column = create_random_column(cudf::type_id::STRING, row_count{n_rows}, profile); - auto d_column = cudf::column_device_view::create(column->view()); - rmm::device_uvector pairs(d_column->size(), cudf::get_default_stream()); - thrust::transform(thrust::device, - d_column->pair_begin(), - d_column->pair_end(), - pairs.data(), - string_view_to_pair{}); - - for (auto _ : state) { - cuda_event_timer raii(state, true, cudf::get_default_stream()); - cudf::make_strings_column(pairs, cudf::get_default_stream()); - } - - cudf::strings_column_view input(column->view()); - state.SetBytesProcessed(state.iterations() * input.chars_size(cudf::get_default_stream())); -} - -static void generate_bench_args(benchmark::internal::Benchmark* b) -{ - int 
const min_rows = 1 << 12; - int const max_rows = 1 << 24; - int const row_mult = 8; - int const min_rowlen = 1 << 5; - int const max_rowlen = 1 << 13; - int const len_mult = 4; - generate_string_bench_args(b, min_rows, max_rows, row_mult, min_rowlen, max_rowlen, len_mult); -} - -#define STRINGS_BENCHMARK_DEFINE(name) \ - BENCHMARK_DEFINE_F(StringsFactory, name) \ - (::benchmark::State & st) { BM_factory(st); } \ - BENCHMARK_REGISTER_F(StringsFactory, name) \ - ->Apply(generate_bench_args) \ - ->UseManualTime() \ - ->Unit(benchmark::kMillisecond); - -STRINGS_BENCHMARK_DEFINE(factory)