From 245832950deaa7db324ea66f5be52f000366c4ee Mon Sep 17 00:00:00 2001 From: davidwendt Date: Mon, 15 Mar 2021 17:47:27 -0400 Subject: [PATCH 1/2] Add gbenchmarks for string substrings functions --- cpp/benchmarks/CMakeLists.txt | 1 + cpp/benchmarks/string/string_bench_args.hpp | 2 + cpp/benchmarks/string/substring_benchmark.cpp | 93 +++++++++++++++++++ 3 files changed, 96 insertions(+) create mode 100644 cpp/benchmarks/string/substring_benchmark.cpp diff --git a/cpp/benchmarks/CMakeLists.txt b/cpp/benchmarks/CMakeLists.txt index dfc340b1459..73234bf3b20 100644 --- a/cpp/benchmarks/CMakeLists.txt +++ b/cpp/benchmarks/CMakeLists.txt @@ -189,4 +189,5 @@ ConfigureBench(STRINGS_BENCH string/find_benchmark.cpp string/replace_benchmark.cpp string/split_benchmark.cpp + string/substring_benchmark.cpp string/url_decode_benchmark.cpp) diff --git a/cpp/benchmarks/string/string_bench_args.hpp b/cpp/benchmarks/string/string_bench_args.hpp index f81f859de74..9c709b064dd 100644 --- a/cpp/benchmarks/string/string_bench_args.hpp +++ b/cpp/benchmarks/string/string_bench_args.hpp @@ -17,6 +17,8 @@ #include +#include + /** * @brief Generate row count and row length argument ranges for a string benchmark. * diff --git a/cpp/benchmarks/string/substring_benchmark.cpp b/cpp/benchmarks/string/substring_benchmark.cpp new file mode 100644 index 00000000000..d47c42e45be --- /dev/null +++ b/cpp/benchmarks/string/substring_benchmark.cpp @@ -0,0 +1,93 @@ +/* + * Copyright (c) 2021, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "string_bench_args.hpp" + +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +#include + +#include + +class StringSubstring : public cudf::benchmark { +}; + +enum substring_type { position, multi_position, delimiter, multi_delimiter }; + +static void BM_substring(benchmark::State& state, substring_type rt) +{ + cudf::size_type const n_rows{static_cast(state.range(0))}; + cudf::size_type const max_str_length{static_cast(state.range(1))}; + data_profile table_profile; + table_profile.set_distribution_params( + cudf::type_id::STRING, distribution_id::NORMAL, 0, max_str_length); + auto const table = + create_random_table({cudf::type_id::STRING}, 1, row_count{n_rows}, table_profile); + cudf::strings_column_view input(table->view().column(0)); + auto starts_itr = thrust::constant_iterator(1); + auto stops_itr = thrust::constant_iterator(max_str_length / 2); + cudf::test::fixed_width_column_wrapper starts(starts_itr, starts_itr + n_rows); + cudf::test::fixed_width_column_wrapper stops(stops_itr, stops_itr + n_rows); + auto delim_itr = thrust::constant_iterator(" "); + cudf::test::strings_column_wrapper delimiters(delim_itr, delim_itr + n_rows); + + for (auto _ : state) { + cuda_event_timer raii(state, true, 0); + switch (rt) { + case position: cudf::strings::slice_strings(input, 1, max_str_length / 2); break; + case multi_position: cudf::strings::slice_strings(input, starts, stops); break; + case delimiter: cudf::strings::slice_strings(input, std::string{" "}, 1); break; + case multi_delimiter: + cudf::strings::slice_strings(input, cudf::strings_column_view(delimiters), 1); + break; + } + } + + state.SetBytesProcessed(state.iterations() * input.chars_size()); +} + +static void generate_bench_args(benchmark::internal::Benchmark* b) +{ + int const min_rows = 1 << 12; + int const max_rows = 1 << 24; + int const 
row_mult = 8; + int const min_rowlen = 1 << 5; + int const max_rowlen = 1 << 13; + int const len_mult = 4; + generate_string_bench_args(b, min_rows, max_rows, row_mult, min_rowlen, max_rowlen, len_mult); +} + +#define STRINGS_BENCHMARK_DEFINE(name) \ + BENCHMARK_DEFINE_F(StringSubstring, name) \ + (::benchmark::State & st) { BM_substring(st, substring_type::name); } \ + BENCHMARK_REGISTER_F(StringSubstring, name) \ + ->Apply(generate_bench_args) \ + ->UseManualTime() \ + ->Unit(benchmark::kMillisecond); + +STRINGS_BENCHMARK_DEFINE(position) +STRINGS_BENCHMARK_DEFINE(multi_position) +STRINGS_BENCHMARK_DEFINE(delimiter) +STRINGS_BENCHMARK_DEFINE(multi_delimiter) From b60ab0cfacb57fa7534654dff09af507bfcba442 Mon Sep 17 00:00:00 2001 From: davidwendt Date: Mon, 15 Mar 2021 17:47:58 -0400 Subject: [PATCH 2/2] use more efficient make_strings_children utility --- cpp/src/strings/substring.cu | 136 ++++++++++++++++------------------- 1 file changed, 61 insertions(+), 75 deletions(-) diff --git a/cpp/src/strings/substring.cu b/cpp/src/strings/substring.cu index 68080c0eb89..f712b0cb6aa 100644 --- a/cpp/src/strings/substring.cu +++ b/cpp/src/strings/substring.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2020, NVIDIA CORPORATION. + * Copyright (c) 2019-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -43,17 +43,25 @@ namespace { * using the provided start, stop, and step parameters. 
*/ struct substring_fn { - const column_device_view d_column; - numeric_scalar_device_view d_start, d_stop, d_step; - const int32_t* d_offsets{}; + column_device_view const d_column; + numeric_scalar_device_view const d_start; + numeric_scalar_device_view const d_stop; + numeric_scalar_device_view const d_step; + int32_t* d_offsets{}; char* d_chars{}; - __device__ cudf::size_type operator()(size_type idx) + __device__ void operator()(size_type idx) { - if (d_column.is_null(idx)) return 0; // null string - string_view d_str = d_column.template element(idx); + if (d_column.is_null(idx)) { + if (!d_chars) d_offsets[idx] = 0; + return; + } + auto const d_str = d_column.template element(idx); auto const length = d_str.length(); - if (length == 0) return 0; // empty string + if (length == 0) { + if (!d_chars) d_offsets[idx] = 0; + return; + } size_type const step = d_step.is_valid() ? d_step.value() : 1; auto const begin = [&] { // always inclusive // when invalid, default depends on step @@ -88,7 +96,7 @@ struct substring_fn { if (d_buffer) d_buffer += from_char_utf8(*itr, d_buffer); itr += step; } - return bytes; + if (!d_chars) d_offsets[idx] = bytes; } }; @@ -103,42 +111,26 @@ std::unique_ptr slice_strings( rmm::cuda_stream_view stream = rmm::cuda_stream_default, rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()) { - size_type strings_count = strings.size(); - if (strings_count == 0) return make_empty_strings_column(stream, mr); + if (strings.is_empty()) return make_empty_strings_column(stream, mr); if (step.is_valid()) CUDF_EXPECTS(step.value(stream) != 0, "Step parameter must not be 0"); - auto strings_column = column_device_view::create(strings.parent(), stream); - auto d_column = *strings_column; - auto d_start = get_scalar_device_view(const_cast&>(start)); - auto d_stop = get_scalar_device_view(const_cast&>(stop)); - auto d_step = get_scalar_device_view(const_cast&>(step)); - - // copy the null mask - rmm::device_buffer null_mask = 
cudf::detail::copy_bitmask(strings.parent(), stream, mr); - - // build offsets column - auto offsets_transformer_itr = thrust::make_transform_iterator( - thrust::make_counting_iterator(0), substring_fn{d_column, d_start, d_stop, d_step}); - auto offsets_column = make_offsets_child_column( - offsets_transformer_itr, offsets_transformer_itr + strings_count, stream, mr); - auto d_new_offsets = offsets_column->view().data(); - - // build chars column - auto bytes = cudf::detail::get_value(offsets_column->view(), strings_count, stream); - auto chars_column = strings::detail::create_chars_child_column( - strings_count, strings.null_count(), bytes, stream, mr); - auto d_chars = chars_column->mutable_view().data(); - thrust::for_each_n(rmm::exec_policy(stream), - thrust::make_counting_iterator(0), - strings_count, - substring_fn{d_column, d_start, d_stop, d_step, d_new_offsets, d_chars}); + auto const d_column = column_device_view::create(strings.parent(), stream); + auto const d_start = get_scalar_device_view(const_cast&>(start)); + auto const d_stop = get_scalar_device_view(const_cast&>(stop)); + auto const d_step = get_scalar_device_view(const_cast&>(step)); - return make_strings_column(strings_count, - std::move(offsets_column), - std::move(chars_column), + auto children = make_strings_children(substring_fn{*d_column, d_start, d_stop, d_step}, + strings.size(), + strings.null_count(), + stream, + mr); + + return make_strings_column(strings.size(), + std::move(children.first), + std::move(children.second), strings.null_count(), - std::move(null_mask), + cudf::detail::copy_bitmask(strings.parent(), stream, mr), stream, mr); } @@ -166,25 +158,33 @@ namespace { * This both calculates the output size and executes the substring. 
*/ struct substring_from_fn { - const column_device_view d_column; - const cudf::detail::input_indexalator starts; - const cudf::detail::input_indexalator stops; - const int32_t* d_offsets{}; + column_device_view const d_column; + cudf::detail::input_indexalator const starts; + cudf::detail::input_indexalator const stops; + int32_t* d_offsets{}; char* d_chars{}; - __device__ size_type operator()(size_type idx) + __device__ void operator()(size_type idx) { - if (d_column.is_null(idx)) return 0; // null string - string_view d_str = d_column.template element(idx); + if (d_column.is_null(idx)) { + if (!d_chars) d_offsets[idx] = 0; + return; + } + auto const d_str = d_column.template element(idx); auto const length = d_str.length(); auto const start = starts[idx]; - if (start >= length) return 0; // empty string + if (start >= length) { + if (!d_chars) d_offsets[idx] = 0; + return; + } auto const stop = stops[idx]; auto const end = (((stop < 0) || (stop > length)) ? length : stop); - string_view d_substr = d_str.substr(start, end - start); - if (d_chars) memcpy(d_chars + d_offsets[idx], d_substr.data(), d_substr.size_bytes()); - return d_substr.size_bytes(); + auto const d_substr = d_str.substr(start, end - start); + if (d_chars) + memcpy(d_chars + d_offsets[idx], d_substr.data(), d_substr.size_bytes()); + else + d_offsets[idx] = d_substr.size_bytes(); } }; @@ -212,32 +212,18 @@ std::unique_ptr compute_substrings_from_fn(column_device_view const& d_c auto strings_count = d_column.size(); // Copy the null mask - rmm::device_buffer null_mask{0, stream, mr}; - if (d_column.nullable()) - null_mask = rmm::device_buffer( - d_column.null_mask(), cudf::bitmask_allocation_size_bytes(strings_count), stream, mr); - - // Build offsets column - auto offsets_transformer_itr = thrust::make_transform_iterator( - thrust::make_counting_iterator(0), substring_from_fn{d_column, starts, stops}); - auto offsets_column = cudf::strings::detail::make_offsets_child_column( - 
offsets_transformer_itr, offsets_transformer_itr + strings_count, stream, mr); - auto d_new_offsets = offsets_column->view().data(); - - // Build chars column - auto bytes = cudf::detail::get_value(offsets_column->view(), strings_count, stream); - auto chars_column = - cudf::strings::detail::create_chars_child_column(strings_count, null_count, bytes, stream, mr); - auto chars_view = chars_column->mutable_view(); - auto d_chars = chars_view.template data(); - thrust::for_each_n(rmm::exec_policy(stream), - thrust::make_counting_iterator(0), - strings_count, - substring_from_fn{d_column, starts, stops, d_new_offsets, d_chars}); + rmm::device_buffer null_mask = + !d_column.nullable() + ? rmm::device_buffer{0, stream, mr} + : rmm::device_buffer( + d_column.null_mask(), cudf::bitmask_allocation_size_bytes(strings_count), stream, mr); + + auto children = make_strings_children( + substring_from_fn{d_column, starts, stops}, strings_count, null_count, stream, mr); return make_strings_column(strings_count, - std::move(offsets_column), - std::move(chars_column), + std::move(children.first), + std::move(children.second), null_count, std::move(null_mask), stream,