From 862559f5c7a51f7cd060093852c810e8f59b18f7 Mon Sep 17 00:00:00 2001 From: David <45795991+davidwendt@users.noreply.github.com> Date: Fri, 26 Feb 2021 13:48:52 -0500 Subject: [PATCH] Add gbenchmarks for strings filter functions (#7438) Reference #5698 This creates a gbenchmark for the `cudf::strings::filter_characters`, `cudf::strings::filter_characters_of_type`, and `cudf::strings::strip` functions. This PR also includes changes to `strip.cu` and `filter_chars` to use the more efficient `make_strings_children` utility. This improved performance on these functions by 2x on average. Authors: - David (@davidwendt) Approvers: - Keith Kraus (@kkraus14) - Conor Hoekstra (@codereport) - Karthikeyan (@karthikeyann) URL: https://github.com/rapidsai/cudf/pull/7438 --- cpp/benchmarks/CMakeLists.txt | 1 + cpp/benchmarks/string/filter_benchmark.cpp | 93 ++++++++++++++++ cpp/src/strings/filter_chars.cu | 80 +++++++------- cpp/src/strings/strip.cu | 123 ++++++++------------- 4 files changed, 178 insertions(+), 119 deletions(-) create mode 100644 cpp/benchmarks/string/filter_benchmark.cpp diff --git a/cpp/benchmarks/CMakeLists.txt b/cpp/benchmarks/CMakeLists.txt index 001a4cce122..d3d76ffed28 100644 --- a/cpp/benchmarks/CMakeLists.txt +++ b/cpp/benchmarks/CMakeLists.txt @@ -309,6 +309,7 @@ set(STRINGS_BENCH_SRC "${CMAKE_CURRENT_SOURCE_DIR}/string/contains_benchmark.cpp" "${CMAKE_CURRENT_SOURCE_DIR}/string/convert_durations_benchmark.cpp" "${CMAKE_CURRENT_SOURCE_DIR}/string/copy_benchmark.cpp" + "${CMAKE_CURRENT_SOURCE_DIR}/string/filter_benchmark.cpp" "${CMAKE_CURRENT_SOURCE_DIR}/string/find_benchmark.cpp" "${CMAKE_CURRENT_SOURCE_DIR}/string/replace_benchmark.cpp" "${CMAKE_CURRENT_SOURCE_DIR}/string/split_benchmark.cpp" diff --git a/cpp/benchmarks/string/filter_benchmark.cpp b/cpp/benchmarks/string/filter_benchmark.cpp new file mode 100644 index 00000000000..123c5597df9 --- /dev/null +++ b/cpp/benchmarks/string/filter_benchmark.cpp @@ -0,0 +1,93 @@ +/* + * Copyright (c) 2021, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +#include +#include + +enum FilterAPI { filter, filter_chars, strip }; + +class StringFilterChars : public cudf::benchmark { +}; + +static void BM_filter_chars(benchmark::State& state, FilterAPI api) +{ + cudf::size_type const n_rows{static_cast(state.range(0))}; + cudf::size_type const max_str_length{static_cast(state.range(1))}; + data_profile table_profile; + table_profile.set_distribution_params( + cudf::type_id::STRING, distribution_id::NORMAL, 0, max_str_length); + auto const table = + create_random_table({cudf::type_id::STRING}, 1, row_count{n_rows}, table_profile); + cudf::strings_column_view input(table->view().column(0)); + + auto const types = cudf::strings::string_character_types::SPACE; + std::vector> filter_table{ + {cudf::char_utf8{'a'}, cudf::char_utf8{'c'}}}; + + for (auto _ : state) { + cuda_event_timer raii(state, true, 0); + switch (api) { + case filter: cudf::strings::filter_characters_of_type(input, types); break; + case filter_chars: cudf::strings::filter_characters(input, filter_table); break; + case strip: cudf::strings::strip(input); break; + } + } + + state.SetBytesProcessed(state.iterations() * input.chars_size()); +} + +static void generate_bench_args(benchmark::internal::Benchmark* b) +{ + int const min_rows = 1 << 12; + int const max_rows = 1 << 24; + int const row_mult = 8; + int const min_rowlen = 1 << 5; + int const max_rowlen = 1 << 13; + int const len_mult = 4; + for (int row_count = min_rows; row_count <= max_rows; row_count *= row_mult) { + for (int rowlen = min_rowlen; rowlen <= max_rowlen; rowlen *= len_mult) { + // avoid generating combinations that exceed the cudf column limit + size_t total_chars = static_cast(row_count) * rowlen; + if (total_chars < std::numeric_limits::max()) { + b->Args({row_count, rowlen}); + } + } + } +} + +#define STRINGS_BENCHMARK_DEFINE(name) \ + BENCHMARK_DEFINE_F(StringFilterChars, name) \ + (::benchmark::State & st) { BM_filter_chars(st, FilterAPI::name); } \ + BENCHMARK_REGISTER_F(StringFilterChars, name) \ + ->Apply(generate_bench_args) \ + ->UseManualTime() \ + ->Unit(benchmark::kMillisecond); + +STRINGS_BENCHMARK_DEFINE(filter) +STRINGS_BENCHMARK_DEFINE(filter_chars) +STRINGS_BENCHMARK_DEFINE(strip) diff --git a/cpp/src/strings/filter_chars.cu b/cpp/src/strings/filter_chars.cu index 5a0b409fcf2..7ed77a830ad 100644 --- a/cpp/src/strings/filter_chars.cu +++ b/cpp/src/strings/filter_chars.cu @@ -49,10 +49,10 @@ namespace { struct filter_fn { column_device_view const d_strings; filter_type keep_characters; - rmm::device_vector::iterator table_begin; - rmm::device_vector::iterator table_end; + rmm::device_uvector::iterator table_begin; + rmm::device_uvector::iterator table_end; string_view const d_replacement; - int32_t const* d_offsets{}; + int32_t* d_offsets{}; char* d_chars{}; /** @@ -78,23 +78,28 @@ struct filter_fn { * This is also used to calculate the size of the output. * * @param idx Index of the current string to process. - * @return The size of the output for this string. */ - __device__ size_type operator()(size_type idx) + __device__ void operator()(size_type idx) { - if (d_strings.is_null(idx)) return 0; - string_view d_str = d_strings.element(idx); - size_type nbytes = d_str.size_bytes(); - auto const in_ptr = d_str.data(); - auto out_ptr = d_chars ? d_chars + d_offsets[idx] : nullptr; + if (d_strings.is_null(idx)) { + if (!d_chars) d_offsets[idx] = 0; + return; + } + auto const d_str = d_strings.element(idx); + + auto nbytes = d_str.size_bytes(); + auto out_ptr = d_chars ? d_chars + d_offsets[idx] : nullptr; for (auto itr = d_str.begin(); itr != d_str.end(); ++itr) { - auto const char_size = bytes_in_char_utf8(*itr); - string_view const d_newchar = - remove_char(*itr) ? d_replacement : string_view(in_ptr + itr.byte_offset(), char_size); - nbytes += d_newchar.size_bytes() - char_size; - if (out_ptr) out_ptr = cudf::strings::detail::copy_string(out_ptr, d_newchar); + auto const char_size = bytes_in_char_utf8(*itr); + string_view const d_newchar = remove_char(*itr) + ? d_replacement + : string_view(d_str.data() + itr.byte_offset(), char_size); + if (out_ptr) + out_ptr = cudf::strings::detail::copy_string(out_ptr, d_newchar); + else + nbytes += d_newchar.size_bytes() - char_size; } - return nbytes; + if (!out_ptr) d_offsets[idx] = nbytes; } }; @@ -123,36 +128,25 @@ std::unique_ptr filter_characters( characters_to_filter.begin(), characters_to_filter.end(), htable.begin(), [](auto entry) { return char_range{entry.first, entry.second}; }); - rmm::device_vector table(htable); // copy filter table to device memory - - auto strings_column = column_device_view::create(strings.parent(), stream); - auto d_strings = *strings_column; - - // create null mask - rmm::device_buffer null_mask = cudf::detail::copy_bitmask(strings.parent(), stream, mr); - - // create offsets column - filter_fn ffn{d_strings, keep_characters, table.begin(), table.end(), d_replacement}; - auto offsets_transformer_itr = cudf::detail::make_counting_transform_iterator(0, ffn); - auto offsets_column = make_offsets_child_column( - offsets_transformer_itr, offsets_transformer_itr + strings_count, stream, mr); - ffn.d_offsets = offsets_column->view().data(); - - // build chars column - size_type bytes = cudf::detail::get_value(offsets_column->view(), strings_count, stream); - auto chars_column = strings::detail::create_chars_child_column( - strings_count, strings.null_count(), bytes, stream, mr); - ffn.d_chars = chars_column->mutable_view().data(); - thrust::for_each_n(rmm::exec_policy(stream), - thrust::make_counting_iterator(0), - strings_count, - ffn); + rmm::device_uvector table(table_size, stream); + CUDA_TRY(cudaMemcpyAsync(table.data(), + htable.data(), + table_size * sizeof(char_range), + cudaMemcpyHostToDevice, + stream.value())); + + auto d_strings = column_device_view::create(strings.parent(), stream); + + // this utility calls the strip_fn to build the offsets and chars columns + filter_fn ffn{*d_strings, keep_characters, table.begin(), table.end(), d_replacement}; + auto children = cudf::strings::detail::make_strings_children( + ffn, strings.size(), strings.null_count(), stream, mr); return make_strings_column(strings_count, - std::move(offsets_column), - std::move(chars_column), + std::move(children.first), + std::move(children.second), strings.null_count(), - std::move(null_mask), + cudf::detail::copy_bitmask(strings.parent(), stream, mr), stream, mr); } diff --git a/cpp/src/strings/strip.cu b/cpp/src/strings/strip.cu index f9e8463afd7..3ffa331ba49 100644 --- a/cpp/src/strings/strip.cu +++ b/cpp/src/strings/strip.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2020, NVIDIA CORPORATION. + * Copyright (c) 2019-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -35,16 +35,6 @@ namespace cudf { namespace strings { namespace detail { namespace { -/** - * @brief Used as template parameter to divide size calculation from - * the actual string operation within a function. - * - * Useful when most of the logic is identical for both passes. - */ -enum TwoPass { - SizeOnly = 0, ///< calculate the size only - ExecuteOp ///< run the string operation -}; /** * @brief Strip characters from the beginning and/or end of a string. @@ -53,51 +43,52 @@ enum TwoPass { * of any characters found in d_to_strip or whitespace if * d_to_strip is empty. * - * @tparam Pass Allows computing only the size of the output - * or writing the output to device memory. */ -template struct strip_fn { column_device_view const d_strings; - strip_type stype; // right, left, or both - string_view d_to_strip; - int32_t const* d_offsets{}; + strip_type const stype; // right, left, or both + string_view const d_to_strip; + int32_t* d_offsets{}; char* d_chars{}; - __device__ bool is_strip_character(char_utf8 chr) - { - return d_to_strip.empty() ? (chr <= ' ') : // whitespace check - thrust::any_of( - thrust::seq, d_to_strip.begin(), d_to_strip.end(), [chr] __device__(char_utf8 c) { - return c == chr; - }); - } - - __device__ size_type operator()(size_type idx) + __device__ void operator()(size_type idx) { - if (d_strings.is_null(idx)) return 0; - string_view d_str = d_strings.element(idx); - size_type length = d_str.length(); - size_type left_offset = 0; - auto itr = d_str.begin(); - if (stype == strip_type::LEFT || stype == strip_type::BOTH) { - for (; itr != d_str.end();) { - if (!is_strip_character(*itr++)) break; - left_offset = itr.byte_offset(); - } + if (d_strings.is_null(idx)) { + if (!d_chars) d_offsets[idx] = 0; + return; } + auto const d_str = d_strings.element(idx); + + auto is_strip_character = [d_to_strip = d_to_strip] __device__(char_utf8 chr) -> bool { + return d_to_strip.empty() ? (chr <= ' ') : // whitespace check + thrust::any_of( + thrust::seq, d_to_strip.begin(), d_to_strip.end(), [chr] __device__(char_utf8 c) { + return c == chr; + }); + }; + + size_type const left_offset = [&] { + if (stype != strip_type::LEFT && stype != strip_type::BOTH) return 0; + auto const itr = + thrust::find_if_not(thrust::seq, d_str.begin(), d_str.end(), is_strip_character); + return itr != d_str.end() ? itr.byte_offset() : d_str.size_bytes(); + }(); + size_type right_offset = d_str.size_bytes(); if (stype == strip_type::RIGHT || stype == strip_type::BOTH) { - itr = d_str.end(); + auto const length = d_str.length(); + auto itr = d_str.end(); for (size_type n = 0; n < length; ++n) { if (!is_strip_character(*(--itr))) break; right_offset = itr.byte_offset(); } } - size_type bytes = 0; - if (right_offset > left_offset) bytes = right_offset - left_offset; - if (Pass == ExecuteOp) memcpy(d_chars + d_offsets[idx], d_str.data() + left_offset, bytes); - return bytes; + + auto const bytes = (right_offset > left_offset) ? right_offset - left_offset : 0; + if (d_chars) + memcpy(d_chars + d_offsets[idx], d_str.data() + left_offset, bytes); + else + d_offsets[idx] = bytes; } }; @@ -110,42 +101,22 @@ std::unique_ptr strip( rmm::cuda_stream_view stream = rmm::cuda_stream_default, rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()) { - auto strings_count = strings.size(); - if (strings_count == 0) return detail::make_empty_strings_column(stream, mr); + if (strings.is_empty()) return detail::make_empty_strings_column(stream, mr); CUDF_EXPECTS(to_strip.is_valid(), "Parameter to_strip must be valid"); - string_view d_to_strip(to_strip.data(), to_strip.size()); - - auto strings_column = column_device_view::create(strings.parent(), stream); - auto d_column = *strings_column; - size_type null_count = strings.null_count(); - - // copy null mask - rmm::device_buffer null_mask = cudf::detail::copy_bitmask(strings.parent(), stream, mr); - - // build offsets column -- calculate the size of each output string - auto offsets_transformer_itr = thrust::make_transform_iterator( - thrust::make_counting_iterator(0), strip_fn{d_column, stype, d_to_strip}); - auto offsets_column = make_offsets_child_column( - offsets_transformer_itr, offsets_transformer_itr + strings_count, stream, mr); - auto offsets_view = offsets_column->view(); - auto d_offsets = offsets_view.data(); - - // build the chars column -- convert characters based on case_flag parameter - size_type bytes = thrust::device_pointer_cast(d_offsets)[strings_count]; - auto chars_column = create_chars_child_column(strings_count, null_count, bytes, stream, mr); - auto chars_view = chars_column->mutable_view(); - auto d_chars = chars_view.data(); - thrust::for_each_n(rmm::exec_policy(stream), - thrust::make_counting_iterator(0), - strings_count, - strip_fn{d_column, stype, d_to_strip, d_offsets, d_chars}); - - return make_strings_column(strings_count, - std::move(offsets_column), - std::move(chars_column), - null_count, - std::move(null_mask), + string_view const d_to_strip(to_strip.data(), to_strip.size()); + + auto const d_column = column_device_view::create(strings.parent(), stream); + + // this utility calls the strip_fn to build the offsets and chars columns + auto children = cudf::strings::detail::make_strings_children( + strip_fn{*d_column, stype, d_to_strip}, strings.size(), strings.null_count(), stream, mr); + + return make_strings_column(strings.size(), + std::move(children.first), + std::move(children.second), + strings.null_count(), + cudf::detail::copy_bitmask(strings.parent(), stream, mr), stream, mr); }