Skip to content

Commit

Permalink
Add gbenchmarks for strings filter functions (#7438)
Browse files Browse the repository at this point in the history
Reference #5698
This creates a gbenchmark for the `cudf::strings::filter_characters`, `cudf::strings::filter_characters_of_type`,  and `cudf::strings::strip` functions.

This PR also includes changes to `strip.cu` and `filter_chars` to use the more efficient `make_strings_children` utility. This improved performance on these functions by 2x on average.

Authors:
  - David (@davidwendt)

Approvers:
  - Keith Kraus (@kkraus14)
  - Conor Hoekstra (@codereport)
  - Karthikeyan (@karthikeyann)

URL: #7438
  • Loading branch information
davidwendt authored Feb 26, 2021
1 parent 6b579c7 commit 862559f
Show file tree
Hide file tree
Showing 4 changed files with 178 additions and 119 deletions.
1 change: 1 addition & 0 deletions cpp/benchmarks/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -309,6 +309,7 @@ set(STRINGS_BENCH_SRC
"${CMAKE_CURRENT_SOURCE_DIR}/string/contains_benchmark.cpp"
"${CMAKE_CURRENT_SOURCE_DIR}/string/convert_durations_benchmark.cpp"
"${CMAKE_CURRENT_SOURCE_DIR}/string/copy_benchmark.cpp"
"${CMAKE_CURRENT_SOURCE_DIR}/string/filter_benchmark.cpp"
"${CMAKE_CURRENT_SOURCE_DIR}/string/find_benchmark.cpp"
"${CMAKE_CURRENT_SOURCE_DIR}/string/replace_benchmark.cpp"
"${CMAKE_CURRENT_SOURCE_DIR}/string/split_benchmark.cpp"
Expand Down
93 changes: 93 additions & 0 deletions cpp/benchmarks/string/filter_benchmark.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,93 @@
/*
* Copyright (c) 2021, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include <benchmark/benchmark.h>
#include <benchmarks/common/generate_benchmark_input.hpp>
#include <benchmarks/fixture/benchmark_fixture.hpp>
#include <benchmarks/synchronization/synchronization.hpp>

#include <cudf/scalar/scalar.hpp>
#include <cudf/strings/char_types/char_types.hpp>
#include <cudf/strings/strings_column_view.hpp>
#include <cudf/strings/strip.hpp>
#include <cudf/strings/translate.hpp>
#include <cudf_test/column_wrapper.hpp>

#include <limits>
#include <vector>

enum FilterAPI { filter, filter_chars, strip };

class StringFilterChars : public cudf::benchmark {
};

static void BM_filter_chars(benchmark::State& state, FilterAPI api)
{
cudf::size_type const n_rows{static_cast<cudf::size_type>(state.range(0))};
cudf::size_type const max_str_length{static_cast<cudf::size_type>(state.range(1))};
data_profile table_profile;
table_profile.set_distribution_params(
cudf::type_id::STRING, distribution_id::NORMAL, 0, max_str_length);
auto const table =
create_random_table({cudf::type_id::STRING}, 1, row_count{n_rows}, table_profile);
cudf::strings_column_view input(table->view().column(0));

auto const types = cudf::strings::string_character_types::SPACE;
std::vector<std::pair<cudf::char_utf8, cudf::char_utf8>> filter_table{
{cudf::char_utf8{'a'}, cudf::char_utf8{'c'}}};

for (auto _ : state) {
cuda_event_timer raii(state, true, 0);
switch (api) {
case filter: cudf::strings::filter_characters_of_type(input, types); break;
case filter_chars: cudf::strings::filter_characters(input, filter_table); break;
case strip: cudf::strings::strip(input); break;
}
}

state.SetBytesProcessed(state.iterations() * input.chars_size());
}

static void generate_bench_args(benchmark::internal::Benchmark* b)
{
int const min_rows = 1 << 12;
int const max_rows = 1 << 24;
int const row_mult = 8;
int const min_rowlen = 1 << 5;
int const max_rowlen = 1 << 13;
int const len_mult = 4;
for (int row_count = min_rows; row_count <= max_rows; row_count *= row_mult) {
for (int rowlen = min_rowlen; rowlen <= max_rowlen; rowlen *= len_mult) {
// avoid generating combinations that exceed the cudf column limit
size_t total_chars = static_cast<size_t>(row_count) * rowlen;
if (total_chars < std::numeric_limits<cudf::size_type>::max()) {
b->Args({row_count, rowlen});
}
}
}
}

#define STRINGS_BENCHMARK_DEFINE(name) \
BENCHMARK_DEFINE_F(StringFilterChars, name) \
(::benchmark::State & st) { BM_filter_chars(st, FilterAPI::name); } \
BENCHMARK_REGISTER_F(StringFilterChars, name) \
->Apply(generate_bench_args) \
->UseManualTime() \
->Unit(benchmark::kMillisecond);

STRINGS_BENCHMARK_DEFINE(filter)
STRINGS_BENCHMARK_DEFINE(filter_chars)
STRINGS_BENCHMARK_DEFINE(strip)
80 changes: 37 additions & 43 deletions cpp/src/strings/filter_chars.cu
Original file line number Diff line number Diff line change
Expand Up @@ -49,10 +49,10 @@ namespace {
struct filter_fn {
column_device_view const d_strings;
filter_type keep_characters;
rmm::device_vector<char_range>::iterator table_begin;
rmm::device_vector<char_range>::iterator table_end;
rmm::device_uvector<char_range>::iterator table_begin;
rmm::device_uvector<char_range>::iterator table_end;
string_view const d_replacement;
int32_t const* d_offsets{};
int32_t* d_offsets{};
char* d_chars{};

/**
Expand All @@ -78,23 +78,28 @@ struct filter_fn {
* This is also used to calculate the size of the output.
*
* @param idx Index of the current string to process.
* @return The size of the output for this string.
*/
__device__ size_type operator()(size_type idx)
__device__ void operator()(size_type idx)
{
if (d_strings.is_null(idx)) return 0;
string_view d_str = d_strings.element<string_view>(idx);
size_type nbytes = d_str.size_bytes();
auto const in_ptr = d_str.data();
auto out_ptr = d_chars ? d_chars + d_offsets[idx] : nullptr;
if (d_strings.is_null(idx)) {
if (!d_chars) d_offsets[idx] = 0;
return;
}
auto const d_str = d_strings.element<string_view>(idx);

auto nbytes = d_str.size_bytes();
auto out_ptr = d_chars ? d_chars + d_offsets[idx] : nullptr;
for (auto itr = d_str.begin(); itr != d_str.end(); ++itr) {
auto const char_size = bytes_in_char_utf8(*itr);
string_view const d_newchar =
remove_char(*itr) ? d_replacement : string_view(in_ptr + itr.byte_offset(), char_size);
nbytes += d_newchar.size_bytes() - char_size;
if (out_ptr) out_ptr = cudf::strings::detail::copy_string(out_ptr, d_newchar);
auto const char_size = bytes_in_char_utf8(*itr);
string_view const d_newchar = remove_char(*itr)
? d_replacement
: string_view(d_str.data() + itr.byte_offset(), char_size);
if (out_ptr)
out_ptr = cudf::strings::detail::copy_string(out_ptr, d_newchar);
else
nbytes += d_newchar.size_bytes() - char_size;
}
return nbytes;
if (!out_ptr) d_offsets[idx] = nbytes;
}
};

Expand Down Expand Up @@ -123,36 +128,25 @@ std::unique_ptr<column> filter_characters(
characters_to_filter.begin(), characters_to_filter.end(), htable.begin(), [](auto entry) {
return char_range{entry.first, entry.second};
});
rmm::device_vector<char_range> table(htable); // copy filter table to device memory

auto strings_column = column_device_view::create(strings.parent(), stream);
auto d_strings = *strings_column;

// create null mask
rmm::device_buffer null_mask = cudf::detail::copy_bitmask(strings.parent(), stream, mr);

// create offsets column
filter_fn ffn{d_strings, keep_characters, table.begin(), table.end(), d_replacement};
auto offsets_transformer_itr = cudf::detail::make_counting_transform_iterator(0, ffn);
auto offsets_column = make_offsets_child_column(
offsets_transformer_itr, offsets_transformer_itr + strings_count, stream, mr);
ffn.d_offsets = offsets_column->view().data<int32_t>();

// build chars column
size_type bytes = cudf::detail::get_value<int32_t>(offsets_column->view(), strings_count, stream);
auto chars_column = strings::detail::create_chars_child_column(
strings_count, strings.null_count(), bytes, stream, mr);
ffn.d_chars = chars_column->mutable_view().data<char>();
thrust::for_each_n(rmm::exec_policy(stream),
thrust::make_counting_iterator<cudf::size_type>(0),
strings_count,
ffn);
rmm::device_uvector<char_range> table(table_size, stream);
CUDA_TRY(cudaMemcpyAsync(table.data(),
htable.data(),
table_size * sizeof(char_range),
cudaMemcpyHostToDevice,
stream.value()));

auto d_strings = column_device_view::create(strings.parent(), stream);

// this utility calls the strip_fn to build the offsets and chars columns
filter_fn ffn{*d_strings, keep_characters, table.begin(), table.end(), d_replacement};
auto children = cudf::strings::detail::make_strings_children(
ffn, strings.size(), strings.null_count(), stream, mr);

return make_strings_column(strings_count,
std::move(offsets_column),
std::move(chars_column),
std::move(children.first),
std::move(children.second),
strings.null_count(),
std::move(null_mask),
cudf::detail::copy_bitmask(strings.parent(), stream, mr),
stream,
mr);
}
Expand Down
123 changes: 47 additions & 76 deletions cpp/src/strings/strip.cu
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2019-2020, NVIDIA CORPORATION.
* Copyright (c) 2019-2021, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -35,16 +35,6 @@ namespace cudf {
namespace strings {
namespace detail {
namespace {
/**
* @brief Used as template parameter to divide size calculation from
* the actual string operation within a function.
*
* Useful when most of the logic is identical for both passes.
*/
enum TwoPass {
SizeOnly = 0, ///< calculate the size only
ExecuteOp ///< run the string operation
};

/**
* @brief Strip characters from the beginning and/or end of a string.
Expand All @@ -53,51 +43,52 @@ enum TwoPass {
* of any characters found in d_to_strip or whitespace if
* d_to_strip is empty.
*
* @tparam Pass Allows computing only the size of the output
* or writing the output to device memory.
*/
template <TwoPass Pass = SizeOnly>
struct strip_fn {
column_device_view const d_strings;
strip_type stype; // right, left, or both
string_view d_to_strip;
int32_t const* d_offsets{};
strip_type const stype; // right, left, or both
string_view const d_to_strip;
int32_t* d_offsets{};
char* d_chars{};

__device__ bool is_strip_character(char_utf8 chr)
{
return d_to_strip.empty() ? (chr <= ' ') : // whitespace check
thrust::any_of(
thrust::seq, d_to_strip.begin(), d_to_strip.end(), [chr] __device__(char_utf8 c) {
return c == chr;
});
}

__device__ size_type operator()(size_type idx)
__device__ void operator()(size_type idx)
{
if (d_strings.is_null(idx)) return 0;
string_view d_str = d_strings.element<string_view>(idx);
size_type length = d_str.length();
size_type left_offset = 0;
auto itr = d_str.begin();
if (stype == strip_type::LEFT || stype == strip_type::BOTH) {
for (; itr != d_str.end();) {
if (!is_strip_character(*itr++)) break;
left_offset = itr.byte_offset();
}
if (d_strings.is_null(idx)) {
if (!d_chars) d_offsets[idx] = 0;
return;
}
auto const d_str = d_strings.element<string_view>(idx);

auto is_strip_character = [d_to_strip = d_to_strip] __device__(char_utf8 chr) -> bool {
return d_to_strip.empty() ? (chr <= ' ') : // whitespace check
thrust::any_of(
thrust::seq, d_to_strip.begin(), d_to_strip.end(), [chr] __device__(char_utf8 c) {
return c == chr;
});
};

size_type const left_offset = [&] {
if (stype != strip_type::LEFT && stype != strip_type::BOTH) return 0;
auto const itr =
thrust::find_if_not(thrust::seq, d_str.begin(), d_str.end(), is_strip_character);
return itr != d_str.end() ? itr.byte_offset() : d_str.size_bytes();
}();

size_type right_offset = d_str.size_bytes();
if (stype == strip_type::RIGHT || stype == strip_type::BOTH) {
itr = d_str.end();
auto const length = d_str.length();
auto itr = d_str.end();
for (size_type n = 0; n < length; ++n) {
if (!is_strip_character(*(--itr))) break;
right_offset = itr.byte_offset();
}
}
size_type bytes = 0;
if (right_offset > left_offset) bytes = right_offset - left_offset;
if (Pass == ExecuteOp) memcpy(d_chars + d_offsets[idx], d_str.data() + left_offset, bytes);
return bytes;

auto const bytes = (right_offset > left_offset) ? right_offset - left_offset : 0;
if (d_chars)
memcpy(d_chars + d_offsets[idx], d_str.data() + left_offset, bytes);
else
d_offsets[idx] = bytes;
}
};

Expand All @@ -110,42 +101,22 @@ std::unique_ptr<column> strip(
rmm::cuda_stream_view stream = rmm::cuda_stream_default,
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource())
{
auto strings_count = strings.size();
if (strings_count == 0) return detail::make_empty_strings_column(stream, mr);
if (strings.is_empty()) return detail::make_empty_strings_column(stream, mr);

CUDF_EXPECTS(to_strip.is_valid(), "Parameter to_strip must be valid");
string_view d_to_strip(to_strip.data(), to_strip.size());

auto strings_column = column_device_view::create(strings.parent(), stream);
auto d_column = *strings_column;
size_type null_count = strings.null_count();

// copy null mask
rmm::device_buffer null_mask = cudf::detail::copy_bitmask(strings.parent(), stream, mr);

// build offsets column -- calculate the size of each output string
auto offsets_transformer_itr = thrust::make_transform_iterator(
thrust::make_counting_iterator<size_type>(0), strip_fn<SizeOnly>{d_column, stype, d_to_strip});
auto offsets_column = make_offsets_child_column(
offsets_transformer_itr, offsets_transformer_itr + strings_count, stream, mr);
auto offsets_view = offsets_column->view();
auto d_offsets = offsets_view.data<int32_t>();

// build the chars column -- convert characters based on case_flag parameter
size_type bytes = thrust::device_pointer_cast(d_offsets)[strings_count];
auto chars_column = create_chars_child_column(strings_count, null_count, bytes, stream, mr);
auto chars_view = chars_column->mutable_view();
auto d_chars = chars_view.data<char>();
thrust::for_each_n(rmm::exec_policy(stream),
thrust::make_counting_iterator<size_type>(0),
strings_count,
strip_fn<ExecuteOp>{d_column, stype, d_to_strip, d_offsets, d_chars});

return make_strings_column(strings_count,
std::move(offsets_column),
std::move(chars_column),
null_count,
std::move(null_mask),
string_view const d_to_strip(to_strip.data(), to_strip.size());

auto const d_column = column_device_view::create(strings.parent(), stream);

// this utility calls the strip_fn to build the offsets and chars columns
auto children = cudf::strings::detail::make_strings_children(
strip_fn{*d_column, stype, d_to_strip}, strings.size(), strings.null_count(), stream, mr);

return make_strings_column(strings.size(),
std::move(children.first),
std::move(children.second),
strings.null_count(),
cudf::detail::copy_bitmask(strings.parent(), stream, mr),
stream,
mr);
}
Expand Down

0 comments on commit 862559f

Please sign in to comment.