Skip to content

Commit

Permalink
Add gbenchmark for nvtext ngrams functions (#7693)
Browse files Browse the repository at this point in the history
Reference #5696
Creates a gbenchmark for `nvtext::generate_ngrams()` and `nvtext::generate_character_ngrams()` functions.
The benchmarks measure various string lengths and numbers of rows.
`nvtext::generate_ngrams()` was refactored to use the more efficient `make_strings_children` utility, which improved its performance by about 50%.

Authors:
  - David (@davidwendt)

Approvers:
  - Nghia Truong (@ttnghia)
  - Mark Harris (@harrism)

URL: #7693
  • Loading branch information
davidwendt authored Mar 29, 2021
1 parent fe7ec85 commit d9103c4
Show file tree
Hide file tree
Showing 3 changed files with 87 additions and 27 deletions.
1 change: 1 addition & 0 deletions cpp/benchmarks/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -175,6 +175,7 @@ ConfigureBench(BINARYOP_BENCH binaryop/binaryop_benchmark.cu)
###################################################################################################
# - nvtext benchmark -------------------------------------------------------------------
ConfigureBench(TEXT_BENCH
text/ngrams_benchmark.cpp
text/normalize_benchmark.cpp
text/normalize_spaces_benchmark.cpp
text/replace_benchmark.cpp
Expand Down
76 changes: 76 additions & 0 deletions cpp/benchmarks/text/ngrams_benchmark.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
/*
* Copyright (c) 2021, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include <benchmark/benchmark.h>
#include <benchmarks/common/generate_benchmark_input.hpp>
#include <benchmarks/fixture/benchmark_fixture.hpp>
#include <benchmarks/string/string_bench_args.hpp>
#include <benchmarks/synchronization/synchronization.hpp>

#include <cudf/scalar/scalar.hpp>
#include <cudf/strings/strings_column_view.hpp>
#include <cudf_test/base_fixture.hpp>

#include <nvtext/generate_ngrams.hpp>

/**
 * @brief Fixture type for the nvtext ngrams benchmarks.
 *
 * Adds no members of its own; it only gives the benchmarks registered in this
 * file a distinct fixture name derived from `cudf::benchmark`.
 */
class TextNGrams : public cudf::benchmark {};

/**
 * @brief Selects which nvtext ngrams API a benchmark run exercises.
 */
enum class ngrams_type {
  tokens,     ///< benchmark nvtext::generate_ngrams()
  characters  ///< benchmark nvtext::generate_character_ngrams()
};

/**
 * @brief Times one of the nvtext ngrams functions over a randomly generated strings column.
 *
 * `state.range(0)` supplies the number of rows and `state.range(1)` the upper bound
 * handed to the string-length distribution of the generated column.
 *
 * @param state Google Benchmark state providing the run arguments and iteration loop.
 * @param nt    Which ngrams API to invoke inside the timed region.
 */
static void BM_ngrams(benchmark::State& state, ngrams_type nt)
{
  auto const row_total  = static_cast<cudf::size_type>(state.range(0));
  auto const length_cap = static_cast<cudf::size_type>(state.range(1));

  // Single STRING column whose lengths follow a NORMAL distribution over [0, length_cap].
  data_profile profile;
  profile.set_distribution_params(
    cudf::type_id::STRING, distribution_id::NORMAL, 0, length_cap);
  auto const source =
    create_random_table({cudf::type_id::STRING}, 1, row_count{row_total}, profile);
  cudf::strings_column_view strings(source->view().column(0));

  for (auto _ : state) {
    // Scoped timer: the measured interval is the lifetime of this object.
    // NOTE(review): the `true` argument presumably requests an L2 cache flush and `0`
    // the default stream -- confirm against benchmarks/synchronization/synchronization.hpp.
    cuda_event_timer timer(state, true, 0);
    if (nt == ngrams_type::tokens) {
      nvtext::generate_ngrams(strings);
    } else if (nt == ngrams_type::characters) {
      nvtext::generate_character_ngrams(strings);
    }
  }

  // Report throughput in terms of the input character bytes consumed per iteration.
  state.SetBytesProcessed(state.iterations() * strings.chars_size());
}

/**
 * @brief Supplies the (row count, row length) argument ranges for the ngrams benchmarks.
 *
 * Forwards fixed bounds and multipliers to `generate_string_bench_args`, which is
 * responsible for turning them into the concrete argument pairs on `b`.
 *
 * @param b Benchmark registration object that receives the argument combinations.
 */
static void generate_bench_args(benchmark::internal::Benchmark* b)
{
  // Row-count range: 4096 up to 16M, multiplying by 8 at each step.
  constexpr int rows_lo   = 1 << 12;
  constexpr int rows_hi   = 1 << 24;
  constexpr int rows_step = 8;

  // Row-length range: 5 up to 40, doubling at each step.
  constexpr int len_lo   = 5;
  constexpr int len_hi   = 40;
  constexpr int len_step = 2;

  generate_string_bench_args(b, rows_lo, rows_hi, rows_step, len_lo, len_hi, len_step);
}

/**
 * Defines and registers a `TextNGrams` fixture benchmark for one `ngrams_type` enumerator.
 *
 * `kind` is used both as the benchmark name and as the enumerator (`ngrams_type::kind`),
 * so it must be spelled exactly like an enumerator. The registration uses the arguments
 * from `generate_bench_args`, manual timing, and millisecond units. The expansion ends
 * with its own `;`, so invocations are written without a trailing semicolon.
 */
#define NVTEXT_BENCHMARK_DEFINE(kind)                              \
  BENCHMARK_DEFINE_F(TextNGrams, kind)(::benchmark::State & state) \
  {                                                                \
    BM_ngrams(state, ngrams_type::kind);                           \
  }                                                                \
  BENCHMARK_REGISTER_F(TextNGrams, kind)                           \
    ->Apply(generate_bench_args)                                   \
    ->UseManualTime()                                              \
    ->Unit(benchmark::kMillisecond);

// Register one benchmark per ngrams_type enumerator. Each argument must match an
// enumerator name because the macro expands it as ngrams_type::<argument>.
// No trailing semicolons: the macro expansion already ends with one.
NVTEXT_BENCHMARK_DEFINE(tokens)
NVTEXT_BENCHMARK_DEFINE(characters)
37 changes: 10 additions & 27 deletions cpp/src/text/generate_ngrams.cu
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ struct ngram_generator_fn {
cudf::column_device_view const d_strings;
cudf::size_type ngrams;
cudf::string_view const d_separator;
int32_t const* d_offsets{};
int32_t* d_offsets{};
char* d_chars{};

/**
Expand All @@ -62,7 +62,7 @@ struct ngram_generator_fn {
* @param idx Index of the kernel thread.
* @return Number of bytes required for the string for this thread.
*/
__device__ cudf::size_type operator()(cudf::size_type idx)
__device__ void operator()(cudf::size_type idx)
{
char* out_ptr = d_chars ? d_chars + d_offsets[idx] : nullptr;
cudf::size_type bytes = 0;
Expand All @@ -74,7 +74,7 @@ struct ngram_generator_fn {
bytes += d_separator.size_bytes();
if (out_ptr) out_ptr = cudf::strings::detail::copy_string(out_ptr, d_separator);
}
return bytes;
if (!d_chars) d_offsets[idx] = bytes;
}
};

Expand Down Expand Up @@ -109,11 +109,11 @@ std::unique_ptr<cudf::column> generate_ngrams(
if (d_strings.is_null(idx)) return false;
return !d_strings.element<cudf::string_view>(idx).empty();
},
stream,
mr)
stream)
->release();
strings_count = table_offsets.front()->size() - 1;
return std::move(table_offsets.front());
auto result = std::move(table_offsets.front());
return result;
}(); // this allows freeing the temporary table_offsets

CUDF_EXPECTS(strings_count >= ngrams, "Insufficient number of strings to generate ngrams");
Expand All @@ -131,30 +131,13 @@ std::unique_ptr<cudf::column> generate_ngrams(
// compute the number of strings of ngrams
auto const ngrams_count = strings_count - ngrams + 1;

// build output offsets by computing the output bytes for each generated ngram
auto offsets_transformer_itr = cudf::detail::make_counting_transform_iterator(
0, ngram_generator_fn{d_strings, ngrams, d_separator});
auto offsets_column = cudf::strings::detail::make_offsets_child_column(
offsets_transformer_itr, offsets_transformer_itr + ngrams_count, stream, mr);
auto d_offsets = offsets_column->view().data<int32_t>();

// build the chars column
// generate the ngrams from the input strings and copy them into the chars data buffer
cudf::size_type const total_bytes = thrust::device_pointer_cast(d_offsets)[ngrams_count];
auto chars_column =
cudf::strings::detail::create_chars_child_column(ngrams_count, 0, total_bytes, stream, mr);
char* const d_chars = chars_column->mutable_view().data<char>();

thrust::for_each_n(rmm::exec_policy(stream),
thrust::make_counting_iterator<cudf::size_type>(0),
ngrams_count,
ngram_generator_fn{d_strings, ngrams, d_separator, d_offsets, d_chars});
chars_column->set_null_count(0);
auto children = cudf::strings::detail::make_strings_children(
ngram_generator_fn{d_strings, ngrams, d_separator}, ngrams_count, 0, stream, mr);

// make the output strings column from the offsets and chars column
return cudf::make_strings_column(ngrams_count,
std::move(offsets_column),
std::move(chars_column),
std::move(children.first),
std::move(children.second),
0,
rmm::device_buffer{0, stream, mr},
stream,
Expand Down

0 comments on commit d9103c4

Please sign in to comment.