diff --git a/cpp/benchmarks/CMakeLists.txt b/cpp/benchmarks/CMakeLists.txt index 43ca6de11b4..5aa7e0132f8 100644 --- a/cpp/benchmarks/CMakeLists.txt +++ b/cpp/benchmarks/CMakeLists.txt @@ -175,6 +175,7 @@ ConfigureBench(BINARYOP_BENCH binaryop/binaryop_benchmark.cu) ################################################################################################### # - nvtext benchmark ------------------------------------------------------------------- ConfigureBench(TEXT_BENCH + text/ngrams_benchmark.cpp text/normalize_benchmark.cpp text/normalize_spaces_benchmark.cpp text/replace_benchmark.cpp diff --git a/cpp/benchmarks/text/ngrams_benchmark.cpp b/cpp/benchmarks/text/ngrams_benchmark.cpp new file mode 100644 index 00000000000..1fe8e3b7f2e --- /dev/null +++ b/cpp/benchmarks/text/ngrams_benchmark.cpp @@ -0,0 +1,76 @@ +/* + * Copyright (c) 2021, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include +#include +#include +#include + +#include +#include +#include + +#include + +class TextNGrams : public cudf::benchmark { +}; + +enum class ngrams_type { tokens, characters }; + +static void BM_ngrams(benchmark::State& state, ngrams_type nt) +{ + auto const n_rows = static_cast(state.range(0)); + auto const max_str_length = static_cast(state.range(1)); + data_profile table_profile; + table_profile.set_distribution_params( + cudf::type_id::STRING, distribution_id::NORMAL, 0, max_str_length); + auto const table = + create_random_table({cudf::type_id::STRING}, 1, row_count{n_rows}, table_profile); + cudf::strings_column_view input(table->view().column(0)); + + for (auto _ : state) { + cuda_event_timer raii(state, true, 0); + switch (nt) { + case ngrams_type::tokens: nvtext::generate_ngrams(input); break; + case ngrams_type::characters: nvtext::generate_character_ngrams(input); break; + } + } + + state.SetBytesProcessed(state.iterations() * input.chars_size()); +} + +static void generate_bench_args(benchmark::internal::Benchmark* b) +{ + int const min_rows = 1 << 12; + int const max_rows = 1 << 24; + int const row_mult = 8; + int const min_rowlen = 5; + int const max_rowlen = 40; + int const len_mult = 2; + generate_string_bench_args(b, min_rows, max_rows, row_mult, min_rowlen, max_rowlen, len_mult); +} + +#define NVTEXT_BENCHMARK_DEFINE(name) \ + BENCHMARK_DEFINE_F(TextNGrams, name) \ + (::benchmark::State & st) { BM_ngrams(st, ngrams_type::name); } \ + BENCHMARK_REGISTER_F(TextNGrams, name) \ + ->Apply(generate_bench_args) \ + ->UseManualTime() \ + ->Unit(benchmark::kMillisecond); + +NVTEXT_BENCHMARK_DEFINE(tokens) +NVTEXT_BENCHMARK_DEFINE(characters) diff --git a/cpp/src/text/generate_ngrams.cu b/cpp/src/text/generate_ngrams.cu index 3c583622ed8..4a41dacbd30 100644 --- a/cpp/src/text/generate_ngrams.cu +++ b/cpp/src/text/generate_ngrams.cu @@ -50,7 +50,7 @@ struct ngram_generator_fn { cudf::column_device_view const d_strings; cudf::size_type ngrams; cudf::string_view const d_separator; - int32_t const* d_offsets{}; + int32_t* d_offsets{}; char* d_chars{}; /** @@ -62,7 +62,7 @@ struct ngram_generator_fn { * @param idx Index of the kernel thread. * @return Number of bytes required for the string for this thread. */ - __device__ cudf::size_type operator()(cudf::size_type idx) + __device__ void operator()(cudf::size_type idx) { char* out_ptr = d_chars ? d_chars + d_offsets[idx] : nullptr; cudf::size_type bytes = 0; @@ -74,7 +74,7 @@ struct ngram_generator_fn { bytes += d_separator.size_bytes(); if (out_ptr) out_ptr = cudf::strings::detail::copy_string(out_ptr, d_separator); } - return bytes; + if (!d_chars) d_offsets[idx] = bytes; } }; @@ -109,11 +109,11 @@ std::unique_ptr generate_ngrams( if (d_strings.is_null(idx)) return false; return !d_strings.element(idx).empty(); }, - stream, - mr) + stream) ->release(); strings_count = table_offsets.front()->size() - 1; - return std::move(table_offsets.front()); + auto result = std::move(table_offsets.front()); + return result; }(); // this allows freeing the temporary table_offsets CUDF_EXPECTS(strings_count >= ngrams, "Insufficient number of strings to generate ngrams"); @@ -131,30 +131,13 @@ std::unique_ptr generate_ngrams( // compute the number of strings of ngrams auto const ngrams_count = strings_count - ngrams + 1; - // build output offsets by computing the output bytes for each generated ngram - auto offsets_transformer_itr = cudf::detail::make_counting_transform_iterator( - 0, ngram_generator_fn{d_strings, ngrams, d_separator}); - auto offsets_column = cudf::strings::detail::make_offsets_child_column( - offsets_transformer_itr, offsets_transformer_itr + ngrams_count, stream, mr); - auto d_offsets = offsets_column->view().data(); - - // build the chars column - // generate the ngrams from the input strings and copy them into the chars data buffer - cudf::size_type const total_bytes = thrust::device_pointer_cast(d_offsets)[ngrams_count]; - auto chars_column = - cudf::strings::detail::create_chars_child_column(ngrams_count, 0, total_bytes, stream, mr); - char* const d_chars = chars_column->mutable_view().data(); - - thrust::for_each_n(rmm::exec_policy(stream), - thrust::make_counting_iterator(0), - ngrams_count, - ngram_generator_fn{d_strings, ngrams, d_separator, d_offsets, d_chars}); - chars_column->set_null_count(0); + auto children = cudf::strings::detail::make_strings_children( + ngram_generator_fn{d_strings, ngrams, d_separator}, ngrams_count, 0, stream, mr); // make the output strings column from the offsets and chars column return cudf::make_strings_column(ngrams_count, - std::move(offsets_column), - std::move(chars_column), + std::move(children.first), + std::move(children.second), 0, rmm::device_buffer{0, stream, mr}, stream,