From 8cd927f3736424bcc3b3d30c76f6f4c64fef441d Mon Sep 17 00:00:00 2001 From: David <45795991+davidwendt@users.noreply.github.com> Date: Thu, 11 Mar 2021 09:57:38 -0500 Subject: [PATCH] Change device_vector to device_uvector in nvtext source files (#7512) Reference #7287 This PR changes `cpp/src/text/*` source files to use `rmm::device_uvector` instead of `rmm:device_vector`. This allows keeping the memory operations on the provided kernel stream. Authors: - David (@davidwendt) Approvers: - Paul Taylor (@trxcllnt) - Mark Harris (@harrism) URL: https://github.com/rapidsai/cudf/pull/7512 --- cpp/src/text/generate_ngrams.cu | 14 ++++--------- cpp/src/text/ngrams_tokenize.cu | 35 +++++++++++++++++---------------- cpp/src/text/tokenize.cu | 9 ++++++--- 3 files changed, 28 insertions(+), 30 deletions(-) diff --git a/cpp/src/text/generate_ngrams.cu b/cpp/src/text/generate_ngrams.cu index 79154232394..3c583622ed8 100644 --- a/cpp/src/text/generate_ngrams.cu +++ b/cpp/src/text/generate_ngrams.cu @@ -221,7 +221,7 @@ std::unique_ptr generate_character_ngrams(cudf::strings_column_vie auto const d_strings = *strings_column; // create a vector of ngram offsets for each string - rmm::device_vector ngram_offsets(strings_count + 1); + rmm::device_uvector ngram_offsets(strings_count + 1, stream); thrust::transform_exclusive_scan( rmm::exec_policy(stream), thrust::make_counting_iterator(0), @@ -235,14 +235,8 @@ std::unique_ptr generate_character_ngrams(cudf::strings_column_vie cudf::size_type{0}, thrust::plus()); - // total count is the last entry - auto const d_ngram_offsets = ngram_offsets.data().get(); - cudf::size_type total_ngrams = 0; - CUDA_TRY(cudaMemcpyAsync(&total_ngrams, - d_ngram_offsets + strings_count, - sizeof(cudf::size_type), - cudaMemcpyDeviceToHost, - stream.value())); + // total ngrams count is the last entry + cudf::size_type const total_ngrams = ngram_offsets.back_element(stream); CUDF_EXPECTS(total_ngrams > 0, "Insufficient number of characters in each string to generate ngrams"); @@ -254,7 +248,7 @@ std::unique_ptr generate_character_ngrams(cudf::strings_column_vie mr); auto d_offsets = offsets_column->mutable_view().data(); // compute the size of each ngram -- output goes in d_offsets - character_ngram_generator_fn generator{d_strings, ngrams, d_ngram_offsets, d_offsets}; + character_ngram_generator_fn generator{d_strings, ngrams, ngram_offsets.data(), d_offsets}; thrust::for_each_n(rmm::exec_policy(stream), thrust::make_counting_iterator(0), strings_count, diff --git a/cpp/src/text/ngrams_tokenize.cu b/cpp/src/text/ngrams_tokenize.cu index 18bc86f6478..96b06e7a1eb 100644 --- a/cpp/src/text/ngrams_tokenize.cu +++ b/cpp/src/text/ngrams_tokenize.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, NVIDIA CORPORATION. + * Copyright (c) 2020-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -158,21 +158,22 @@ std::unique_ptr ngrams_tokenize( // first, get the number of tokens per string to get the token-offsets // Ex. token-counts = [3,2]; token-offsets = [0,3,5] - rmm::device_vector token_offsets(strings_count + 1); - auto d_token_offsets = token_offsets.data().get(); + rmm::device_uvector token_offsets(strings_count + 1, stream); + auto d_token_offsets = token_offsets.data(); thrust::transform_inclusive_scan(rmm::exec_policy(stream), thrust::make_counting_iterator(0), thrust::make_counting_iterator(strings_count), d_token_offsets + 1, strings_tokenizer{d_strings, d_delimiter}, thrust::plus()); - CUDA_TRY(cudaMemsetAsync(d_token_offsets, 0, sizeof(int32_t), stream.value())); - auto total_tokens = token_offsets[strings_count]; // Ex. 5 tokens + int32_t const zero = 0; + token_offsets.set_element_async(0, zero, stream); + auto const total_tokens = token_offsets.back_element(stream); // Ex. 5 tokens // get the token positions (in bytes) per string // Ex. start/end pairs: [(0,1),(2,4),(5,8), (0,2),(3,4)] - rmm::device_vector token_positions(total_tokens); - auto d_token_positions = token_positions.data().get(); + rmm::device_uvector token_positions(total_tokens, stream); + auto d_token_positions = token_positions.data(); thrust::for_each_n( rmm::exec_policy(stream), thrust::make_counting_iterator(0), @@ -181,8 +182,8 @@ std::unique_ptr ngrams_tokenize( // compute the number of ngrams per string to get the total number of ngrams to generate // Ex. ngram-counts = [2,1]; ngram-offsets = [0,2,3]; total = 3 bigrams - rmm::device_vector ngram_offsets(strings_count + 1); - auto d_ngram_offsets = ngram_offsets.data().get(); + rmm::device_uvector ngram_offsets(strings_count + 1, stream); + auto d_ngram_offsets = ngram_offsets.data(); thrust::transform_inclusive_scan( rmm::exec_policy(stream), thrust::make_counting_iterator(0), @@ -193,8 +194,8 @@ std::unique_ptr ngrams_tokenize( return (token_count >= ngrams) ? token_count - ngrams + 1 : 0; }, thrust::plus()); - CUDA_TRY(cudaMemsetAsync(d_ngram_offsets, 0, sizeof(int32_t), stream.value())); - auto total_ngrams = ngram_offsets[strings_count]; + ngram_offsets.set_element_async(0, zero, stream); + auto const total_ngrams = ngram_offsets.back_element(stream); // Compute the total size of the ngrams for each string (not for each ngram) // Ex. 2 bigrams in 1st string total to 10 bytes; 1 bigram in 2nd string is 4 bytes @@ -204,8 +205,8 @@ std::unique_ptr ngrams_tokenize( // ngrams for each string. // Ex. bigram for first string produces 2 bigrams ("a_bb","bb_ccc") which // is built in memory like this: "a_bbbb_ccc" - rmm::device_vector chars_offsets(strings_count + 1); // output memory offsets - auto d_chars_offsets = chars_offsets.data().get(); // per input string + rmm::device_uvector chars_offsets(strings_count + 1, stream); // output memory offsets + auto d_chars_offsets = chars_offsets.data(); // per input string thrust::transform_inclusive_scan( rmm::exec_policy(stream), thrust::make_counting_iterator(0), @@ -213,11 +214,11 @@ std::unique_ptr ngrams_tokenize( d_chars_offsets + 1, ngram_builder_fn{d_strings, d_separator, ngrams, d_token_offsets, d_token_positions}, thrust::plus()); - CUDA_TRY(cudaMemsetAsync(d_chars_offsets, 0, sizeof(int32_t), stream.value())); - auto output_chars_size = chars_offsets[strings_count]; // Ex. 14 output bytes total + chars_offsets.set_element_async(0, zero, stream); + auto const output_chars_size = chars_offsets.back_element(stream); // Ex. 14 output bytes total - rmm::device_vector ngram_sizes(total_ngrams); // size in bytes of each - auto d_ngram_sizes = ngram_sizes.data().get(); // ngram to generate + rmm::device_uvector ngram_sizes(total_ngrams, stream); // size in bytes of each + auto d_ngram_sizes = ngram_sizes.data(); // ngram to generate // build chars column auto chars_column = cudf::strings::detail::create_chars_child_column( diff --git a/cpp/src/text/tokenize.cu b/cpp/src/text/tokenize.cu index 0ba51f7639f..1b7e457367e 100644 --- a/cpp/src/text/tokenize.cu +++ b/cpp/src/text/tokenize.cu @@ -19,6 +19,7 @@ #include #include #include +#include #include #include #include @@ -27,9 +28,10 @@ #include #include +#include #include -#include +#include #include namespace nvtext { @@ -75,7 +77,8 @@ std::unique_ptr tokenize_fn(cudf::size_type strings_count, d_token_counts.template begin(), d_token_counts.template end(), token_offsets.begin() + 1); - CUDA_TRY(cudaMemsetAsync(token_offsets.data(), 0, sizeof(int32_t), stream.value())); + int32_t const zero = 0; + token_offsets.set_element_async(0, zero, stream); auto const total_tokens = token_offsets.back_element(stream); // build a list of pointers to each token rmm::device_uvector tokens(total_tokens, stream); @@ -87,7 +90,7 @@ std::unique_ptr tokenize_fn(cudf::size_type strings_count, strings_count, tokenizer); // create the strings column using the tokens pointers - return cudf::make_strings_column(tokens, stream, mr); + return cudf::strings::detail::make_strings_column(tokens.begin(), tokens.end(), stream, mr); } } // namespace