diff --git a/cpp/include/nvtext/detail/tokenize.hpp b/cpp/include/nvtext/detail/tokenize.hpp index 9c1cdbd6310..38b49e63590 100644 --- a/cpp/include/nvtext/detail/tokenize.hpp +++ b/cpp/include/nvtext/detail/tokenize.hpp @@ -35,12 +35,10 @@ namespace detail { * @param mr Device memory resource used to allocate the returned column's device memory. * @return New strings columns of tokens. */ -std::unique_ptr tokenize( - cudf::strings_column_view const& strings, - cudf::string_scalar const& delimiter = cudf::string_scalar{""}, - // Move before delimiter? - rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); +std::unique_ptr tokenize(cudf::strings_column_view const& strings, + cudf::string_scalar const& delimiter, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr); /** * @copydoc nvtext::tokenize(strings_column_view const&,strings_column_view @@ -52,11 +50,10 @@ std::unique_ptr tokenize( * @param mr Device memory resource used to allocate the returned column's device memory. * @return New strings columns of tokens. */ -std::unique_ptr tokenize( - cudf::strings_column_view const& strings, - cudf::strings_column_view const& delimiters, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); +std::unique_ptr tokenize(cudf::strings_column_view const& strings, + cudf::strings_column_view const& delimiters, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr); /** * @copydoc nvtext::count_tokens(strings_column_view const&, string_scalar @@ -69,12 +66,10 @@ std::unique_ptr tokenize( * @param mr Device memory resource used to allocate the returned column's device memory. * @return New INT32 column of token counts. */ -std::unique_ptr count_tokens( - cudf::strings_column_view const& strings, - cudf::string_scalar const& delimiter = cudf::string_scalar{""}, - // Move before delimiter? - rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); +std::unique_ptr count_tokens(cudf::strings_column_view const& strings, + cudf::string_scalar const& delimiter, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr); /** * @copydoc nvtext::count_tokens(strings_column_view const&,strings_column_view @@ -86,11 +81,10 @@ std::unique_ptr count_tokens( * @param mr Device memory resource used to allocate the returned column's device memory. * @return New INT32 column of token counts. */ -std::unique_ptr count_tokens( - cudf::strings_column_view const& strings, - cudf::strings_column_view const& delimiters, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); +std::unique_ptr count_tokens(cudf::strings_column_view const& strings, + cudf::strings_column_view const& delimiters, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr); } // namespace detail } // namespace nvtext diff --git a/cpp/src/text/generate_ngrams.cu b/cpp/src/text/generate_ngrams.cu index d5ff7b99344..be50ece28d5 100644 --- a/cpp/src/text/generate_ngrams.cu +++ b/cpp/src/text/generate_ngrams.cu @@ -84,12 +84,11 @@ struct ngram_generator_fn { } // namespace -std::unique_ptr generate_ngrams( - cudf::strings_column_view const& strings, - cudf::size_type ngrams = 2, - cudf::string_scalar const& separator = cudf::string_scalar{"_"}, - rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()) +std::unique_ptr generate_ngrams(cudf::strings_column_view const& strings, + cudf::size_type ngrams, + cudf::string_scalar const& separator, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) { CUDF_EXPECTS(separator.is_valid(stream), "Parameter separator must be valid"); cudf::string_view const d_separator(separator.data(), separator.size()); diff --git a/cpp/src/text/ngrams_tokenize.cu b/cpp/src/text/ngrams_tokenize.cu index b0071ed9e88..f1ddcfdc6f8 100644 --- a/cpp/src/text/ngrams_tokenize.cu +++ b/cpp/src/text/ngrams_tokenize.cu @@ -134,13 +134,12 @@ struct ngram_builder_fn { // detail APIs -std::unique_ptr ngrams_tokenize( - cudf::strings_column_view const& strings, - cudf::size_type ngrams = 2, - cudf::string_scalar const& delimiter = cudf::string_scalar(""), - cudf::string_scalar const& separator = cudf::string_scalar{"_"}, - rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()) +std::unique_ptr ngrams_tokenize(cudf::strings_column_view const& strings, + cudf::size_type ngrams, + cudf::string_scalar const& delimiter, + cudf::string_scalar const& separator, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) { CUDF_EXPECTS(delimiter.is_valid(stream), "Parameter delimiter must be valid"); cudf::string_view d_delimiter(delimiter.data(), delimiter.size()); diff --git a/cpp/src/text/normalize.cu b/cpp/src/text/normalize.cu index 2d5dd0ebbf8..2931370ac02 100644 --- a/cpp/src/text/normalize.cu +++ b/cpp/src/text/normalize.cu @@ -170,10 +170,9 @@ struct codepoint_to_utf8_fn { } // namespace // detail API -std::unique_ptr normalize_spaces( - cudf::strings_column_view const& strings, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()) +std::unique_ptr normalize_spaces(cudf::strings_column_view const& strings, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) { if (strings.is_empty()) return cudf::make_empty_column(cudf::data_type{cudf::type_id::STRING});