diff --git a/cpp/include/cudf/strings/detail/scatter.cuh b/cpp/include/cudf/strings/detail/scatter.cuh index b6aa22cc316..f167206f36b 100644 --- a/cpp/include/cudf/strings/detail/scatter.cuh +++ b/cpp/include/cudf/strings/detail/scatter.cuh @@ -15,14 +15,13 @@ */ #pragma once -#include -#include -#include -#include +#include #include #include +#include #include +#include #include #include @@ -71,17 +70,9 @@ std::unique_ptr scatter( // do the scatter thrust::scatter(rmm::exec_policy(stream), begin, end, scatter_map, target_vector.begin()); - // build offsets column - auto offsets_column = child_offsets_from_string_vector(target_vector, stream, mr); - // build chars column - auto chars_column = - child_chars_from_string_vector(target_vector, offsets_column->view(), stream, mr); - - return make_strings_column(target.size(), - std::move(offsets_column), - std::move(chars_column), - UNKNOWN_NULL_COUNT, - cudf::detail::copy_bitmask(target.parent(), stream, mr)); + // build the output column + auto sv_span = cudf::device_span(target_vector); + return make_strings_column(sv_span, string_view{nullptr, 0}, stream, mr); } } // namespace detail diff --git a/cpp/include/cudf/strings/detail/utilities.cuh b/cpp/include/cudf/strings/detail/utilities.cuh index b9ea2d9ecff..bb7f29a4172 100644 --- a/cpp/include/cudf/strings/detail/utilities.cuh +++ b/cpp/include/cudf/strings/detail/utilities.cuh @@ -71,28 +71,6 @@ std::unique_ptr make_offsets_child_column( return offsets_column; } -/** - * @brief Creates an offsets column from a string_view iterator, and size. - * - * @tparam Iter Iterator type that returns string_view instances - * @param strings_begin Iterator to the beginning of the string_view sequence - * @param num_strings The number of string_view instances in the sequence - * @param stream CUDA stream used for device memory operations and kernel launches. - * @param mr Device memory resource used to allocate the returned column's device memory. - * @return Child offsets column - */ -template -std::unique_ptr child_offsets_from_string_iterator( - Iter strings_begin, - cudf::size_type num_strings, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()) -{ - auto transformer = [] __device__(string_view v) { return v.size_bytes(); }; - auto begin = thrust::make_transform_iterator(strings_begin, transformer); - return make_offsets_child_column(begin, begin + num_strings, stream, mr); -} - /** * @brief Copies input string data into a buffer and increments the pointer by the number of bytes * copied. diff --git a/cpp/include/cudf/strings/detail/utilities.hpp b/cpp/include/cudf/strings/detail/utilities.hpp index 6424841ba86..c4f9e547148 100644 --- a/cpp/include/cudf/strings/detail/utilities.hpp +++ b/cpp/include/cudf/strings/detail/utilities.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2021, NVIDIA CORPORATION. + * Copyright (c) 2019-2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -45,36 +45,11 @@ std::unique_ptr create_chars_child_column( * * @param strings Strings column instance. * @param stream CUDA stream used for device memory operations and kernel launches. + * @param mr Device memory resource used to allocate the returned vector's device memory. * @return Device vector of string_views */ rmm::device_uvector create_string_vector_from_column( - cudf::strings_column_view const strings, rmm::cuda_stream_view stream = rmm::cuda_stream_default); - -/** - * @brief Creates an offsets column from a string_view vector. - * - * @param strings Strings input data - * @param stream CUDA stream used for device memory operations and kernel launches. - * @param mr Device memory resource used to allocate the returned column's device memory. - * @return Child offsets column - */ -std::unique_ptr child_offsets_from_string_vector( - cudf::device_span strings, - rmm::cuda_stream_view stream = rmm::cuda_stream_default, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); - -/** - * @brief Creates a chars column from a string_view vector. - * - * @param strings Strings input data - * @param d_offsets Offsets vector for placing strings into column's memory. - * @param stream CUDA stream used for device memory operations and kernel launches. - * @param mr Device memory resource used to allocate the returned column's device memory. - * @return Child chars column - */ -std::unique_ptr child_chars_from_string_vector( - cudf::device_span strings, - column_view const& offsets, + cudf::strings_column_view const strings, rmm::cuda_stream_view stream = rmm::cuda_stream_default, rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); diff --git a/cpp/src/lists/copying/scatter_helper.cu b/cpp/src/lists/copying/scatter_helper.cu index adc1b95a9e6..fecf6e1c1a1 100644 --- a/cpp/src/lists/copying/scatter_helper.cu +++ b/cpp/src/lists/copying/scatter_helper.cu @@ -21,8 +21,7 @@ #include #include #include -#include -#include +#include #include #include @@ -253,39 +252,16 @@ struct list_child_constructor { auto lists_column = actual_list_row.get_column(); auto lists_offsets_ptr = lists_column.offsets().template data(); auto child_strings_column = lists_column.child(); - auto string_offsets_ptr = - child_strings_column.child(cudf::strings_column_view::offsets_column_index) - .template data(); - auto string_chars_ptr = - child_strings_column.child(cudf::strings_column_view::chars_column_index) - .template data(); - - auto strings_offset = lists_offsets_ptr[row_index] + intra_index; - auto char_offset = string_offsets_ptr[strings_offset]; - auto char_ptr = string_chars_ptr + char_offset; - auto string_size = - string_offsets_ptr[strings_offset + 1] - string_offsets_ptr[strings_offset]; - return string_view{char_ptr, string_size}; + auto strings_offset = lists_offsets_ptr[row_index] + intra_index; + + return child_strings_column.is_null(strings_offset) + ? string_view{nullptr, 0} + : child_strings_column.template element(strings_offset); }); // string_views should now have been populated with source and target references. - - auto string_offsets = cudf::strings::detail::child_offsets_from_string_iterator( - string_views.begin(), string_views.size(), stream, mr); - - auto string_chars = cudf::strings::detail::child_chars_from_string_vector( - string_views, string_offsets->view(), stream, mr); - auto child_null_mask = - source_lists_column_view.child().nullable() || target_lists_column_view.child().nullable() - ? construct_child_nullmask( - list_vector, list_offsets, source_lists, target_lists, num_child_rows, stream, mr) - : std::make_pair(rmm::device_buffer{}, 0); - - return cudf::make_strings_column(num_child_rows, - std::move(string_offsets), - std::move(string_chars), - child_null_mask.second, // Null count. - std::move(child_null_mask.first)); + auto sv_span = cudf::device_span(string_views); + return cudf::make_strings_column(sv_span, string_view{nullptr, 0}, stream, mr); } /** diff --git a/cpp/src/strings/utilities.cu b/cpp/src/strings/utilities.cu index d7cc72fdfff..a7ef2afb47f 100644 --- a/cpp/src/strings/utilities.cu +++ b/cpp/src/strings/utilities.cu @@ -20,7 +20,6 @@ #include #include -#include #include #include @@ -28,12 +27,8 @@ #include #include -#include #include -#include -#include - -#include +#include namespace cudf { namespace strings { @@ -42,65 +37,27 @@ namespace detail { /** * @copydoc create_string_vector_from_column */ -rmm::device_uvector create_string_vector_from_column(cudf::strings_column_view strings, - rmm::cuda_stream_view stream) -{ - auto strings_column = column_device_view::create(strings.parent(), stream); - auto d_column = *strings_column; - - rmm::device_uvector strings_vector(strings.size(), stream); - string_view* d_strings = strings_vector.data(); - thrust::for_each_n(rmm::exec_policy(stream), - thrust::make_counting_iterator(0), - strings.size(), - [d_column, d_strings] __device__(size_type idx) { - if (d_column.is_null(idx)) - d_strings[idx] = string_view(nullptr, 0); - else - d_strings[idx] = d_column.element(idx); - }); - return strings_vector; -} - -/** - * @copydoc child_offsets_from_string_vector - */ -std::unique_ptr child_offsets_from_string_vector( - cudf::device_span strings, +rmm::device_uvector create_string_vector_from_column( + cudf::strings_column_view const input, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { - return child_offsets_from_string_iterator(strings.begin(), strings.size(), stream, mr); -} + auto d_strings = column_device_view::create(input.parent(), stream); -/** - * @copydoc child_chars_from_string_vector - */ -std::unique_ptr child_chars_from_string_vector(cudf::device_span strings, - column_view const& offsets, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) -{ - auto const d_strings = strings.data(); - auto const bytes = cudf::detail::get_value(offsets, strings.size(), stream); - auto const d_offsets = offsets.data(); - - // create column - auto chars_column = create_chars_child_column(bytes, stream, mr); - // get it's view - auto d_chars = chars_column->mutable_view().data(); - thrust::for_each_n(rmm::exec_policy(stream), - thrust::make_counting_iterator(0), - strings.size(), - [d_strings, d_offsets, d_chars] __device__(size_type idx) { - string_view const d_str = d_strings[idx]; - memcpy(d_chars + d_offsets[idx], d_str.data(), d_str.size_bytes()); - }); - - return chars_column; + auto strings_vector = rmm::device_uvector(input.size(), stream, mr); + + thrust::transform( + rmm::exec_policy(stream), + thrust::make_counting_iterator(0), + thrust::make_counting_iterator(input.size()), + strings_vector.begin(), + [d_strings = *d_strings] __device__(size_type idx) { + return d_strings.is_null(idx) ? string_view{nullptr, 0} : d_strings.element(idx); + }); + + return strings_vector; } -// std::unique_ptr create_chars_child_column(cudf::size_type total_bytes, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr)