From ae8ee8a060f7956cd55cbf060cab4de400efcee5 Mon Sep 17 00:00:00 2001 From: Michael Wang Date: Mon, 7 Jun 2021 15:26:24 -0700 Subject: [PATCH] Refactor `scatter` for list columns (#8255) This PR refactors `scatter` for `LIST` type columns. Previously there were nested `for_each_n` when constructing child columns. The outer loop loops over the rows and the inner loops over the elements of each row. We can replace these loops with a single `transform` because we already have the offsets information of the column to construct. For each element, we first lookup the `unbound_list_view` it belongs to via binary searching the offset vector. Then the corresponding element to copy from can be retrieved by dereferencing bounded `list_view` with the proper intra index. Struct type refactor is different. Currently the implementation wraps every child in a lists column and dispatch to the list type specialization. This is fine, but the wrapping process now deep copies the list offsets and child column for dispatching. We can simplify it by just wrapping it with a view. Since `scatter.cuh` is included in many other files, separating scatter implementation detail can help reducing compilation time during refactoring the code. Most helper function is moved into `scatter_helper.cu`. Benchmarking code for scattering lists is added. Benchmark snapshot is below: ``` Benchmark Time CPU Time Old Time New CPU Old CPU New ----------------------------------------------------------------------------------------------------------------------------------------------------------- ScatterLists/double_type_colesce_o/1024/64/manual_time -0.1073 -0.0926 110648 98781 129731 117724 ScatterLists/double_type_colesce_o/4096/64/manual_time -0.1177 -0.1015 113393 100045 132412 118971 ScatterLists/double_type_colesce_o/32768/64/manual_time -0.3785 -0.3391 167288 103962 185599 122663 ScatterLists/double_type_colesce_o/262144/64/manual_time -0.3175 -0.2834 171123 116785 188191 134865 ScatterLists/double_type_colesce_o/2097152/64/manual_time -0.2581 -0.2426 270225 200472 290363 219934 ScatterLists/double_type_colesce_o/16777216/64/manual_time -0.8464 -0.8438 6205089 953139 6224867 972548 ScatterLists/double_type_colesce_o/33554432/64/manual_time -0.8437 -0.8423 12087712 1889483 12107066 1909170 ScatterLists/double_type_colesce_o/1024/512/manual_time -0.3487 -0.3111 150169 97810 169463 116736 ScatterLists/double_type_colesce_o/4096/512/manual_time -0.3499 -0.3116 151978 98794 170918 117661 ScatterLists/double_type_colesce_o/32768/512/manual_time -0.4337 -0.3901 196663 111364 215048 131162 ScatterLists/double_type_colesce_o/262144/512/manual_time -0.8083 -0.7844 590691 113251 607891 131089 ScatterLists/double_type_colesce_o/2097152/512/manual_time -0.7018 -0.6815 641149 191192 661107 210559 ScatterLists/double_type_colesce_o/16777216/512/manual_time -0.6893 -0.6842 2581320 802057 2601542 821602 ScatterLists/double_type_colesce_o/33554432/512/manual_time -0.8277 -0.8259 9150244 1576769 9169846 1596137 ScatterLists/double_type_colesce_o/1024/2048/manual_time -0.6584 -0.6178 284006 97008 303179 115869 ScatterLists/double_type_colesce_o/4096/2048/manual_time -0.6648 -0.6250 289209 96934 308413 115647 ScatterLists/double_type_colesce_o/32768/2048/manual_time -0.7433 -0.7089 386115 99120 404566 117774 ScatterLists/double_type_colesce_o/262144/2048/manual_time -0.8214 -0.7984 611876 109305 629110 126803 ScatterLists/double_type_colesce_o/2097152/2048/manual_time -0.9107 -0.9024 2098263 187417 2118254 206798 ScatterLists/double_type_colesce_o/16777216/2048/manual_time -0.6869 -0.6816 2527109 791306 2546819 810805 ScatterLists/double_type_colesce_o/33554432/2048/manual_time -0.5102 -0.5070 3018595 1478458 3038315 1497923 ``` Authors: - Michael Wang (https://github.com/isVoid) Approvers: - Robert Maynard (https://github.com/robertmaynard) - AJ Schmidt (https://github.com/ajschmidt8) - David Wendt (https://github.com/davidwendt) - MithunR (https://github.com/mythrocks) URL: https://github.com/rapidsai/cudf/pull/8255 --- conda/recipes/libcudf/meta.yaml | 1 + cpp/CMakeLists.txt | 1 + cpp/benchmarks/CMakeLists.txt | 4 + .../lists/copying/scatter_lists_benchmark.cu | 131 ++++ cpp/include/cudf/lists/detail/scatter.cuh | 627 +----------------- .../cudf/lists/detail/scatter_helper.cuh | 148 +++++ cpp/src/lists/copying/scatter_helper.cu | 511 ++++++++++++++ 7 files changed, 805 insertions(+), 618 deletions(-) create mode 100644 cpp/benchmarks/lists/copying/scatter_lists_benchmark.cu create mode 100644 cpp/include/cudf/lists/detail/scatter_helper.cuh create mode 100644 cpp/src/lists/copying/scatter_helper.cu diff --git a/conda/recipes/libcudf/meta.yaml b/conda/recipes/libcudf/meta.yaml index dc41c439d27..139ceb1d6af 100644 --- a/conda/recipes/libcudf/meta.yaml +++ b/conda/recipes/libcudf/meta.yaml @@ -141,6 +141,7 @@ test: - test -f $PREFIX/include/cudf/lists/detail/drop_list_duplicates.hpp - test -f $PREFIX/include/cudf/lists/detail/interleave_columns.hpp - test -f $PREFIX/include/cudf/lists/detail/sorting.hpp + - test -f $PREFIX/include/cudf/lists/detail/scatter_helper.cuh - test -f $PREFIX/include/cudf/lists/combine.hpp - test -f $PREFIX/include/cudf/lists/count_elements.hpp - test -f $PREFIX/include/cudf/lists/explode.hpp diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index 015c856d272..4b2e81edb9d 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -274,6 +274,7 @@ add_library(cudf src/lists/copying/copying.cu src/lists/copying/gather.cu src/lists/copying/segmented_gather.cu + src/lists/copying/scatter_helper.cu src/lists/count_elements.cu src/lists/drop_list_duplicates.cu src/lists/explode.cu diff --git a/cpp/benchmarks/CMakeLists.txt b/cpp/benchmarks/CMakeLists.txt index 41b6ddcc2df..e8ccb24f44c 100644 --- a/cpp/benchmarks/CMakeLists.txt +++ b/cpp/benchmarks/CMakeLists.txt @@ -67,6 +67,10 @@ ConfigureBench(GATHER_BENCH copying/gather_benchmark.cu) # - scatter benchmark ----------------------------------------------------------------------------- ConfigureBench(SCATTER_BENCH copying/scatter_benchmark.cu) +################################################################################################### +# - lists scatter benchmark ----------------------------------------------------------------------- +ConfigureBench(SCATTER_LISTS_BENCH lists/copying/scatter_lists_benchmark.cu) + ################################################################################################### # - contiguous_split benchmark ------------------------------------------------------------------- ConfigureBench(CONTIGUOUS_SPLIT_BENCH copying/contiguous_split_benchmark.cu) diff --git a/cpp/benchmarks/lists/copying/scatter_lists_benchmark.cu b/cpp/benchmarks/lists/copying/scatter_lists_benchmark.cu new file mode 100644 index 00000000000..49007fda7a3 --- /dev/null +++ b/cpp/benchmarks/lists/copying/scatter_lists_benchmark.cu @@ -0,0 +1,131 @@ +/* + * Copyright (c) 2021, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include + +#include +#include +#include +#include + +#include +#include +#include + +#include +#include +#include + +#include + +namespace cudf { + +class ScatterLists : public cudf::benchmark { +}; + +template +void BM_lists_scatter(::benchmark::State& state) +{ + auto stream = rmm::cuda_stream_default; + auto mr = rmm::mr::get_current_device_resource(); + + const size_type base_size{(size_type)state.range(0)}; + const size_type num_elements_per_row{(size_type)state.range(1)}; + const size_type num_rows = (size_type)ceil(double(base_size) / num_elements_per_row); + + auto source_base_col = make_fixed_width_column( + data_type{type_to_id()}, base_size, mask_state::UNALLOCATED, stream, mr); + auto target_base_col = make_fixed_width_column( + data_type{type_to_id()}, base_size, mask_state::UNALLOCATED, stream, mr); + thrust::sequence(rmm::exec_policy(stream), + source_base_col->mutable_view().begin(), + source_base_col->mutable_view().end()); + thrust::sequence(rmm::exec_policy(stream), + target_base_col->mutable_view().begin(), + target_base_col->mutable_view().end()); + + auto source_offsets = make_fixed_width_column( + data_type{type_to_id()}, num_rows + 1, mask_state::UNALLOCATED, stream, mr); + auto target_offsets = make_fixed_width_column( + data_type{type_to_id()}, num_rows + 1, mask_state::UNALLOCATED, stream, mr); + + thrust::sequence(rmm::exec_policy(stream), + source_offsets->mutable_view().begin(), + source_offsets->mutable_view().end(), + 0, + num_elements_per_row); + thrust::sequence(rmm::exec_policy(stream), + target_offsets->mutable_view().begin(), + target_offsets->mutable_view().end(), + 0, + num_elements_per_row); + + auto source = make_lists_column(num_rows, + std::move(source_offsets), + std::move(source_base_col), + 0, + cudf::create_null_mask(num_rows, mask_state::UNALLOCATED), + stream, + mr); + auto target = make_lists_column(num_rows, + std::move(target_offsets), + std::move(target_base_col), + 0, + cudf::create_null_mask(num_rows, mask_state::UNALLOCATED), + stream, + mr); + + auto scatter_map = make_fixed_width_column( + data_type{type_to_id()}, num_rows, mask_state::UNALLOCATED, stream, mr); + auto m_scatter_map = scatter_map->mutable_view(); + thrust::sequence(rmm::exec_policy(stream), + m_scatter_map.begin(), + m_scatter_map.end(), + num_rows - 1, + -1); + + if (not coalesce) { + thrust::default_random_engine g; + thrust::shuffle(rmm::exec_policy(stream), + m_scatter_map.begin(), + m_scatter_map.begin(), + g); + } + + for (auto _ : state) { + cuda_event_timer raii(state, true); // flush_l2_cache = true, stream = 0 + scatter(table_view{{*source}}, *scatter_map, table_view{{*target}}, false, mr); + } + + state.SetBytesProcessed(static_cast(state.iterations()) * state.range(0) * 2 * + sizeof(TypeParam)); +} + +#define SBM_BENCHMARK_DEFINE(name, type, coalesce) \ + BENCHMARK_DEFINE_F(ScatterLists, name)(::benchmark::State & state) \ + { \ + BM_lists_scatter(state); \ + } \ + BENCHMARK_REGISTER_F(ScatterLists, name) \ + ->RangeMultiplier(8) \ + ->Ranges({{1 << 10, 1 << 25}, {64, 2048}}) /* 1K-1B rows, 64-2048 elements */ \ + ->UseManualTime(); + +SBM_BENCHMARK_DEFINE(double_type_colesce_o, double, true); +SBM_BENCHMARK_DEFINE(double_type_colesce_x, double, false); + +} // namespace cudf diff --git a/cpp/include/cudf/lists/detail/scatter.cuh b/cpp/include/cudf/lists/detail/scatter.cuh index dac67545748..a440e456e25 100644 --- a/cpp/include/cudf/lists/detail/scatter.cuh +++ b/cpp/include/cudf/lists/detail/scatter.cuh @@ -19,131 +19,22 @@ #include #include #include -#include -#include -#include -#include +#include +#include #include #include #include -#include -#include #include #include #include -#include -#include -#include - #include namespace cudf { namespace lists { namespace detail { -namespace { - -/** - * @brief Holder for a list row's positional information, without - * also holding a reference to the list column. - * - * Analogous to the list_view, this class is default constructable, - * and can thus be stored in rmm::device_uvector. It is used to represent - * the results of a `scatter()` operation; a device_uvector may hold - * several instances of unbound_list_view, each with a flag indicating - * whether it came from the scatter source or target. Each instance - * may later be "bound" to the appropriate source/target column, to - * reconstruct the list_view. - */ -struct unbound_list_view { - /** - * @brief Flag type, indicating whether this list row originated from - * the source or target column, in `scatter()`. - */ - enum class label_type : bool { SOURCE, TARGET }; - - using lists_column_device_view = cudf::detail::lists_column_device_view; - using list_device_view = cudf::list_device_view; - - unbound_list_view() = default; - unbound_list_view(unbound_list_view const&) = default; - unbound_list_view(unbound_list_view&&) = default; - unbound_list_view& operator=(unbound_list_view const&) = default; - unbound_list_view& operator=(unbound_list_view&&) = default; - - /** - * @brief __device__ Constructor, for use from `scatter()`. - * - * @param scatter_source_label Whether the row came from source or target - * @param lists_column The actual source/target lists column - * @param row_index Index of the row in lists_column that this instance represents - */ - CUDA_DEVICE_CALLABLE unbound_list_view(label_type scatter_source_label, - cudf::detail::lists_column_device_view const& lists_column, - size_type const& row_index) - : _label{scatter_source_label}, _row_index{row_index} - { - _size = list_device_view{lists_column, row_index}.size(); - } - - /** - * @brief __device__ Constructor, for use when constructing the child column - * of a scattered list column - * - * @param scatter_source_label Whether the row came from source or target - * @param row_index Index of the row that this instance represents in the source/target column - * @param size The number of elements in this list row - */ - CUDA_DEVICE_CALLABLE unbound_list_view(label_type scatter_source_label, - size_type const& row_index, - size_type const& size) - : _label{scatter_source_label}, _row_index{row_index}, _size{size} - { - } - - /** - * @brief Returns number of elements in this list row. - */ - CUDA_DEVICE_CALLABLE size_type size() const { return _size; } - - /** - * @brief Returns whether this row came from the `scatter()` source or target - */ - CUDA_DEVICE_CALLABLE label_type label() const { return _label; } - - /** - * @brief Returns the index in the source/target column - */ - CUDA_DEVICE_CALLABLE size_type row_index() const { return _row_index; } - - /** - * @brief Binds to source/target column (depending on SOURCE/TARGET labels), - * to produce a bound list_view. - * - * @param scatter_source Source column for the scatter operation - * @param scatter_target Target column for the scatter operation - * @return A (bound) list_view for the row that this object represents - */ - CUDA_DEVICE_CALLABLE list_device_view - bind_to_column(lists_column_device_view const& scatter_source, - lists_column_device_view const& scatter_target) const - { - return list_device_view(_label == label_type::SOURCE ? scatter_source : scatter_target, - _row_index); - } - - private: - // Note: Cannot store reference to list column, because of storage in device_uvector. - // Only keep track of whether this list row came from the source or target of scatter. - - label_type _label{ - label_type::SOURCE}; // Whether this list row came from the scatter source or target. - size_type _row_index{}; // Row index in the Lists column. - size_type _size{}; // Number of elements in *this* list row. -}; - template rmm::device_uvector list_vector_from_column( unbound_list_view::label_type label, @@ -168,503 +59,6 @@ rmm::device_uvector list_vector_from_column( return vector; } -/** - * @brief Constructs null mask for a scattered list's child column - * - * @param parent_list_vector Vector of unbound_list_view, for parent lists column - * @param parent_list_offsets List column offsets for parent lists column - * @param source_lists Source lists column for scatter operation - * @param target_lists Target lists column for scatter operation - * @param num_child_rows Number of rows in child column - * @param stream CUDA stream used for device memory operations and kernel launches - * @param mr Device memory resource used to allocate child column's null mask - * @return std::pair Child column's null mask and null row count - */ -std::pair construct_child_nullmask( - rmm::device_uvector const& parent_list_vector, - column_view const& parent_list_offsets, - cudf::detail::lists_column_device_view const& source_lists, - cudf::detail::lists_column_device_view const& target_lists, - size_type num_child_rows, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) -{ - auto is_valid_predicate = [d_list_vector = parent_list_vector.begin(), - d_offsets = parent_list_offsets.template data(), - d_offsets_size = parent_list_offsets.size(), - source_lists, - target_lists] __device__(auto const& i) { - auto list_start = - thrust::upper_bound(thrust::seq, d_offsets, d_offsets + d_offsets_size, i) - 1; - auto list_index = list_start - d_offsets; - auto element_index = i - *list_start; - - auto list_row = d_list_vector[list_index]; - return !list_row.bind_to_column(source_lists, target_lists).is_null(element_index); - }; - - return cudf::detail::valid_if(thrust::make_counting_iterator(0), - thrust::make_counting_iterator(num_child_rows), - is_valid_predicate, - stream, - mr); -} - -/** - * @brief (type_dispatch endpoint) Functor that constructs the child column result - * of `scatter()`ing a list column. - * - * The protocol is as follows: - * - * Inputs: - * 1. list_vector: A device_uvector of unbound_list_view, with each element - * indicating the position, size, and which column the list - * row came from. - * 2. list_offsets: The offsets column for the (outer) lists column, each offset - * marking the beginning of a list row. - * 3. source_list: The lists-column that is the source of the scatter(). - * 4. target_list: The lists-column that is the target of the scatter(). - * - * Output: A (possibly non-list) child column, which may be used in combination - * with list_offsets to fully construct the outer list. - * - * Example: - * - * Consider the following scatter operation of two `list` columns: - * - * 1. Source: [{9,9,9,9}, {8,8,8}], i.e. - * a. Child: [9,9,9,9,8,8,8] - * b. Offsets: [0, 4, 7] - * - * 2. Target: [{1,1}, {2,2}, {3,3}], i.e. - * a. Child: [1,1,2,2,3,3] - * b. Offsets: [0, 2, 4, 6] - * - * 3. Scatter-map: [2, 0] - * - * 4. Expected output: [{8,8,8}, {2,2}, {9,9,9,9}], i.e. - * a. Child: [8,8,8,2,2,9,9,9,9] <--- THIS - * b. Offsets: [0, 3, 5, 9] - * - * `list_child_constructor` constructs the Expected Child column indicated above. - * - * `list_child_constructor` expects to be called with the `Source`/`Target` - * lists columns, along with the following: - * - * 1. list_vector: [ S[1](3), T[1](2), S[0](4) ] - * Each unbound_list_view (e.g. S[1](3)) indicates: - * a. Which column the row is bound to: S == Source, T == Target - * b. The list index. E.g. S[1] indicates the 2nd list row of the Source column. - * c. The row size. E.g. S[1](3) indicates that the row has 3 elements. - * - * 2. list_offsets: [0, 3, 5, 9] - * The caller may construct this with an `inclusive_scan()` on `list_vector` - * element sizes. - */ -struct list_child_constructor { - private: - /** - * @brief Determine whether the child column type is supported with scattering lists. - * - * @tparam T The data type of the child column of the list being scattered. - */ - template - struct is_supported_child_type { - static const bool value = cudf::is_fixed_width() || std::is_same::value || - std::is_same::value || - std::is_same::value; - }; - - public: - // SFINAE catch-all, for unsupported child column types. - template - std::enable_if_t::value, std::unique_ptr> operator()( - Args&&... args) - { - CUDF_FAIL("list_child_constructor unsupported!"); - } - - /** - * @brief Implementation for fixed_width child column types. - */ - template - std::enable_if_t(), std::unique_ptr> operator()( - rmm::device_uvector const& list_vector, - cudf::column_view const& list_offsets, - cudf::lists_column_view const& source_lists_column_view, - cudf::lists_column_view const& target_lists_column_view, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) const - { - auto source_column_device_view = - column_device_view::create(source_lists_column_view.parent(), stream); - auto target_column_device_view = - column_device_view::create(target_lists_column_view.parent(), stream); - auto source_lists = cudf::detail::lists_column_device_view(*source_column_device_view); - auto target_lists = cudf::detail::lists_column_device_view(*target_column_device_view); - - auto const num_child_rows{ - cudf::detail::get_value(list_offsets, list_offsets.size() - 1, stream)}; - - auto child_null_mask = - source_lists_column_view.child().nullable() || target_lists_column_view.child().nullable() - ? construct_child_nullmask( - list_vector, list_offsets, source_lists, target_lists, num_child_rows, stream, mr) - : std::make_pair(rmm::device_buffer{}, 0); - - auto child_column = cudf::make_fixed_width_column(source_lists_column_view.child().type(), - num_child_rows, - std::move(child_null_mask.first), - child_null_mask.second, - stream, - mr); - - auto copy_child_values_for_list_index = - [d_scattered_lists = list_vector.begin(), // unbound_list_view* - d_child_column = child_column->mutable_view().data(), - d_offsets = list_offsets.template data(), - source_lists, - target_lists] __device__(auto const& row_index) { - auto const unbound_list_row = d_scattered_lists[row_index]; - auto const actual_list_row = unbound_list_row.bind_to_column(source_lists, target_lists); - auto const& bound_column = - (unbound_list_row.label() == unbound_list_view::label_type::SOURCE ? source_lists - : target_lists); - auto const list_begin_offset = - bound_column.offsets().template element(unbound_list_row.row_index()); - auto const list_end_offset = - bound_column.offsets().template element(unbound_list_row.row_index() + 1); - - // Copy all elements in this list row, to "appropriate" offset in child-column. - auto const destination_start_offset = d_offsets[row_index]; - thrust::for_each_n(thrust::seq, - thrust::make_counting_iterator(0), - actual_list_row.size(), - [actual_list_row, d_child_column, destination_start_offset] __device__( - auto const& list_element_index) { - d_child_column[destination_start_offset + list_element_index] = - actual_list_row.template element(list_element_index); - }); - }; - - // For each list-row, copy underlying elements to the child column. - thrust::for_each_n(rmm::exec_policy(stream), - thrust::make_counting_iterator(0), - list_vector.size(), - copy_child_values_for_list_index); - - return std::make_unique(child_column->view()); - } - - /** - * @brief Implementation for list child columns that contain strings. - */ - template - std::enable_if_t::value, std::unique_ptr> operator()( - rmm::device_uvector const& list_vector, - cudf::column_view const& list_offsets, - cudf::lists_column_view const& source_lists_column_view, - cudf::lists_column_view const& target_lists_column_view, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) const - { - auto source_column_device_view = - column_device_view::create(source_lists_column_view.parent(), stream); - auto target_column_device_view = - column_device_view::create(target_lists_column_view.parent(), stream); - auto source_lists = cudf::detail::lists_column_device_view(*source_column_device_view); - auto target_lists = cudf::detail::lists_column_device_view(*target_column_device_view); - - auto const num_child_rows{ - cudf::detail::get_value(list_offsets, list_offsets.size() - 1, stream)}; - - if (num_child_rows == 0) { return make_empty_column(data_type{type_id::STRING}); } - - auto string_views = rmm::device_uvector(num_child_rows, stream); - - auto populate_string_views = [d_scattered_lists = list_vector.begin(), // unbound_list_view* - d_list_offsets = list_offsets.template data(), - d_string_views = string_views.data(), - source_lists, - target_lists] __device__(auto const& row_index) { - auto unbound_list_view = d_scattered_lists[row_index]; - auto actual_list_row = unbound_list_view.bind_to_column(source_lists, target_lists); - auto lists_column = actual_list_row.get_column(); - auto lists_offsets_column = lists_column.offsets(); - auto child_strings_column = lists_column.child(); - auto string_offsets_column = - child_strings_column.child(cudf::strings_column_view::offsets_column_index); - auto string_chars_column = - child_strings_column.child(cudf::strings_column_view::chars_column_index); - - auto output_start_offset = - d_list_offsets[row_index]; // Offset in `string_views` at which string_views are - // to be written for this list row_index. - auto input_list_start = - lists_offsets_column.template element(unbound_list_view.row_index()); - - thrust::for_each_n( - thrust::seq, - thrust::make_counting_iterator(0), - actual_list_row.size(), - [output_start_offset, - d_string_views, - input_list_start, - d_string_offsets = string_offsets_column.template data(), - d_string_chars = - string_chars_column.template data()] __device__(auto const& string_idx) { - auto string_start_idx = d_string_offsets[input_list_start + string_idx]; - auto string_end_idx = d_string_offsets[input_list_start + string_idx + 1]; - - d_string_views[output_start_offset + string_idx] = - string_view{d_string_chars + string_start_idx, string_end_idx - string_start_idx}; - }); - }; - - thrust::for_each_n(rmm::exec_policy(stream), - thrust::make_counting_iterator(0), - list_vector.size(), - populate_string_views); - - // string_views should now have been populated with source and target references. - - auto string_offsets = cudf::strings::detail::child_offsets_from_string_iterator( - string_views.begin(), string_views.size(), stream, mr); - - auto string_chars = cudf::strings::detail::child_chars_from_string_vector( - string_views, string_offsets->view(), stream, mr); - auto child_null_mask = - source_lists_column_view.child().nullable() || target_lists_column_view.child().nullable() - ? construct_child_nullmask( - list_vector, list_offsets, source_lists, target_lists, num_child_rows, stream, mr) - : std::make_pair(rmm::device_buffer{}, 0); - - return cudf::make_strings_column(num_child_rows, - std::move(string_offsets), - std::move(string_chars), - child_null_mask.second, // Null count. - std::move(child_null_mask.first), // Null mask. - stream, - mr); - } - - /** - * @brief (Recursively) Constructs a child column that is itself a list column. - */ - template - std::enable_if_t::value, std::unique_ptr> operator()( - rmm::device_uvector const& list_vector, - cudf::column_view const& list_offsets, - cudf::lists_column_view const& source_lists_column_view, - cudf::lists_column_view const& target_lists_column_view, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) const - { - auto source_column_device_view = - column_device_view::create(source_lists_column_view.parent(), stream); - auto target_column_device_view = - column_device_view::create(target_lists_column_view.parent(), stream); - auto source_lists = cudf::detail::lists_column_device_view(*source_column_device_view); - auto target_lists = cudf::detail::lists_column_device_view(*target_column_device_view); - - auto const num_child_rows{ - cudf::detail::get_value(list_offsets, list_offsets.size() - 1, stream)}; - - if (num_child_rows == 0) { - // make an empty lists column using the input child type - return empty_like(source_lists_column_view.child()); - } - - auto child_list_views = rmm::device_uvector(num_child_rows, stream, mr); - - // Function to convert from parent list_device_view instances to child list_device_views. - // For instance, if a parent list_device_view has 3 elements, it should have 3 corresponding - // child list_device_view instances. - auto populate_child_list_views = [d_scattered_lists = list_vector.begin(), - d_list_offsets = list_offsets.template data(), - d_child_list_views = child_list_views.begin(), - source_lists, - target_lists] __device__(auto const& row_index) { - auto scattered_row = d_scattered_lists[row_index]; - auto label = scattered_row.label(); - auto bound_list_row = scattered_row.bind_to_column(source_lists, target_lists); - auto lists_offsets_column = bound_list_row.get_column().offsets(); - - auto child_column = bound_list_row.get_column().child(); - auto child_offsets = child_column.child(cudf::lists_column_view::offsets_column_index); - - // For lists row at row_index, - // 1. Number of entries in child_list_views == bound_list_row.size(). - // 2. Offset of the first child list_view == d_list_offsets[row_index]. - auto output_start_offset = d_list_offsets[row_index]; - auto input_list_start = - lists_offsets_column.template element(scattered_row.row_index()); - - thrust::for_each_n( - thrust::seq, - thrust::make_counting_iterator(0), - bound_list_row.size(), - [input_list_start, - output_start_offset, - label, - d_child_list_views, - d_child_offsets = - child_offsets.template data()] __device__(auto const& child_list_index) { - auto child_start_idx = d_child_offsets[input_list_start + child_list_index]; - auto child_end_idx = d_child_offsets[input_list_start + child_list_index + 1]; - - d_child_list_views[output_start_offset + child_list_index] = unbound_list_view{ - label, input_list_start + child_list_index, child_end_idx - child_start_idx}; - }); - }; - - thrust::for_each_n(rmm::exec_policy(stream), - thrust::make_counting_iterator(0), - list_vector.size(), - populate_child_list_views); - - // child_list_views should now have been populated, with source and target references. - - auto begin = thrust::make_transform_iterator( - child_list_views.begin(), [] __device__(auto const& row) { return row.size(); }); - - auto child_offsets = cudf::strings::detail::make_offsets_child_column( - begin, begin + child_list_views.size(), stream, mr); - - auto child_column = cudf::type_dispatcher( - source_lists_column_view.child().child(1).type(), - list_child_constructor{}, - child_list_views, - child_offsets->view(), - cudf::lists_column_view(source_lists_column_view.child()), - cudf::lists_column_view(target_lists_column_view.child()), - stream, - mr); - - auto child_null_mask = - source_lists_column_view.child().nullable() || target_lists_column_view.child().nullable() - ? construct_child_nullmask( - list_vector, list_offsets, source_lists, target_lists, num_child_rows, stream, mr) - : std::make_pair(rmm::device_buffer{}, 0); - - return cudf::make_lists_column(num_child_rows, - std::move(child_offsets), - std::move(child_column), - child_null_mask.second, // Null count - std::move(child_null_mask.first), // Null mask - stream, - mr); - } - - /** - * @brief (Recursively) constructs child columns that are structs. - */ - template - std::enable_if_t::value, std::unique_ptr> operator()( - rmm::device_uvector const& list_vector, - cudf::column_view const& list_offsets, - cudf::lists_column_view const& source_lists_column_view, - cudf::lists_column_view const& target_lists_column_view, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) const - { - auto const source_column_device_view = - column_device_view::create(source_lists_column_view.parent(), stream); - auto const target_column_device_view = - column_device_view::create(target_lists_column_view.parent(), stream); - auto const source_lists = cudf::detail::lists_column_device_view(*source_column_device_view); - auto const target_lists = cudf::detail::lists_column_device_view(*target_column_device_view); - - auto const source_structs = source_lists_column_view.child(); - auto const target_structs = target_lists_column_view.child(); - - auto const num_child_rows{ - cudf::detail::get_value(list_offsets, list_offsets.size() - 1, stream)}; - - auto const num_struct_members = - std::distance(source_structs.child_begin(), source_structs.child_end()); - std::vector> child_columns; - child_columns.reserve(num_struct_members); - - auto project_member_as_list = [stream, mr](column_view const& structs_member, - cudf::size_type const& structs_list_num_rows, - column_view const& structs_list_offsets, - rmm::device_buffer const& structs_list_nullmask, - cudf::size_type const& structs_list_null_count) { - return cudf::make_lists_column(structs_list_num_rows, - std::make_unique(structs_list_offsets, stream, mr), - std::make_unique(structs_member, stream, mr), - structs_list_null_count, - rmm::device_buffer(structs_list_nullmask, stream), - stream, - mr); - }; - - auto const iter_source_member_as_list = thrust::make_transform_iterator( - thrust::make_counting_iterator(0), [&](auto child_idx) { - return project_member_as_list( - source_structs.child(child_idx), - source_lists_column_view.size(), - source_lists_column_view.offsets(), - cudf::detail::copy_bitmask(source_lists_column_view.parent(), stream, mr), - source_lists_column_view.null_count()); - }); - - auto const iter_target_member_as_list = thrust::make_transform_iterator( - thrust::make_counting_iterator(0), [&](auto child_idx) { - return project_member_as_list( - target_structs.child(child_idx), - target_lists_column_view.size(), - target_lists_column_view.offsets(), - cudf::detail::copy_bitmask(target_lists_column_view.parent(), stream, mr), - target_lists_column_view.null_count()); - }); - - std::transform( - iter_source_member_as_list, - iter_source_member_as_list + num_struct_members, - iter_target_member_as_list, - std::back_inserter(child_columns), - [&](auto source_struct_member_as_list, auto target_struct_member_as_list) { - return cudf::type_dispatcher( - source_struct_member_as_list->child(cudf::lists_column_view::child_column_index).type(), - list_child_constructor{}, - list_vector, - list_offsets, - cudf::lists_column_view(source_struct_member_as_list->view()), - cudf::lists_column_view(target_struct_member_as_list->view()), - stream, - mr); - }); - - auto child_null_mask = - source_lists_column_view.child().nullable() || target_lists_column_view.child().nullable() - ? construct_child_nullmask( - list_vector, list_offsets, source_lists, target_lists, num_child_rows, stream, mr) - : std::make_pair(rmm::device_buffer{}, 0); - - return cudf::make_structs_column(num_child_rows, - std::move(child_columns), - child_null_mask.second, - std::move(child_null_mask.first), - stream, - mr); - } -}; - -/** - * @brief Checks that the specified columns have matching schemas, all the way down. - */ -void assert_same_data_type(column_view const& lhs, column_view const& rhs) -{ - CUDF_EXPECTS(lhs.type().id() == rhs.type().id(), "Mismatched Data types."); - // Empty string column has no children - CUDF_EXPECTS(lhs.type().id() == type_id::STRING or lhs.num_children() == rhs.num_children(), - "Mismatched number of child columns."); - - for (int i{0}; i < lhs.num_children(); ++i) { assert_same_data_type(lhs.child(i), rhs.child(i)); } -} - /** * @brief General implementation of scattering into list column * @@ -716,14 +110,13 @@ std::unique_ptr scatter_impl( auto offsets_column = cudf::strings::detail::make_offsets_child_column( list_size_begin, list_size_begin + target.size(), stream, mr); - auto child_column = cudf::type_dispatcher(child_column_type, - list_child_constructor{}, - target_vector, - offsets_column->view(), - source_lists_column_view, - target_lists_column_view, - stream, - mr); + auto child_column = build_lists_child_column_recursive(child_column_type, + target_vector, + offsets_column->view(), + source_lists_column_view, + target_lists_column_view, + stream, + mr); auto null_mask = target.has_nulls() ? copy_bitmask(target, stream, mr) : rmm::device_buffer{0, stream, mr}; @@ -737,8 +130,6 @@ std::unique_ptr scatter_impl( mr); } -} // namespace - /** * @brief Scatters lists into a copy of the target column * according to a scatter map. diff --git a/cpp/include/cudf/lists/detail/scatter_helper.cuh b/cpp/include/cudf/lists/detail/scatter_helper.cuh new file mode 100644 index 00000000000..76121bc35e9 --- /dev/null +++ b/cpp/include/cudf/lists/detail/scatter_helper.cuh @@ -0,0 +1,148 @@ +/* + * Copyright (c) 2021, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include +#include +#include +#include + +#include +#include + +#include + +namespace cudf { +namespace lists { +namespace detail { + +/** + * @brief Holder for a list row's positional information, without + * also holding a reference to the list column. + * + * Analogous to the list_view, this class is default constructable, + * and can thus be stored in rmm::device_uvector. It is used to represent + * the results of a `scatter()` operation; a device_uvector may hold + * several instances of unbound_list_view, each with a flag indicating + * whether it came from the scatter source or target. Each instance + * may later be "bound" to the appropriate source/target column, to + * reconstruct the list_view. + */ +struct unbound_list_view { + /** + * @brief Flag type, indicating whether this list row originated from + * the source or target column, in `scatter()`. + */ + enum class label_type : bool { SOURCE, TARGET }; + + using lists_column_device_view = cudf::detail::lists_column_device_view; + using list_device_view = cudf::list_device_view; + + unbound_list_view() = default; + unbound_list_view(unbound_list_view const&) = default; + unbound_list_view(unbound_list_view&&) = default; + unbound_list_view& operator=(unbound_list_view const&) = default; + unbound_list_view& operator=(unbound_list_view&&) = default; + + /** + * @brief __device__ Constructor, for use from `scatter()`. + * + * @param scatter_source_label Whether the row came from source or target + * @param lists_column The actual source/target lists column + * @param row_index Index of the row in lists_column that this instance represents + */ + CUDA_DEVICE_CALLABLE unbound_list_view(label_type scatter_source_label, + cudf::detail::lists_column_device_view const& lists_column, + size_type const& row_index) + : _label{scatter_source_label}, _row_index{row_index} + { + _size = list_device_view{lists_column, row_index}.size(); + } + + /** + * @brief __device__ Constructor, for use when constructing the child column + * of a scattered list column + * + * @param scatter_source_label Whether the row came from source or target + * @param row_index Index of the row that this instance represents in the source/target column + * @param size The number of elements in this list row + */ + CUDA_DEVICE_CALLABLE unbound_list_view(label_type scatter_source_label, + size_type const& row_index, + size_type const& size) + : _label{scatter_source_label}, _row_index{row_index}, _size{size} + { + } + + /** + * @brief Returns number of elements in this list row. + */ + CUDA_DEVICE_CALLABLE size_type size() const { return _size; } + + /** + * @brief Returns whether this row came from the `scatter()` source or target + */ + CUDA_DEVICE_CALLABLE label_type label() const { return _label; } + + /** + * @brief Returns the index in the source/target column + */ + CUDA_DEVICE_CALLABLE size_type row_index() const { return _row_index; } + + /** + * @brief Binds to source/target column (depending on SOURCE/TARGET labels), + * to produce a bound list_view. + * + * @param scatter_source Source column for the scatter operation + * @param scatter_target Target column for the scatter operation + * @return A (bound) list_view for the row that this object represents + */ + CUDA_DEVICE_CALLABLE list_device_view + bind_to_column(lists_column_device_view const& scatter_source, + lists_column_device_view const& scatter_target) const + { + return list_device_view(_label == label_type::SOURCE ? scatter_source : scatter_target, + _row_index); + } + + private: + // Note: Cannot store reference to list column, because of storage in device_uvector. + // Only keep track of whether this list row came from the source or target of scatter. + + label_type _label{ + label_type::SOURCE}; // Whether this list row came from the scatter source or target. + size_type _row_index{}; // Row index in the Lists column. + size_type _size{}; // Number of elements in *this* list row. +}; + +/** + * @brief Checks that the specified columns have matching schemas, all the way down. + */ +void assert_same_data_type(column_view const& lhs, column_view const& rhs); + +std::unique_ptr build_lists_child_column_recursive( + data_type child_column_type, + rmm::device_uvector const& list_vector, + cudf::column_view const& list_offsets, + cudf::lists_column_view const& source_lists_column_view, + cudf::lists_column_view const& target_lists_column_view, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr); + +} // namespace detail +} // namespace lists +} // namespace cudf diff --git a/cpp/src/lists/copying/scatter_helper.cu b/cpp/src/lists/copying/scatter_helper.cu new file mode 100644 index 00000000000..c57327569a4 --- /dev/null +++ b/cpp/src/lists/copying/scatter_helper.cu @@ -0,0 +1,511 @@ +/* + * Copyright (c) 2021, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +namespace cudf { +namespace lists { +namespace detail { + +void assert_same_data_type(column_view const& lhs, column_view const& rhs) +{ + CUDF_EXPECTS(lhs.type().id() == rhs.type().id(), "Mismatched Data types."); + // Empty string column has no children + CUDF_EXPECTS(lhs.type().id() == type_id::STRING or lhs.num_children() == rhs.num_children(), + "Mismatched number of child columns."); + + for (int i{0}; i < lhs.num_children(); ++i) { assert_same_data_type(lhs.child(i), rhs.child(i)); } +} + +/** + * @brief Constructs null mask for a scattered list's child column + * + * @param parent_list_vector Vector of unbound_list_view, for parent lists column + * @param parent_list_offsets List column offsets for parent lists column + * @param source_lists Source lists column for scatter operation + * @param target_lists Target lists column for scatter operation + * @param num_child_rows Number of rows in child column + * @param stream CUDA stream used for device memory operations and kernel launches + * @param mr Device memory resource used to allocate child column's null mask + * @return std::pair Child column's null mask and null row count + */ +std::pair construct_child_nullmask( + rmm::device_uvector const& parent_list_vector, + column_view const& parent_list_offsets, + cudf::detail::lists_column_device_view const& source_lists, + cudf::detail::lists_column_device_view const& target_lists, + size_type num_child_rows, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) +{ + auto is_valid_predicate = [d_list_vector = parent_list_vector.begin(), + d_offsets = parent_list_offsets.template data(), + d_offsets_size = parent_list_offsets.size(), + source_lists, + target_lists] __device__(auto const& i) { + auto list_start = + thrust::upper_bound(thrust::seq, d_offsets, d_offsets + d_offsets_size, i) - 1; + auto list_index = list_start - d_offsets; + auto element_index = i - *list_start; + + auto list_row = d_list_vector[list_index]; + return !list_row.bind_to_column(source_lists, target_lists).is_null(element_index); + }; + + return cudf::detail::valid_if(thrust::make_counting_iterator(0), + thrust::make_counting_iterator(num_child_rows), + is_valid_predicate, + stream, + mr); +} + +/** + * @brief (type_dispatch endpoint) Functor that constructs the child column result + * of `scatter()`ing a list column. + * + * The protocol is as follows: + * + * Inputs: + * 1. list_vector: A device_uvector of unbound_list_view, with each element + * indicating the position, size, and which column the list + * row came from. + * 2. list_offsets: The offsets column for the (outer) lists column, each offset + * marking the beginning of a list row. + * 3. source_list: The lists-column that is the source of the scatter(). + * 4. target_list: The lists-column that is the target of the scatter(). + * + * Output: A (possibly non-list) child column, which may be used in combination + * with list_offsets to fully construct the outer list. + * + * Example: + * + * Consider the following scatter operation of two `list` columns: + * + * 1. Source: [{9,9,9,9}, {8,8,8}], i.e. + * a. Child: [9,9,9,9,8,8,8] + * b. Offsets: [0, 4, 7] + * + * 2. Target: [{1,1}, {2,2}, {3,3}], i.e. + * a. Child: [1,1,2,2,3,3] + * b. Offsets: [0, 2, 4, 6] + * + * 3. Scatter-map: [2, 0] + * + * 4. Expected output: [{8,8,8}, {2,2}, {9,9,9,9}], i.e. + * a. Child: [8,8,8,2,2,9,9,9,9] <--- THIS + * b. Offsets: [0, 3, 5, 9] + * + * `list_child_constructor` constructs the Expected Child column indicated above. + * + * `list_child_constructor` expects to be called with the `Source`/`Target` + * lists columns, along with the following: + * + * 1. list_vector: [ S[1](3), T[1](2), S[0](4) ] + * Each unbound_list_view (e.g. S[1](3)) indicates: + * a. Which column the row is bound to: S == Source, T == Target + * b. The list index. E.g. S[1] indicates the 2nd list row of the Source column. + * c. The row size. E.g. S[1](3) indicates that the row has 3 elements. + * + * 2. list_offsets: [0, 3, 5, 9] + * The caller may construct this with an `inclusive_scan()` on `list_vector` + * element sizes. + */ +struct list_child_constructor { + private: + /** + * @brief Determine whether the child column type is supported with scattering lists. + * + * @tparam T The data type of the child column of the list being scattered. + */ + template + struct is_supported_child_type { + static const bool value = cudf::is_fixed_width() || std::is_same::value || + std::is_same::value || + std::is_same::value; + }; + + public: + // SFINAE catch-all, for unsupported child column types. + template + std::enable_if_t::value, std::unique_ptr> operator()( + Args&&... args) + { + CUDF_FAIL("list_child_constructor unsupported!"); + } + + /** + * @brief Implementation for fixed_width child column types. + */ + template + std::enable_if_t(), std::unique_ptr> operator()( + rmm::device_uvector const& list_vector, + cudf::column_view const& list_offsets, + cudf::lists_column_view const& source_lists_column_view, + cudf::lists_column_view const& target_lists_column_view, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) const + { + auto source_column_device_view = + column_device_view::create(source_lists_column_view.parent(), stream); + auto target_column_device_view = + column_device_view::create(target_lists_column_view.parent(), stream); + auto source_lists = cudf::detail::lists_column_device_view(*source_column_device_view); + auto target_lists = cudf::detail::lists_column_device_view(*target_column_device_view); + + auto const num_child_rows{ + cudf::detail::get_value(list_offsets, list_offsets.size() - 1, stream)}; + + auto child_null_mask = + source_lists_column_view.child().nullable() || target_lists_column_view.child().nullable() + ? construct_child_nullmask( + list_vector, list_offsets, source_lists, target_lists, num_child_rows, stream, mr) + : std::make_pair(rmm::device_buffer{}, 0); + + auto child_column = cudf::make_fixed_width_column(source_lists_column_view.child().type(), + num_child_rows, + std::move(child_null_mask.first), + child_null_mask.second, + stream, + mr); + + thrust::transform( + rmm::exec_policy(stream), + thrust::make_counting_iterator(0), + thrust::make_counting_iterator(child_column->size()), + child_column->mutable_view().begin(), + [offset_begin = list_offsets.begin(), + offset_size = list_offsets.size(), + d_list_vector = list_vector.begin(), + source_lists, + target_lists] __device__(auto index) { + auto const list_index_iter = + thrust::upper_bound(thrust::seq, offset_begin, offset_begin + offset_size, index); + auto const list_index = + static_cast(thrust::distance(offset_begin, list_index_iter) - 1); + auto const intra_index = static_cast(index - offset_begin[list_index]); + auto actual_list_row = d_list_vector[list_index].bind_to_column(source_lists, target_lists); + return actual_list_row.template element(intra_index); + }); + + return child_column; + } + + /** + * @brief Implementation for list child columns that contain strings. + */ + template + std::enable_if_t::value, std::unique_ptr> operator()( + rmm::device_uvector const& list_vector, + cudf::column_view const& list_offsets, + cudf::lists_column_view const& source_lists_column_view, + cudf::lists_column_view const& target_lists_column_view, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) const + { + auto source_column_device_view = + column_device_view::create(source_lists_column_view.parent(), stream); + auto target_column_device_view = + column_device_view::create(target_lists_column_view.parent(), stream); + auto source_lists = cudf::detail::lists_column_device_view(*source_column_device_view); + auto target_lists = cudf::detail::lists_column_device_view(*target_column_device_view); + + auto const num_child_rows{ + cudf::detail::get_value(list_offsets, list_offsets.size() - 1, stream)}; + + if (num_child_rows == 0) { return make_empty_column(data_type{type_id::STRING}); } + + auto string_views = rmm::device_uvector(num_child_rows, stream); + + thrust::transform( + rmm::exec_policy(stream), + thrust::make_counting_iterator(0), + thrust::make_counting_iterator(string_views.size()), + string_views.begin(), + [offset_begin = list_offsets.begin(), + offset_size = list_offsets.size(), + d_list_vector = list_vector.begin(), + source_lists, + target_lists] __device__(auto index) { + auto const list_index_iter = + thrust::upper_bound(thrust::seq, offset_begin, offset_begin + offset_size, index); + auto const list_index = + static_cast(thrust::distance(offset_begin, list_index_iter) - 1); + auto const intra_index = static_cast(index - offset_begin[list_index]); + auto row_index = d_list_vector[list_index].row_index(); + auto actual_list_row = d_list_vector[list_index].bind_to_column(source_lists, target_lists); + auto lists_column = actual_list_row.get_column(); + auto lists_offsets_ptr = lists_column.offsets().template data(); + auto child_strings_column = lists_column.child(); + auto string_offsets_ptr = + child_strings_column.child(cudf::strings_column_view::offsets_column_index) + .template data(); + auto string_chars_ptr = + child_strings_column.child(cudf::strings_column_view::chars_column_index) + .template data(); + + auto strings_offset = lists_offsets_ptr[row_index] + intra_index; + auto char_offset = string_offsets_ptr[strings_offset]; + auto char_ptr = string_chars_ptr + char_offset; + auto string_size = + string_offsets_ptr[strings_offset + 1] - string_offsets_ptr[strings_offset]; + return string_view{char_ptr, string_size}; + }); + + // string_views should now have been populated with source and target references. + + auto string_offsets = cudf::strings::detail::child_offsets_from_string_iterator( + string_views.begin(), string_views.size(), stream, mr); + + auto string_chars = cudf::strings::detail::child_chars_from_string_vector( + string_views, string_offsets->view(), stream, mr); + auto child_null_mask = + source_lists_column_view.child().nullable() || target_lists_column_view.child().nullable() + ? construct_child_nullmask( + list_vector, list_offsets, source_lists, target_lists, num_child_rows, stream, mr) + : std::make_pair(rmm::device_buffer{}, 0); + + return cudf::make_strings_column(num_child_rows, + std::move(string_offsets), + std::move(string_chars), + child_null_mask.second, // Null count. + std::move(child_null_mask.first), // Null mask. + stream, + mr); + } + + /** + * @brief (Recursively) Constructs a child column that is itself a list column. + */ + template + std::enable_if_t::value, std::unique_ptr> operator()( + rmm::device_uvector const& list_vector, + cudf::column_view const& list_offsets, + cudf::lists_column_view const& source_lists_column_view, + cudf::lists_column_view const& target_lists_column_view, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) const + { + auto source_column_device_view = + column_device_view::create(source_lists_column_view.parent(), stream); + auto target_column_device_view = + column_device_view::create(target_lists_column_view.parent(), stream); + auto source_lists = cudf::detail::lists_column_device_view(*source_column_device_view); + auto target_lists = cudf::detail::lists_column_device_view(*target_column_device_view); + + auto const num_child_rows{ + cudf::detail::get_value(list_offsets, list_offsets.size() - 1, stream)}; + + if (num_child_rows == 0) { + // make an empty lists column using the input child type + return empty_like(source_lists_column_view.child()); + } + + auto child_list_views = rmm::device_uvector(num_child_rows, stream, mr); + + // Convert from parent list_device_view instances to child list_device_views. + // For instance, if a parent list_device_view has 3 elements, it should have 3 corresponding + // child list_device_view instances. + thrust::transform( + rmm::exec_policy(stream), + thrust::make_counting_iterator(0), + thrust::make_counting_iterator(child_list_views.size()), + child_list_views.begin(), + [offset_begin = list_offsets.begin(), + offset_size = list_offsets.size(), + d_list_vector = list_vector.begin(), + source_lists, + target_lists] __device__(auto index) { + auto const list_index_iter = + thrust::upper_bound(thrust::seq, offset_begin, offset_begin + offset_size, index); + auto const list_index = + static_cast(thrust::distance(offset_begin, list_index_iter) - 1); + auto const intra_index = static_cast(index - offset_begin[list_index]); + auto label = d_list_vector[list_index].label(); + auto row_index = d_list_vector[list_index].row_index(); + auto actual_list_row = d_list_vector[list_index].bind_to_column(source_lists, target_lists); + auto lists_column = actual_list_row.get_column(); + auto child_lists_column = lists_column.child(); + auto lists_offsets_ptr = lists_column.offsets().template data(); + auto child_lists_offsets_ptr = + child_lists_column.child(lists_column_view::offsets_column_index) + .template data(); + auto child_row_index = lists_offsets_ptr[row_index] + intra_index; + auto size = + child_lists_offsets_ptr[child_row_index + 1] - child_lists_offsets_ptr[child_row_index]; + return unbound_list_view{label, child_row_index, size}; + }); + + // child_list_views should now have been populated, with source and target references. + + auto begin = thrust::make_transform_iterator( + child_list_views.begin(), [] __device__(auto const& row) { return row.size(); }); + + auto child_offsets = cudf::strings::detail::make_offsets_child_column( + begin, begin + child_list_views.size(), stream, mr); + + auto child_column = cudf::type_dispatcher( + source_lists_column_view.child().child(1).type(), + list_child_constructor{}, + child_list_views, + child_offsets->view(), + cudf::lists_column_view(source_lists_column_view.child()), + cudf::lists_column_view(target_lists_column_view.child()), + stream, + mr); + + auto child_null_mask = + source_lists_column_view.child().nullable() || target_lists_column_view.child().nullable() + ? construct_child_nullmask( + list_vector, list_offsets, source_lists, target_lists, num_child_rows, stream, mr) + : std::make_pair(rmm::device_buffer{}, 0); + + return cudf::make_lists_column(num_child_rows, + std::move(child_offsets), + std::move(child_column), + child_null_mask.second, // Null count + std::move(child_null_mask.first), // Null mask + stream, + mr); + } + + /** + * @brief (Recursively) constructs child columns that are structs. + */ + template + std::enable_if_t::value, std::unique_ptr> operator()( + rmm::device_uvector const& list_vector, + cudf::column_view const& list_offsets, + cudf::lists_column_view const& source_lists_column_view, + cudf::lists_column_view const& target_lists_column_view, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) const + { + auto const source_column_device_view = + column_device_view::create(source_lists_column_view.parent(), stream); + auto const target_column_device_view = + column_device_view::create(target_lists_column_view.parent(), stream); + auto const source_lists = cudf::detail::lists_column_device_view(*source_column_device_view); + auto const target_lists = cudf::detail::lists_column_device_view(*target_column_device_view); + + auto const source_structs = source_lists_column_view.child(); + auto const target_structs = target_lists_column_view.child(); + + auto const num_child_rows{ + cudf::detail::get_value(list_offsets, list_offsets.size() - 1, stream)}; + + auto const num_struct_members = + std::distance(source_structs.child_begin(), source_structs.child_end()); + std::vector> child_columns; + child_columns.reserve(num_struct_members); + + auto project_member_as_list_view = [](column_view const& structs_member, + cudf::size_type const& structs_list_num_rows, + column_view const& structs_list_offsets, + rmm::device_buffer const& structs_list_nullmask, + cudf::size_type const& structs_list_null_count) { + return lists_column_view( + column_view(data_type{type_id::LIST}, + structs_list_num_rows, + nullptr, + static_cast(structs_list_nullmask.data()), + structs_list_null_count, + 0, + {structs_list_offsets, structs_member})); + }; + + auto const iter_source_member_as_list = thrust::make_transform_iterator( + thrust::make_counting_iterator(0), [&](auto child_idx) { + return project_member_as_list_view( + source_structs.child(child_idx), + source_lists_column_view.size(), + source_lists_column_view.offsets(), + cudf::detail::copy_bitmask(source_lists_column_view.parent(), stream, mr), + source_lists_column_view.null_count()); + }); + + auto const iter_target_member_as_list = thrust::make_transform_iterator( + thrust::make_counting_iterator(0), [&](auto child_idx) { + return project_member_as_list_view( + target_structs.child(child_idx), + target_lists_column_view.size(), + target_lists_column_view.offsets(), + cudf::detail::copy_bitmask(target_lists_column_view.parent(), stream, mr), + target_lists_column_view.null_count()); + }); + + std::transform(iter_source_member_as_list, + iter_source_member_as_list + num_struct_members, + iter_target_member_as_list, + std::back_inserter(child_columns), + [&](auto source_struct_member_list_view, auto target_struct_member_list_view) { + return cudf::type_dispatcher( + source_struct_member_list_view.child().type(), + list_child_constructor{}, + list_vector, + list_offsets, + source_struct_member_list_view, + target_struct_member_list_view, + stream, + mr); + }); + + auto child_null_mask = + source_lists_column_view.child().nullable() || target_lists_column_view.child().nullable() + ? construct_child_nullmask( + list_vector, list_offsets, source_lists, target_lists, num_child_rows, stream, mr) + : std::make_pair(rmm::device_buffer{}, 0); + + return cudf::make_structs_column(num_child_rows, + std::move(child_columns), + child_null_mask.second, + std::move(child_null_mask.first), + stream, + mr); + } +}; + +std::unique_ptr build_lists_child_column_recursive( + data_type child_column_type, + rmm::device_uvector const& list_vector, + cudf::column_view const& list_offsets, + cudf::lists_column_view const& source_lists_column_view, + cudf::lists_column_view const& target_lists_column_view, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) +{ + return cudf::type_dispatcher(child_column_type, + list_child_constructor{}, + list_vector, + list_offsets, + source_lists_column_view, + target_lists_column_view, + stream, + mr); +} + +} // namespace detail +} // namespace lists +} // namespace cudf