From 85500af9e0d797ff5ca7ab857e8edf2650138477 Mon Sep 17 00:00:00 2001 From: Nghia Truong <7416935+ttnghia@users.noreply.github.com> Date: Mon, 15 May 2023 09:02:36 -0700 Subject: [PATCH] Refactor `transform_lists_of_structs` in `row_operators.cu` (#13288) This split the functions `transform_lists_of_structs` in `row_operators.cu` into separate functions or simplified their implementation. From one function that can process both cases of having either one or two input columns, now we have two functions, each one processes one case. Closes https://github.com/rapidsai/cudf/issues/13287. Authors: - Nghia Truong (https://github.com/ttnghia) Approvers: - Divye Gala (https://github.com/divyegala) - Vyas Ramasubramani (https://github.com/vyasr) - Mike Wilson (https://github.com/hyperbolic2346) - Bradley Dice (https://github.com/bdice) URL: https://github.com/rapidsai/cudf/pull/13288 --- cpp/src/table/row_operators.cu | 420 +++++++++++++++++++-------------- 1 file changed, 241 insertions(+), 179 deletions(-) diff --git a/cpp/src/table/row_operators.cu b/cpp/src/table/row_operators.cu index 9f3a5bcdfea..770a7c775b4 100644 --- a/cpp/src/table/row_operators.cu +++ b/cpp/src/table/row_operators.cu @@ -393,6 +393,88 @@ namespace lexicographic { namespace { +/** + * @brief Replace child of the input lists column by a new child column. + * + * If the input is not sliced, just replace the input child by the new_child. + * Otherwise, we have to generate new offsets and replace both the offsets and the child of the + * input by the new ones. This is because the new child was generated by ranking and always + * has zero offset, so it cannot replace the input child if it is sliced. + * + * The new generated offsets column needs to be returned and kept alive. 
+ * + * @param[in] input The input column_view of type LIST + * @param[in] new_child A new child column to replace the existing child of the input + * @param[in,out] out_cols An array to which the newly generated offsets are appended (if any) + * @param[in] stream CUDA stream used for device memory operations and kernel launches + * @param[in] mr Device memory resource used to allocate the new offsets column (if generated) + * @return An output column_view with child replaced + */ +auto replace_child(column_view const& input, + column_view const& new_child, + std::vector>& out_cols, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) +{ + auto const make_output = [&input](auto const& offsets_cv, auto const& child_cv) { + return column_view{data_type{type_id::LIST}, + input.size(), + nullptr, + input.null_mask(), + input.null_count(), + 0, + {offsets_cv, child_cv}}; + }; + + if (input.offset() == 0) { + return make_output(input.child(lists_column_view::offsets_column_index), new_child); + } + + out_cols.emplace_back( + cudf::lists::detail::get_normalized_offsets(lists_column_view{input}, stream, mr)); + return make_output(out_cols.back()->view(), new_child); +} + +/** + * @brief Compute ranks of the input column. + * + * `Dense` rank type must be used when ranking the input for later lexicographic + * comparison. + * + * To understand why, consider: `input = [ [{0, "a"}, {3, "c"}], [{0, "a"}, {2, "b"}] ]`. + * If first rank is used, `transformed_input = [ [0, 3], [1, 2] ]`. Comparing them will lead + * to the result row(0) < row(1) which is incorrect. + * With dense rank, `transformed_input = [ [0, 2], [0, 1] ]`, producing the correct output for + * lexicographic comparison. + * + * In addition, since the input column being ranked is always a nested child column instead of + * a top-level column, the column order for ranking should be fixed to the same value + * `order::ASCENDING` in all situations. 
+ * For example, with the same input above, using column order as `order::ASCENDING` we will have + * `transformed_input = [ [0, 2], [0, 1] ]`. The output of sorting `transformed_input` will be + * exactly the same as sorting `input` regardless of the sorting order (ASC or DESC). + * + * @param input The input column to compute ranks + * @param column_null_order The flag indicating how nulls compare to non-null values + * @param stream CUDA stream used for device memory operations and kernel launches + * @param mr Device memory resource used to allocate the returned column + * @return The output rank columns + */ +auto compute_ranks(column_view const& input, + null_order column_null_order, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) +{ + return cudf::detail::rank(input, + rank_method::DENSE, + order::ASCENDING, + null_policy::EXCLUDE, + column_null_order, + false /*percentage*/, + stream, + mr); +} + /** * @brief Transform any nested lists-of-structs column into lists-of-integers column. * @@ -402,126 +484,128 @@ namespace { * If the input column is not lists-of-structs, or does not contain lists-of-structs at any nested * level, the input will be passed through without any changes. 
+ * @param input The input column to transform + * @param column_null_order The flag indicating how nulls compare to non-null values + * @param stream CUDA stream used for device memory operations and kernel launches + * @param mr Device memory resource used to allocate the returned column(s) + * @return A pair consisting of new column_view representing the transformed input, along with + * an array containing its rank column(s) (of `size_type` type) and possibly new list + * offsets generated during the transformation process + */ +std::pair>> transform_lists_of_structs( + column_view const& input, + null_order column_null_order, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) +{ + std::vector> out_cols; + + if (input.type().id() == type_id::LIST) { + auto const child = cudf::lists_column_view{input}.get_sliced_child(stream); + + // Found a lists-of-structs column. + if (child.type().id() == type_id::STRUCT) { + out_cols.emplace_back(compute_ranks(child, column_null_order, stream, mr)); + return {replace_child(input, out_cols.back()->view(), out_cols, stream, mr), + std::move(out_cols)}; + } + // Found a lists-of-lists column. + else if (child.type().id() == type_id::LIST) { + // Recursively call transformation on the child column. + auto [new_child, out_cols_child] = + transform_lists_of_structs(child, column_null_order, stream, mr); + + // Only transform the current column if its child has been transformed. + if (out_cols_child.size() > 0) { + out_cols.insert(out_cols.end(), + std::make_move_iterator(out_cols_child.begin()), + std::make_move_iterator(out_cols_child.end())); + return {replace_child(input, new_child, out_cols, stream, mr), std::move(out_cols)}; + } + // else: child was not transformed so input is also not transformed. + } + // else: child is not STRUCT or LIST: no transformation. + } + // else: input.type().id() != type_id::LIST. + // In such situations, input.type().id() can still be type_id::STRUCT. 
However, any + // structs-of-lists should be decomposed into empty struct type `Struct<>` before being + // processed by this function so we do nothing here. + + // Passthrough: nothing changed. + return {input, std::move(out_cols)}; +} + +/** + * @brief Transform any nested lists-of-structs column into lists-of-integers column. + * + * For a lists-of-structs column at any nested level, its child structs column will be replaced by a + * `size_type` column computed as its ranks. In addition, equivalent child columns of both input + * columns (i.e., child columns at the same order, same nested level) will be combined and + * ranked together. + * + * If the input columns are not lists-of-structs, or do not contain lists-of-structs at any nested + * level, there will not be any changes. + * * @param lhs The input lhs column to transform - * @param rhs The input rhs column to transform (if available) + * @param rhs The input rhs column to transform * @param column_null_order The flag indicating how nulls compare to non-null values * @param stream CUDA stream used for device memory operations and kernel launches - * @return A tuple consisting of new column_view representing the transformed input, along with - * their ranks column(s) (of `size_type` type) and possibly new list offsets generated + * @param mr Device memory resource used to allocate the returned column(s) + * @return A tuple consisting of new column_view(s) representing the transformed input, along with + * their rank column(s) (of `size_type` type) and possibly new list offsets generated * during the transformation process */ std::tuple, + column_view, std::vector>, std::vector>> transform_lists_of_structs(column_view const& lhs, - std::optional const& rhs_opt, + column_view const& rhs, null_order column_null_order, - rmm::cuda_stream_view stream) + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) { - auto const default_mr = rmm::mr::get_current_device_resource(); - - // If the input is 
not sliced, just replace the input child by new_child. - // Otherwise, we have to generate new offsets and replace both offsets/child of the input by the - // new ones. This is because the new child here is generated by ranking and always has zero - // offset thus cannot replace the input child if it is sliced. - // The new offsets column needs to be returned and kept alive. - auto const replace_child = [&](column_view const& input, - column_view const& new_child, - std::vector>& out_cols) { - auto const make_output = [&input](auto const& offsets_cv, auto const& child_cv) { - return column_view{data_type{type_id::LIST}, - input.size(), - nullptr, - input.null_mask(), - input.null_count(), - 0, - {offsets_cv, child_cv}}; - }; - - if (input.offset() == 0) { - return make_output(input.child(lists_column_view::offsets_column_index), new_child); - } - - out_cols.emplace_back( - cudf::lists::detail::get_normalized_offsets(lists_column_view{input}, stream, default_mr)); - return make_output(out_cols.back()->view(), new_child); - }; - - // Dense ranks should be used instead of first rank. - // Consider this example: `input = [ [{0, "a"}, {3, "c"}], [{0, "a"}, {2, "b"}] ]`. - // If first rank is used, `transformed_input = [ [0, 3], [1, 2] ]`. Comparing them will lead - // to the result row(0) < row(1) which is incorrect. - // With dense rank, `transformed_input = [ [0, 2], [0, 1] ]`, producing correct comparison. - // - // In addition, since the ranked structs column(s) are nested child column instead of - // top-level column, the column order should be fixed to the same values in all situations. - // For example, with the same input above, using the fixed values for column order - // (`order::ASCENDING`), we have `transformed_input = [ [0, 2], [0, 1] ]`. Sorting of - // `transformed_input` will produce the same result as sorting `input` regardless of sorting - // order (ASC or DESC). 
- auto const compute_ranks = [&](auto const& input) { - return cudf::detail::rank(input, - rank_method::DENSE, - order::ASCENDING, - null_policy::EXCLUDE, - column_null_order, - false /*percentage*/, - stream, - default_mr); - }; - std::vector> out_cols_lhs; std::vector> out_cols_rhs; + auto const make_output = [&](auto const& new_child_lhs, auto const& new_child_rhs) { + return std::tuple{replace_child(lhs, new_child_lhs, out_cols_lhs, stream, mr), + replace_child(rhs, new_child_rhs, out_cols_rhs, stream, mr), + std::move(out_cols_lhs), + std::move(out_cols_rhs)}; + }; + if (lhs.type().id() == type_id::LIST) { auto const child_lhs = cudf::lists_column_view{lhs}.get_sliced_child(stream); + auto const child_rhs = cudf::lists_column_view{rhs}.get_sliced_child(stream); // Found a lists-of-structs column. if (child_lhs.type().id() == type_id::STRUCT) { - if (rhs_opt) { // rhs table is available - auto const child_rhs = cudf::lists_column_view{rhs_opt.value()}.get_sliced_child(stream); - auto const concatenated_children = cudf::detail::concatenate( - std::vector{child_lhs, child_rhs}, stream, default_mr); - - auto const ranks = compute_ranks(concatenated_children->view()); - auto const ranks_slices = cudf::detail::slice( - ranks->view(), - {0, child_lhs.size(), child_lhs.size(), child_lhs.size() + child_rhs.size()}, - stream); - - out_cols_lhs.emplace_back(std::make_unique(ranks_slices.front())); - out_cols_rhs.emplace_back(std::make_unique(ranks_slices.back())); - - auto transformed_lhs = replace_child(lhs, out_cols_lhs.back()->view(), out_cols_lhs); - auto transformed_rhs = - replace_child(rhs_opt.value(), out_cols_rhs.back()->view(), out_cols_rhs); - - return {std::move(transformed_lhs), - std::optional{std::move(transformed_rhs)}, - std::move(out_cols_lhs), - std::move(out_cols_rhs)}; - } else { // rhs table is not available - out_cols_lhs.emplace_back(compute_ranks(child_lhs)); - auto transformed_lhs = replace_child(lhs, out_cols_lhs.back()->view(), out_cols_lhs); 
- - return {std::move(transformed_lhs), - std::nullopt, - std::move(out_cols_lhs), - std::move(out_cols_rhs)}; - } + auto const concatenated_children = + cudf::detail::concatenate(std::vector{child_lhs, child_rhs}, + stream, + rmm::mr::get_current_device_resource()); + + auto const ranks = compute_ranks(concatenated_children->view(), + column_null_order, + stream, + rmm::mr::get_current_device_resource()); + auto const ranks_slices = cudf::detail::slice( + ranks->view(), + {0, child_lhs.size(), child_lhs.size(), child_lhs.size() + child_rhs.size()}, + stream); + + out_cols_lhs.emplace_back(std::make_unique(ranks_slices.front(), stream, mr)); + out_cols_rhs.emplace_back(std::make_unique(ranks_slices.back(), stream, mr)); + + return make_output(out_cols_lhs.back()->view(), out_cols_rhs.back()->view()); + } // Found a lists-of-lists column. else if (child_lhs.type().id() == type_id::LIST) { - auto const child_rhs_opt = - rhs_opt - ? std::optional{cudf::lists_column_view{rhs_opt.value()}.get_sliced_child( - stream)} - : std::nullopt; - // Recursively call transformation on the child column. - auto [new_child_lhs, new_child_rhs_opt, out_cols_child_lhs, out_cols_child_rhs] = - transform_lists_of_structs(child_lhs, child_rhs_opt, column_null_order, stream); + auto [new_child_lhs, new_child_rhs, out_cols_child_lhs, out_cols_child_rhs] = + transform_lists_of_structs(child_lhs, child_rhs, column_null_order, stream, mr); // Only transform the current pair of columns if their children have been transformed. 
if (out_cols_child_lhs.size() > 0 || out_cols_child_rhs.size() > 0) { @@ -532,21 +616,7 @@ transform_lists_of_structs(column_view const& lhs, std::make_move_iterator(out_cols_child_rhs.begin()), std::make_move_iterator(out_cols_child_rhs.end())); - auto transformed_lhs = replace_child(lhs, new_child_lhs, out_cols_lhs); - if (rhs_opt) { - auto transformed_rhs = - replace_child(rhs_opt.value(), new_child_rhs_opt.value(), out_cols_rhs); - - return {std::move(transformed_lhs), - std::optional{std::move(transformed_rhs)}, - std::move(out_cols_lhs), - std::move(out_cols_rhs)}; - } else { - return {std::move(transformed_lhs), - std::nullopt, - std::move(out_cols_lhs), - std::move(out_cols_rhs)}; - } + return make_output(new_child_lhs, new_child_rhs); } } // else: child is not STRUCT or LIST: just go to the end of this function, no transformation. @@ -557,67 +627,7 @@ transform_lists_of_structs(column_view const& lhs, // processed by this function so we do nothing here. // Passthrough: nothing changed. - return {lhs, rhs_opt, std::move(out_cols_lhs), std::move(out_cols_rhs)}; -} - -/** - * @brief Transform any nested lists-of-structs column in the given table(s) into lists-of-integers - * column. - * - * If the rhs table is specified, its shape should be pre-checked to match with the shape of lhs - * table using `check_shape_compatibility` before being passed into this function. - * - * @param lhs The input lhs table to transform - * @param rhs The input rhs table to transform (if available) - * @param null_precedence Optional, an array having the same length as the number of columns in - * the input tables that indicates how null values compare to all other. If it is empty, - * the order `null_order::BEFORE` will be used for all columns. 
- * @param stream CUDA stream used for device memory operations and kernel launches - * @return A tuple consisting of new table_view representing the transformed input, along with - * the ranks columns (of `size_type` type) and possibly new list offsets generated during - * the transformation process - */ -std::tuple, - std::vector>, - std::vector>> -transform_lists_of_structs(table_view const& lhs, - std::optional const& rhs, - host_span null_precedence, - rmm::cuda_stream_view stream) -{ - std::vector transformed_lhs_cvs; - std::vector transformed_rhs_cvs; - std::vector> out_cols_lhs; - std::vector> out_cols_rhs; - - for (size_type col_idx = 0; col_idx < lhs.num_columns(); ++col_idx) { - auto const& lhs_col = lhs.column(col_idx); - auto const rhs_col_opt = - rhs ? std::optional{rhs.value().column(col_idx)} : std::nullopt; - - auto [transformed_lhs, transformed_rhs_opt, curr_out_cols_lhs, curr_out_cols_rhs] = - transform_lists_of_structs( - lhs_col, - rhs_col_opt, - null_precedence.empty() ? null_order::BEFORE : null_precedence[col_idx], - stream); - - transformed_lhs_cvs.emplace_back(std::move(transformed_lhs)); - if (rhs) { transformed_rhs_cvs.emplace_back(std::move(transformed_rhs_opt.value())); } - - out_cols_lhs.insert(out_cols_lhs.end(), - std::make_move_iterator(curr_out_cols_lhs.begin()), - std::make_move_iterator(curr_out_cols_lhs.end())); - out_cols_rhs.insert(out_cols_rhs.end(), - std::make_move_iterator(curr_out_cols_rhs.begin()), - std::make_move_iterator(curr_out_cols_rhs.end())); - } - - return {table_view{transformed_lhs_cvs}, - rhs ? 
std::optional{table_view{transformed_rhs_cvs}} : std::nullopt, - std::move(out_cols_lhs), - std::move(out_cols_rhs)}; + return {lhs, rhs, std::move(out_cols_lhs), std::move(out_cols_rhs)}; } } // namespace @@ -672,9 +682,29 @@ std::shared_ptr preprocessed_table::create( auto [decomposed_input, new_column_order, new_null_precedence, verticalized_col_depths] = decompose_structs(input, decompose_lists_column::NO, column_order, null_precedence); - // Unused variables are generated for rhs table which is not available here. - [[maybe_unused]] auto [transformed_input, unused_0, transformed_columns, unused_1] = - transform_lists_of_structs(decomposed_input, std::nullopt, new_null_precedence, stream); + // Transform any (nested) lists-of-structs column into lists-of-integers column. + std::vector> transformed_columns; + auto const transformed_input = + [&, &decomposed_input = decomposed_input, &new_null_precedence = new_null_precedence] { + std::vector transformed_cvs; + + for (size_type col_idx = 0; col_idx < decomposed_input.num_columns(); ++col_idx) { + auto const& lhs_col = decomposed_input.column(col_idx); + + auto [transformed, curr_out_cols] = transform_lists_of_structs( + lhs_col, + null_precedence.empty() ? null_order::BEFORE : new_null_precedence[col_idx], + stream, + rmm::mr::get_current_device_resource()); + + transformed_cvs.emplace_back(std::move(transformed)); + transformed_columns.insert(transformed_columns.end(), + std::make_move_iterator(curr_out_cols.begin()), + std::make_move_iterator(curr_out_cols.end())); + } + + return table_view{transformed_cvs}; + }(); auto const has_ranked_children = !transformed_columns.empty(); return create(transformed_input, @@ -707,8 +737,40 @@ preprocessed_table::create(table_view const& lhs, decompose_structs(rhs, decompose_lists_column::NO, column_order, null_precedence); // Transform any (nested) lists-of-structs column into lists-of-integers column. 
- auto [transformed_lhs, transformed_rhs_opt, transformed_columns_lhs, transformed_columns_rhs] = - transform_lists_of_structs(decomposed_lhs, decomposed_rhs, new_null_precedence_lhs, stream); + std::vector> transformed_columns_lhs; + std::vector> transformed_columns_rhs; + auto const [transformed_lhs, + transformed_rhs] = [&, + &decomposed_lhs = decomposed_lhs, + &decomposed_rhs = decomposed_rhs, + &new_null_precedence_lhs = new_null_precedence_lhs] { + std::vector transformed_lhs_cvs; + std::vector transformed_rhs_cvs; + + for (size_type col_idx = 0; col_idx < decomposed_lhs.num_columns(); ++col_idx) { + auto const& lhs_col = decomposed_lhs.column(col_idx); + auto const& rhs_col = decomposed_rhs.column(col_idx); + // `col_idx` walks the decomposed columns, so index `new_null_precedence_lhs` here. + auto [transformed_lhs, transformed_rhs, curr_out_cols_lhs, curr_out_cols_rhs] = + transform_lists_of_structs( + lhs_col, + rhs_col, + new_null_precedence_lhs.empty() ? null_order::BEFORE : new_null_precedence_lhs[col_idx], + stream, + rmm::mr::get_current_device_resource()); + + transformed_lhs_cvs.emplace_back(std::move(transformed_lhs)); + transformed_rhs_cvs.emplace_back(std::move(transformed_rhs)); + transformed_columns_lhs.insert(transformed_columns_lhs.end(), + std::make_move_iterator(curr_out_cols_lhs.begin()), + std::make_move_iterator(curr_out_cols_lhs.end())); + transformed_columns_rhs.insert(transformed_columns_rhs.end(), + std::make_move_iterator(curr_out_cols_rhs.begin()), + std::make_move_iterator(curr_out_cols_rhs.end())); + } + + return std::pair{table_view{transformed_lhs_cvs}, table_view{transformed_rhs_cvs}}; + }(); // This should be the same for both lhs and rhs but not all the time, such as when one table // has 0 rows while the other has >0 rows. So we check separately for each of them. 
@@ -722,7 +784,7 @@ preprocessed_table::create(table_view const& lhs, new_null_precedence_lhs, has_ranked_children_lhs, stream), - create(transformed_rhs_opt.value(), + create(transformed_rhs, std::move(verticalized_col_depths_rhs), std::move(transformed_columns_rhs), new_column_order_lhs,