From 550c9c693e4d9cdf2a7a69f18ef2ff545a7af047 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Wed, 3 May 2023 19:45:49 -0700 Subject: [PATCH 1/8] Complete refactoring --- cpp/src/table/row_operators.cu | 347 ++++++++++++++++++++------------- 1 file changed, 213 insertions(+), 134 deletions(-) diff --git a/cpp/src/table/row_operators.cu b/cpp/src/table/row_operators.cu index 9f3a5bcdfea..550e996e9a4 100644 --- a/cpp/src/table/row_operators.cu +++ b/cpp/src/table/row_operators.cu @@ -393,6 +393,83 @@ namespace lexicographic { namespace { +/** + * @brief Replace child of the input lists colum by a new child column. + * + * If the input is not sliced, just replace the input child by the new_child. + * Otherwise, we have to generate new offsets and replace both offsets/child of the input by the + * new ones. This is because the new child was generated by ranking and always has zero + * offset thus cannot replace the input child if it is sliced. + * + * The new generated offsets column needs to be returned and kept alive. + * + * @param[in] input The input column_view of type LIST + * @param[in] new_child A new child column to replace the existing child of the input + * @param[out] out_cols An array to store the new generated offsets (if applicable) + * @param[in] stream CUDA stream used for device memory operations and kernel launches + * @return An output column_view with child replaced + */ +auto replace_child(column_view const& input, + column_view const& new_child, + std::vector>& out_cols, + rmm::cuda_stream_view stream) +{ + auto const make_output = [&input](auto const& offsets_cv, auto const& child_cv) { + return column_view{data_type{type_id::LIST}, + input.size(), + nullptr, + input.null_mask(), + input.null_count(), + 0, + {offsets_cv, child_cv}}; + }; + + if (input.offset() == 0) { + return make_output(input.child(lists_column_view::offsets_column_index), new_child); + } + + out_cols.emplace_back(cudf::lists::detail::get_normalized_offsets( + lists_column_view{input}, stream, rmm::mr::get_current_device_resource())); + return make_output(out_cols.back()->view(), new_child); +} + +/** + * @brief Compute ranks of the input column. + * + * Dense rank type is used for compute ranking of the input for later lexicographic comparison. + * + * Consider this example: `input = [ [{0, "a"}, {3, "c"}], [{0, "a"}, {2, "b"}] ]`. + * If first rank is used, `transformed_input = [ [0, 3], [1, 2] ]`. Comparing them will lead + * to the result row(0) < row(1) which is incorrect. + * With dense rank, `transformed_input = [ [0, 2], [0, 1] ]`, producing the correct output for + * lexicographic comparison. + * + * In addition, since the being ranked input column is always a nested child column instead of + * top-level column, the column order for ranking should be fixed to the same value + * `order::ASCENDING` in all situations. + * For example, with the same input above, using column order as `order::ASCENDING` we will have + * `transformed_input = [ [0, 2], [0, 1] ]`. The output of sorting `transformed_input` will be + * exactly the same as sorting `input` regardless of the sorting order (ASC or DESC). + * + * @param input The input column to compute ranks + * @param column_null_order The flag indicating how nulls compare to non-null values + * @param stream CUDA stream used for device memory operations and kernel launches + * @return The output rank columns + */ +auto compute_ranks(column_view const& input, + null_order column_null_order, + rmm::cuda_stream_view stream) +{ + return cudf::detail::rank(input, + rank_method::DENSE, + order::ASCENDING, + null_policy::EXCLUDE, + column_null_order, + false /*percentage*/, + stream, + rmm::mr::get_current_device_resource()); +} + /** * @brief Transform any nested lists-of-structs column into lists-of-integers column. * @@ -402,126 +479,113 @@ namespace { * If the input column is not lists-of-structs, or does not contain lists-of-structs at any nested * level, the input will be passed through without any changes. * - * @param lhs The input lhs column to transform - * @param rhs The input rhs column to transform (if available) + * @param input The input column to transform * @param column_null_order The flag indicating how nulls compare to non-null values * @param stream CUDA stream used for device memory operations and kernel launches - * @return A tuple consisting of new column_view representing the transformed input, along with - * their ranks column(s) (of `size_type` type) and possibly new list offsets generated + * @return A pair consisting of new column_view representing the transformed input, along with + * an array containing its rank column(s) (of `size_type` type) and possibly new list + * offsets generated during the transformation process + */ +std::pair>> transform_lists_of_structs( + column_view const& input, null_order column_null_order, rmm::cuda_stream_view stream) +{ + std::vector> out_cols; + + if (input.type().id() == type_id::LIST) { + auto const child = cudf::lists_column_view{input}.get_sliced_child(stream); + + // Found a lists-of-structs column. + if (child.type().id() == type_id::STRUCT) { + out_cols.emplace_back(compute_ranks(child, column_null_order, stream)); + auto transformed = replace_child(input, out_cols.back()->view(), out_cols, stream); + return {std::move(transformed), std::move(out_cols)}; + } + // Found a lists-of-lists column. + else if (child.type().id() == type_id::LIST) { + // Recursively call transformation on the child column. + auto [new_child, out_cols_child] = + transform_lists_of_structs(child, column_null_order, stream); + + // Only transform the current column if its child has been transformed. + if (out_cols_child.size() > 0) { + out_cols.insert(out_cols.end(), + std::make_move_iterator(out_cols_child.begin()), + std::make_move_iterator(out_cols_child.end())); + auto transformed = replace_child(input, new_child, out_cols, stream); + return {std::move(transformed), std::move(out_cols)}; + } + // else: child was not transformed so input is also not transformed. + } + // else: child is not STRUCT or LIST: no transformation. + } + // else: lhs.type().id() != type_id::LIST. + // In such situations, lhs.type().id() can still be type_id::STRUCT. However, any + // structs-of-lists should be decomposed into empty struct type `Struct<>` before being + // processed by this function so we do nothing here. + + // Passthrough: nothing changed. + return {input, std::move(out_cols)}; +} + +/** + * @copydoc transform_lists_of_structs(column_view const&, null_order, rmm::cuda_stream_view) + * + * @param lhs The input lhs column to transform + * @param rhs The input rhs column to transform + * @return A tuple consisting of new column_view(s) representing the transformed input, along with + * their rank column(s) (of `size_type` type) and possibly new list offsets generated * during the transformation process */ std::tuple, + column_view, std::vector>, std::vector>> transform_lists_of_structs(column_view const& lhs, - std::optional const& rhs_opt, + column_view const& rhs, null_order column_null_order, rmm::cuda_stream_view stream) { - auto const default_mr = rmm::mr::get_current_device_resource(); - - // If the input is not sliced, just replace the input child by new_child. - // Otherwise, we have to generate new offsets and replace both offsets/child of the input by the - // new ones. This is because the new child here is generated by ranking and always has zero - // offset thus cannot replace the input child if it is sliced. - // The new offsets column needs to be returned and kept alive. - auto const replace_child = [&](column_view const& input, - column_view const& new_child, - std::vector>& out_cols) { - auto const make_output = [&input](auto const& offsets_cv, auto const& child_cv) { - return column_view{data_type{type_id::LIST}, - input.size(), - nullptr, - input.null_mask(), - input.null_count(), - 0, - {offsets_cv, child_cv}}; - }; - - if (input.offset() == 0) { - return make_output(input.child(lists_column_view::offsets_column_index), new_child); - } + std::vector> out_cols_lhs; + std::vector> out_cols_rhs; - out_cols.emplace_back( - cudf::lists::detail::get_normalized_offsets(lists_column_view{input}, stream, default_mr)); - return make_output(out_cols.back()->view(), new_child); - }; + auto const make_output = [&](auto const& new_child_lhs, auto const& new_child_rhs) { + auto transformed_lhs = replace_child(lhs, new_child_lhs, out_cols_lhs, stream); + auto transformed_rhs = replace_child(rhs, new_child_rhs, out_cols_rhs, stream); - // Dense ranks should be used instead of first rank. - // Consider this example: `input = [ [{0, "a"}, {3, "c"}], [{0, "a"}, {2, "b"}] ]`. - // If first rank is used, `transformed_input = [ [0, 3], [1, 2] ]`. Comparing them will lead - // to the result row(0) < row(1) which is incorrect. - // With dense rank, `transformed_input = [ [0, 2], [0, 1] ]`, producing correct comparison. - // - // In addition, since the ranked structs column(s) are nested child column instead of - // top-level column, the column order should be fixed to the same values in all situations. - // For example, with the same input above, using the fixed values for column order - // (`order::ASCENDING`), we have `transformed_input = [ [0, 2], [0, 1] ]`. Sorting of - // `transformed_input` will produce the same result as sorting `input` regardless of sorting - // order (ASC or DESC). - auto const compute_ranks = [&](auto const& input) { - return cudf::detail::rank(input, - rank_method::DENSE, - order::ASCENDING, - null_policy::EXCLUDE, - column_null_order, - false /*percentage*/, - stream, - default_mr); + return std::tuple{std::move(transformed_lhs), + std::move(transformed_rhs), + std::move(out_cols_lhs), + std::move(out_cols_rhs)}; }; - std::vector> out_cols_lhs; - std::vector> out_cols_rhs; - if (lhs.type().id() == type_id::LIST) { auto const child_lhs = cudf::lists_column_view{lhs}.get_sliced_child(stream); + auto const child_rhs = cudf::lists_column_view{rhs}.get_sliced_child(stream); // Found a lists-of-structs column. if (child_lhs.type().id() == type_id::STRUCT) { - if (rhs_opt) { // rhs table is available - auto const child_rhs = cudf::lists_column_view{rhs_opt.value()}.get_sliced_child(stream); - auto const concatenated_children = cudf::detail::concatenate( - std::vector{child_lhs, child_rhs}, stream, default_mr); - - auto const ranks = compute_ranks(concatenated_children->view()); - auto const ranks_slices = cudf::detail::slice( - ranks->view(), - {0, child_lhs.size(), child_lhs.size(), child_lhs.size() + child_rhs.size()}, - stream); - - out_cols_lhs.emplace_back(std::make_unique(ranks_slices.front())); - out_cols_rhs.emplace_back(std::make_unique(ranks_slices.back())); - - auto transformed_lhs = replace_child(lhs, out_cols_lhs.back()->view(), out_cols_lhs); - auto transformed_rhs = - replace_child(rhs_opt.value(), out_cols_rhs.back()->view(), out_cols_rhs); - - return {std::move(transformed_lhs), - std::optional{std::move(transformed_rhs)}, - std::move(out_cols_lhs), - std::move(out_cols_rhs)}; - } else { // rhs table is not available - out_cols_lhs.emplace_back(compute_ranks(child_lhs)); - auto transformed_lhs = replace_child(lhs, out_cols_lhs.back()->view(), out_cols_lhs); - - return {std::move(transformed_lhs), - std::nullopt, - std::move(out_cols_lhs), - std::move(out_cols_rhs)}; - } + auto const concatenated_children = + cudf::detail::concatenate(std::vector{child_lhs, child_rhs}, + stream, + rmm::mr::get_current_device_resource()); + + auto const ranks = compute_ranks(concatenated_children->view(), column_null_order, stream); + auto const ranks_slices = cudf::detail::slice( + ranks->view(), + {0, child_lhs.size(), child_lhs.size(), child_lhs.size() + child_rhs.size()}, + stream); + + out_cols_lhs.emplace_back(std::make_unique(ranks_slices.front())); + out_cols_rhs.emplace_back(std::make_unique(ranks_slices.back())); + + return make_output(out_cols_lhs.back()->view(), out_cols_rhs.back()->view()); + } // Found a lists-of-lists column. else if (child_lhs.type().id() == type_id::LIST) { - auto const child_rhs_opt = - rhs_opt - ? std::optional{cudf::lists_column_view{rhs_opt.value()}.get_sliced_child( - stream)} - : std::nullopt; - // Recursively call transformation on the child column. - auto [new_child_lhs, new_child_rhs_opt, out_cols_child_lhs, out_cols_child_rhs] = - transform_lists_of_structs(child_lhs, child_rhs_opt, column_null_order, stream); + auto [new_child_lhs, new_child_rhs, out_cols_child_lhs, out_cols_child_rhs] = + transform_lists_of_structs(child_lhs, child_rhs, column_null_order, stream); // Only transform the current pair of columns if their children have been transformed. if (out_cols_child_lhs.size() > 0 || out_cols_child_rhs.size() > 0) { @@ -532,21 +596,7 @@ transform_lists_of_structs(column_view const& lhs, std::make_move_iterator(out_cols_child_rhs.begin()), std::make_move_iterator(out_cols_child_rhs.end())); - auto transformed_lhs = replace_child(lhs, new_child_lhs, out_cols_lhs); - if (rhs_opt) { - auto transformed_rhs = - replace_child(rhs_opt.value(), new_child_rhs_opt.value(), out_cols_rhs); - - return {std::move(transformed_lhs), - std::optional{std::move(transformed_rhs)}, - std::move(out_cols_lhs), - std::move(out_cols_rhs)}; - } else { - return {std::move(transformed_lhs), - std::nullopt, - std::move(out_cols_lhs), - std::move(out_cols_rhs)}; - } + return make_output(new_child_lhs, new_child_rhs); } } // else: child is not STRUCT or LIST: just go to the end of this function, no transformation. @@ -557,32 +607,64 @@ transform_lists_of_structs(column_view const& lhs, // processed by this function so we do nothing here. // Passthrough: nothing changed. - return {lhs, rhs_opt, std::move(out_cols_lhs), std::move(out_cols_rhs)}; + return {lhs, rhs, std::move(out_cols_lhs), std::move(out_cols_rhs)}; } /** - * @brief Transform any nested lists-of-structs column in the given table(s) into lists-of-integers + * @brief Transform any nested lists-of-structs column in the given table into lists-of-integers * column. * - * If the rhs table is specified, its shape should be pre-checked to match with the shape of lhs - * table using `check_shape_compatibility` before being passed into this function. - * - * @param lhs The input lhs table to transform - * @param rhs The input rhs table to transform (if available) + * @param input The input table to transform * @param null_precedence Optional, an array having the same length as the number of columns in * the input tables that indicates how null values compare to all other. If it is empty, * the order `null_order::BEFORE` will be used for all columns. * @param stream CUDA stream used for device memory operations and kernel launches - * @return A tuple consisting of new table_view representing the transformed input, along with - * the ranks columns (of `size_type` type) and possibly new list offsets generated during + * @return A pair consisting of a new table_view representing the transformed input, along with + * the rank columns (of `size_type` type) and possibly new list offsets generated during + * the transformation process + */ +std::pair>> transform_lists_of_structs( + table_view const& input, + host_span null_precedence, + rmm::cuda_stream_view stream) +{ + std::vector transformed_cvs; + std::vector> out_cols; + + for (size_type col_idx = 0; col_idx < input.num_columns(); ++col_idx) { + auto const& lhs_col = input.column(col_idx); + + auto [transformed, curr_out_cols] = transform_lists_of_structs( + lhs_col, null_precedence.empty() ? null_order::BEFORE : null_precedence[col_idx], stream); + + transformed_cvs.emplace_back(std::move(transformed)); + out_cols.insert(out_cols.end(), + std::make_move_iterator(curr_out_cols.begin()), + std::make_move_iterator(curr_out_cols.end())); + } + + return {table_view{transformed_cvs}, std::move(out_cols)}; +} + +/** + * @copydoc transform_lists_of_structs(table_view const&, host_span, + * rmm::cuda_stream_view) + * + * The input tables should be pre-checked to match their shape with each other using + * `check_shape_compatibility` before being passed into this function. + * + * @param lhs The input lhs table to transform + * @param rhs The input rhs table to transform + * @return A tuple consisting of new table_view(s) representing the transformed input, along with + * the rank columns (of `size_type` type) and possibly new list offsets generated during * the transformation process */ std::tuple, + table_view, std::vector>, std::vector>> transform_lists_of_structs(table_view const& lhs, - std::optional const& rhs, + table_view const& rhs, host_span null_precedence, rmm::cuda_stream_view stream) { @@ -593,19 +675,17 @@ transform_lists_of_structs(table_view const& lhs, for (size_type col_idx = 0; col_idx < lhs.num_columns(); ++col_idx) { auto const& lhs_col = lhs.column(col_idx); - auto const rhs_col_opt = - rhs ? std::optional{rhs.value().column(col_idx)} : std::nullopt; + auto const& rhs_col = rhs.column(col_idx); - auto [transformed_lhs, transformed_rhs_opt, curr_out_cols_lhs, curr_out_cols_rhs] = + auto [transformed_lhs, transformed_rhs, curr_out_cols_lhs, curr_out_cols_rhs] = transform_lists_of_structs( lhs_col, - rhs_col_opt, + rhs_col, null_precedence.empty() ? null_order::BEFORE : null_precedence[col_idx], stream); transformed_lhs_cvs.emplace_back(std::move(transformed_lhs)); - if (rhs) { transformed_rhs_cvs.emplace_back(std::move(transformed_rhs_opt.value())); } - + transformed_rhs_cvs.emplace_back(std::move(transformed_rhs)); out_cols_lhs.insert(out_cols_lhs.end(), std::make_move_iterator(curr_out_cols_lhs.begin()), std::make_move_iterator(curr_out_cols_lhs.end())); @@ -615,7 +695,7 @@ transform_lists_of_structs(table_view const& lhs, } return {table_view{transformed_lhs_cvs}, - rhs ? std::optional{table_view{transformed_rhs_cvs}} : std::nullopt, + table_view{transformed_rhs_cvs}, std::move(out_cols_lhs), std::move(out_cols_rhs)}; } @@ -672,9 +752,8 @@ std::shared_ptr preprocessed_table::create( auto [decomposed_input, new_column_order, new_null_precedence, verticalized_col_depths] = decompose_structs(input, decompose_lists_column::NO, column_order, null_precedence); - // Unused variables are generated for rhs table which is not available here. - [[maybe_unused]] auto [transformed_input, unused_0, transformed_columns, unused_1] = - transform_lists_of_structs(decomposed_input, std::nullopt, new_null_precedence, stream); + auto [transformed_input, transformed_columns] = + transform_lists_of_structs(decomposed_input, new_null_precedence, stream); auto const has_ranked_children = !transformed_columns.empty(); return create(transformed_input, @@ -707,7 +786,7 @@ preprocessed_table::create(table_view const& lhs, decompose_structs(rhs, decompose_lists_column::NO, column_order, null_precedence); // Transform any (nested) lists-of-structs column into lists-of-integers column. - auto [transformed_lhs, transformed_rhs_opt, transformed_columns_lhs, transformed_columns_rhs] = + auto [transformed_lhs, transformed_rhs, transformed_columns_lhs, transformed_columns_rhs] = transform_lists_of_structs(decomposed_lhs, decomposed_rhs, new_null_precedence_lhs, stream); // This should be the same for both lhs and rhs but not all the time, such as when one table @@ -722,7 +801,7 @@ preprocessed_table::create(table_view const& lhs, new_null_precedence_lhs, has_ranked_children_lhs, stream), - create(transformed_rhs_opt.value(), + create(transformed_rhs, std::move(verticalized_col_depths_rhs), std::move(transformed_columns_rhs), new_column_order_lhs, From 49d0e5966ea6983c6168a7ad77cb3d4969afdd20 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Wed, 3 May 2023 20:10:03 -0700 Subject: [PATCH 2/8] Remove functions and embed their code --- cpp/src/table/row_operators.cu | 149 ++++++++++++--------------------- 1 file changed, 55 insertions(+), 94 deletions(-) diff --git a/cpp/src/table/row_operators.cu b/cpp/src/table/row_operators.cu index 550e996e9a4..3717aa56d2f 100644 --- a/cpp/src/table/row_operators.cu +++ b/cpp/src/table/row_operators.cu @@ -610,96 +610,6 @@ transform_lists_of_structs(column_view const& lhs, return {lhs, rhs, std::move(out_cols_lhs), std::move(out_cols_rhs)}; } -/** - * @brief Transform any nested lists-of-structs column in the given table into lists-of-integers - * column. - * - * @param input The input table to transform - * @param null_precedence Optional, an array having the same length as the number of columns in - * the input tables that indicates how null values compare to all other. If it is empty, - * the order `null_order::BEFORE` will be used for all columns. - * @param stream CUDA stream used for device memory operations and kernel launches - * @return A pair consisting of a new table_view representing the transformed input, along with - * the rank columns (of `size_type` type) and possibly new list offsets generated during - * the transformation process - */ -std::pair>> transform_lists_of_structs( - table_view const& input, - host_span null_precedence, - rmm::cuda_stream_view stream) -{ - std::vector transformed_cvs; - std::vector> out_cols; - - for (size_type col_idx = 0; col_idx < input.num_columns(); ++col_idx) { - auto const& lhs_col = input.column(col_idx); - - auto [transformed, curr_out_cols] = transform_lists_of_structs( - lhs_col, null_precedence.empty() ? null_order::BEFORE : null_precedence[col_idx], stream); - - transformed_cvs.emplace_back(std::move(transformed)); - out_cols.insert(out_cols.end(), - std::make_move_iterator(curr_out_cols.begin()), - std::make_move_iterator(curr_out_cols.end())); - } - - return {table_view{transformed_cvs}, std::move(out_cols)}; -} - -/** - * @copydoc transform_lists_of_structs(table_view const&, host_span, - * rmm::cuda_stream_view) - * - * The input tables should be pre-checked to match their shape with each other using - * `check_shape_compatibility` before being passed into this function. - * - * @param lhs The input lhs table to transform - * @param rhs The input rhs table to transform - * @return A tuple consisting of new table_view(s) representing the transformed input, along with - * the rank columns (of `size_type` type) and possibly new list offsets generated during - * the transformation process - */ -std::tuple>, - std::vector>> -transform_lists_of_structs(table_view const& lhs, - table_view const& rhs, - host_span null_precedence, - rmm::cuda_stream_view stream) -{ - std::vector transformed_lhs_cvs; - std::vector transformed_rhs_cvs; - std::vector> out_cols_lhs; - std::vector> out_cols_rhs; - - for (size_type col_idx = 0; col_idx < lhs.num_columns(); ++col_idx) { - auto const& lhs_col = lhs.column(col_idx); - auto const& rhs_col = rhs.column(col_idx); - - auto [transformed_lhs, transformed_rhs, curr_out_cols_lhs, curr_out_cols_rhs] = - transform_lists_of_structs( - lhs_col, - rhs_col, - null_precedence.empty() ? null_order::BEFORE : null_precedence[col_idx], - stream); - - transformed_lhs_cvs.emplace_back(std::move(transformed_lhs)); - transformed_rhs_cvs.emplace_back(std::move(transformed_rhs)); - out_cols_lhs.insert(out_cols_lhs.end(), - std::make_move_iterator(curr_out_cols_lhs.begin()), - std::make_move_iterator(curr_out_cols_lhs.end())); - out_cols_rhs.insert(out_cols_rhs.end(), - std::make_move_iterator(curr_out_cols_rhs.begin()), - std::make_move_iterator(curr_out_cols_rhs.end())); - } - - return {table_view{transformed_lhs_cvs}, - table_view{transformed_rhs_cvs}, - std::move(out_cols_lhs), - std::move(out_cols_rhs)}; -} - } // namespace std::shared_ptr preprocessed_table::create( @@ -752,8 +662,28 @@ std::shared_ptr preprocessed_table::create( auto [decomposed_input, new_column_order, new_null_precedence, verticalized_col_depths] = decompose_structs(input, decompose_lists_column::NO, column_order, null_precedence); - auto [transformed_input, transformed_columns] = - transform_lists_of_structs(decomposed_input, new_null_precedence, stream); + // Transform any (nested) lists-of-structs column into lists-of-integers column. + std::vector> transformed_columns; + auto const transformed_input = + [&, &decomposed_input = decomposed_input, &new_null_precedence = new_null_precedence] { + std::vector transformed_cvs; + + for (size_type col_idx = 0; col_idx < decomposed_input.num_columns(); ++col_idx) { + auto const& lhs_col = decomposed_input.column(col_idx); + + auto [transformed, curr_out_cols] = transform_lists_of_structs( + lhs_col, + null_precedence.empty() ? null_order::BEFORE : new_null_precedence[col_idx], + stream); + + transformed_cvs.emplace_back(std::move(transformed)); + transformed_columns.insert(transformed_columns.end(), + std::make_move_iterator(curr_out_cols.begin()), + std::make_move_iterator(curr_out_cols.end())); + } + + return table_view{transformed_cvs}; + }(); auto const has_ranked_children = !transformed_columns.empty(); return create(transformed_input, @@ -786,8 +716,39 @@ preprocessed_table::create(table_view const& lhs, decompose_structs(rhs, decompose_lists_column::NO, column_order, null_precedence); // Transform any (nested) lists-of-structs column into lists-of-integers column. - auto [transformed_lhs, transformed_rhs, transformed_columns_lhs, transformed_columns_rhs] = - transform_lists_of_structs(decomposed_lhs, decomposed_rhs, new_null_precedence_lhs, stream); + std::vector> transformed_columns_lhs; + std::vector> transformed_columns_rhs; + auto const [transformed_lhs, + transformed_rhs] = [&, + &decomposed_lhs = decomposed_lhs, + &decomposed_rhs = decomposed_rhs, + &new_null_precedence_lhs = new_null_precedence_lhs] { + std::vector transformed_lhs_cvs; + std::vector transformed_rhs_cvs; + + for (size_type col_idx = 0; col_idx < decomposed_lhs.num_columns(); ++col_idx) { + auto const& lhs_col = decomposed_lhs.column(col_idx); + auto const& rhs_col = decomposed_rhs.column(col_idx); + + auto [transformed_lhs, transformed_rhs, curr_out_cols_lhs, curr_out_cols_rhs] = + transform_lists_of_structs( + lhs_col, + rhs_col, + null_precedence.empty() ? null_order::BEFORE : null_precedence[col_idx], + stream); + + transformed_lhs_cvs.emplace_back(std::move(transformed_lhs)); + transformed_rhs_cvs.emplace_back(std::move(transformed_rhs)); + transformed_columns_lhs.insert(transformed_columns_lhs.end(), + std::make_move_iterator(curr_out_cols_lhs.begin()), + std::make_move_iterator(curr_out_cols_lhs.end())); + transformed_columns_rhs.insert(transformed_columns_rhs.end(), + std::make_move_iterator(curr_out_cols_rhs.begin()), + std::make_move_iterator(curr_out_cols_rhs.end())); + } + + return std::pair{table_view{transformed_lhs_cvs}, table_view{transformed_rhs_cvs}}; + }(); // This should be the same for both lhs and rhs but not all the time, such as when one table // has 0 rows while the other has >0 rows. So we check separately for each of them. From f60e1e9abfd90b401dda46fc1f68a2e14f0795d5 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Wed, 3 May 2023 20:21:15 -0700 Subject: [PATCH 3/8] Fix spell --- cpp/src/table/row_operators.cu | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/src/table/row_operators.cu b/cpp/src/table/row_operators.cu index 3717aa56d2f..8b5d2685f5d 100644 --- a/cpp/src/table/row_operators.cu +++ b/cpp/src/table/row_operators.cu @@ -394,7 +394,7 @@ namespace lexicographic { namespace { /** - * @brief Replace child of the input lists colum by a new child column. + * @brief Replace child of the input lists column by a new child column. * * If the input is not sliced, just replace the input child by the new_child. * Otherwise, we have to generate new offsets and replace both offsets/child of the input by the From 07c0c55ff153dcc446b31fac0089a25412443fa6 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Mon, 8 May 2023 09:51:26 -0700 Subject: [PATCH 4/8] Add `mr` parameter --- cpp/src/table/row_operators.cu | 46 +++++++++++++++++++++------------- 1 file changed, 29 insertions(+), 17 deletions(-) diff --git a/cpp/src/table/row_operators.cu b/cpp/src/table/row_operators.cu index 8b5d2685f5d..e7258fc5fcc 100644 --- a/cpp/src/table/row_operators.cu +++ b/cpp/src/table/row_operators.cu @@ -407,12 +407,14 @@ namespace { * @param[in] new_child A new child column to replace the existing child of the input * @param[out] out_cols An array to store the new generated offsets (if applicable) * @param[in] stream CUDA stream used for device memory operations and kernel launches + * @param[in] mr Device memory resource used to allocate the returned column * @return An output column_view with child replaced */ auto replace_child(column_view const& input, column_view const& new_child, std::vector>& out_cols, - rmm::cuda_stream_view stream) + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) { auto const make_output = [&input](auto const& offsets_cv, auto const& child_cv) { return column_view{data_type{type_id::LIST}, @@ -428,8 +430,8 @@ auto replace_child(column_view const& input, return make_output(input.child(lists_column_view::offsets_column_index), new_child); } - out_cols.emplace_back(cudf::lists::detail::get_normalized_offsets( - lists_column_view{input}, stream, rmm::mr::get_current_device_resource())); + out_cols.emplace_back( + cudf::lists::detail::get_normalized_offsets(lists_column_view{input}, stream, mr)); return make_output(out_cols.back()->view(), new_child); } @@ -454,11 +456,13 @@ auto replace_child(column_view const& input, * @param input The input column to compute ranks * @param column_null_order The flag indicating how nulls compare to non-null values * @param stream CUDA stream used for device memory operations and kernel launches + * @param mr Device memory resource used to allocate the returned column * @return The output rank columns */ auto compute_ranks(column_view const& input, null_order column_null_order, - rmm::cuda_stream_view stream) + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) { return cudf::detail::rank(input, rank_method::DENSE, @@ -467,7 +471,7 @@ auto compute_ranks(column_view const& input, column_null_order, false /*percentage*/, stream, - rmm::mr::get_current_device_resource()); + mr); } /** @@ -482,12 +486,16 @@ auto compute_ranks(column_view const& input, * @param input The input column to transform * @param column_null_order The flag indicating how nulls compare to non-null values * @param stream CUDA stream used for device memory operations and kernel launches + * @param mr Device memory resource used to allocate the returned column * @return A pair consisting of new column_view representing the transformed input, along with * an array containing its rank column(s) (of `size_type` type) and possibly new list * offsets generated during the transformation process */ std::pair>> transform_lists_of_structs( - column_view const& input, null_order column_null_order, rmm::cuda_stream_view stream) + column_view const& input, + null_order column_null_order, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) { std::vector> out_cols; @@ -496,22 +504,22 @@ std::pair>> transform_lists_of_ // Found a lists-of-structs column. if (child.type().id() == type_id::STRUCT) { - out_cols.emplace_back(compute_ranks(child, column_null_order, stream)); - auto transformed = replace_child(input, out_cols.back()->view(), out_cols, stream); + out_cols.emplace_back(compute_ranks(child, column_null_order, stream, mr)); + auto transformed = replace_child(input, out_cols.back()->view(), out_cols, stream, mr); return {std::move(transformed), std::move(out_cols)}; } // Found a lists-of-lists column. else if (child.type().id() == type_id::LIST) { // Recursively call transformation on the child column. auto [new_child, out_cols_child] = - transform_lists_of_structs(child, column_null_order, stream); + transform_lists_of_structs(child, column_null_order, stream, mr); // Only transform the current column if its child has been transformed. if (out_cols_child.size() > 0) { out_cols.insert(out_cols.end(), std::make_move_iterator(out_cols_child.begin()), std::make_move_iterator(out_cols_child.end())); - auto transformed = replace_child(input, new_child, out_cols, stream); + auto transformed = replace_child(input, new_child, out_cols, stream, mr); return {std::move(transformed), std::move(out_cols)}; } // else: child was not transformed so input is also not transformed. @@ -543,14 +551,15 @@ std::tuple> out_cols_lhs; std::vector> out_cols_rhs; auto const make_output = [&](auto const& new_child_lhs, auto const& new_child_rhs) { - auto transformed_lhs = replace_child(lhs, new_child_lhs, out_cols_lhs, stream); - auto transformed_rhs = replace_child(rhs, new_child_rhs, out_cols_rhs, stream); + auto transformed_lhs = replace_child(lhs, new_child_lhs, out_cols_lhs, stream, mr); + auto transformed_rhs = replace_child(rhs, new_child_rhs, out_cols_rhs, stream, mr); return std::tuple{std::move(transformed_lhs), std::move(transformed_rhs), @@ -569,7 +578,8 @@ transform_lists_of_structs(column_view const& lhs, stream, rmm::mr::get_current_device_resource()); - auto const ranks = compute_ranks(concatenated_children->view(), column_null_order, stream); + auto const ranks = + compute_ranks(concatenated_children->view(), column_null_order, stream, mr); auto const ranks_slices = cudf::detail::slice( ranks->view(), {0, child_lhs.size(), child_lhs.size(), child_lhs.size() + child_rhs.size()}, @@ -585,7 +595,7 @@ transform_lists_of_structs(column_view const& lhs, else if (child_lhs.type().id() == type_id::LIST) { // Recursively call transformation on the child column. auto [new_child_lhs, new_child_rhs, out_cols_child_lhs, out_cols_child_rhs] = - transform_lists_of_structs(child_lhs, child_rhs, column_null_order, stream); + transform_lists_of_structs(child_lhs, child_rhs, column_null_order, stream, mr); // Only transform the current pair of columns if their children have been transformed. if (out_cols_child_lhs.size() > 0 || out_cols_child_rhs.size() > 0) { @@ -674,7 +684,8 @@ std::shared_ptr preprocessed_table::create( auto [transformed, curr_out_cols] = transform_lists_of_structs( lhs_col, null_precedence.empty() ? null_order::BEFORE : new_null_precedence[col_idx], - stream); + stream, + rmm::mr::get_current_device_resource()); transformed_cvs.emplace_back(std::move(transformed)); transformed_columns.insert(transformed_columns.end(), @@ -735,7 +746,8 @@ preprocessed_table::create(table_view const& lhs, lhs_col, rhs_col, null_precedence.empty() ? null_order::BEFORE : null_precedence[col_idx], - stream); + stream, + rmm::mr::get_current_device_resource()); transformed_lhs_cvs.emplace_back(std::move(transformed_lhs)); transformed_rhs_cvs.emplace_back(std::move(transformed_rhs)); From 545ba30308095a6e4714c2b77ab384e26035e1cb Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Mon, 8 May 2023 09:53:03 -0700 Subject: [PATCH 5/8] Fix typo --- cpp/src/table/row_operators.cu | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/src/table/row_operators.cu b/cpp/src/table/row_operators.cu index e7258fc5fcc..33bd3eb1c9c 100644 --- a/cpp/src/table/row_operators.cu +++ b/cpp/src/table/row_operators.cu @@ -486,7 +486,7 @@ auto compute_ranks(column_view const& input, * @param input The input column to transform * @param column_null_order The flag indicating how nulls compare to non-null values * @param stream CUDA stream used for device memory operations and kernel launches - * @param mr Device memory resource used to allocate the returned column + * @param mr Device memory resource used to allocate the returned column(s) * @return A pair consisting of new column_view representing the transformed input, along with * an array containing its rank column(s) (of `size_type` type) and possibly new list * offsets generated during the transformation process From 05aa5e5c20591c6a626839fcf70499222baf0e92 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Thu, 11 May 2023 16:09:51 -0700 Subject: [PATCH 6/8] Fix docs, and some cleanup --- cpp/src/table/row_operators.cu | 42 ++++++++++++++++++++-------------- 1 file changed, 25 insertions(+), 17 deletions(-) diff --git a/cpp/src/table/row_operators.cu b/cpp/src/table/row_operators.cu index 33bd3eb1c9c..63f88d83d9e 100644 --- a/cpp/src/table/row_operators.cu +++ b/cpp/src/table/row_operators.cu @@ -397,9 +397,9 @@ namespace { * @brief Replace child of the input lists column by a new child column. * * If the input is not sliced, just replace the input child by the new_child. - * Otherwise, we have to generate new offsets and replace both offsets/child of the input by the - * new ones. This is because the new child was generated by ranking and always has zero - * offset thus cannot replace the input child if it is sliced. + * Otherwise, we have to generate new offsets and replace both the offsets and the child of the + * input by the new ones. This is because the new child was generated by ranking and always + * has zero offset, so it cannot replace the input child if it is sliced. * * The new generated offsets column needs to be returned and kept alive. * @@ -438,16 +438,17 @@ auto replace_child(column_view const& input, /** * @brief Compute ranks of the input column. * - * Dense rank type is used for compute ranking of the input for later lexicographic comparison. + * `Dense` rank type must be used for compute ranking of the input for later lexicographic + * comparison. * - * Consider this example: `input = [ [{0, "a"}, {3, "c"}], [{0, "a"}, {2, "b"}] ]`. + * To understand why, consider: `input = [ [{0, "a"}, {3, "c"}], [{0, "a"}, {2, "b"}] ]`. * If first rank is used, `transformed_input = [ [0, 3], [1, 2] ]`. Comparing them will lead * to the result row(0) < row(1) which is incorrect. * With dense rank, `transformed_input = [ [0, 2], [0, 1] ]`, producing the correct output for * lexicographic comparison. * - * In addition, since the being ranked input column is always a nested child column instead of - * top-level column, the column order for ranking should be fixed to the same value + * In addition, since the input column being ranked is always a nested child column instead of + * a top-level column, the column order for ranking should be fixed to the same value * `order::ASCENDING` in all situations. * For example, with the same input above, using column order as `order::ASCENDING` we will have * `transformed_input = [ [0, 2], [0, 1] ]`. The output of sorting `transformed_input` will be @@ -505,8 +506,8 @@ std::pair>> transform_lists_of_ // Found a lists-of-structs column. if (child.type().id() == type_id::STRUCT) { out_cols.emplace_back(compute_ranks(child, column_null_order, stream, mr)); - auto transformed = replace_child(input, out_cols.back()->view(), out_cols, stream, mr); - return {std::move(transformed), std::move(out_cols)}; + return {replace_child(input, out_cols.back()->view(), out_cols, stream, mr), + std::move(out_cols)}; } // Found a lists-of-lists column. else if (child.type().id() == type_id::LIST) { @@ -519,8 +520,7 @@ std::pair>> transform_lists_of_ out_cols.insert(out_cols.end(), std::make_move_iterator(out_cols_child.begin()), std::make_move_iterator(out_cols_child.end())); - auto transformed = replace_child(input, new_child, out_cols, stream, mr); - return {std::move(transformed), std::move(out_cols)}; + return {replace_child(input, new_child, out_cols, stream, mr), std::move(out_cols)}; } // else: child was not transformed so input is also not transformed. } @@ -536,10 +536,21 @@ std::pair>> transform_lists_of_ } /** - * @copydoc transform_lists_of_structs(column_view const&, null_order, rmm::cuda_stream_view) + * @brief Transform any nested lists-of-structs column into lists-of-integers column. + * + * For a lists-of-structs column at any nested level, its child structs column will be replaced by a + * `size_type` column computed as its ranks. In addition, equivalent child columns of both input + * columns (i.e., child columns at the same order, same nested level) will be combined and + * ranked together. + * + * If the input columns are not lists-of-structs, or do not contain lists-of-structs at any nested + * level, there will not be any changes. * * @param lhs The input lhs column to transform * @param rhs The input rhs column to transform + * @param column_null_order The flag indicating how nulls compare to non-null values + * @param stream CUDA stream used for device memory operations and kernel launches + * @param mr Device memory resource used to allocate the returned column(s) * @return A tuple consisting of new column_view(s) representing the transformed input, along with * their rank column(s) (of `size_type` type) and possibly new list offsets generated * during the transformation process @@ -558,11 +569,8 @@ transform_lists_of_structs(column_view const& lhs, std::vector> out_cols_rhs; auto const make_output = [&](auto const& new_child_lhs, auto const& new_child_rhs) { - auto transformed_lhs = replace_child(lhs, new_child_lhs, out_cols_lhs, stream, mr); - auto transformed_rhs = replace_child(rhs, new_child_rhs, out_cols_rhs, stream, mr); - - return std::tuple{std::move(transformed_lhs), - std::move(transformed_rhs), + return std::tuple{replace_child(lhs, new_child_lhs, out_cols_lhs, stream, mr), + replace_child(rhs, new_child_rhs, out_cols_rhs, stream, mr), std::move(out_cols_lhs), std::move(out_cols_rhs)}; }; From e2127ffb7761afee0bf8d234fd2331297f649dc6 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Thu, 11 May 2023 17:19:03 -0700 Subject: [PATCH 7/8] Add more parameters --- cpp/src/table/row_operators.cu | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cpp/src/table/row_operators.cu b/cpp/src/table/row_operators.cu index 63f88d83d9e..596d27ec8f3 100644 --- a/cpp/src/table/row_operators.cu +++ b/cpp/src/table/row_operators.cu @@ -593,8 +593,8 @@ transform_lists_of_structs(column_view const& lhs, {0, child_lhs.size(), child_lhs.size(), child_lhs.size() + child_rhs.size()}, stream); - out_cols_lhs.emplace_back(std::make_unique(ranks_slices.front())); - out_cols_rhs.emplace_back(std::make_unique(ranks_slices.back())); + out_cols_lhs.emplace_back(std::make_unique(ranks_slices.front(), stream, mr)); + out_cols_rhs.emplace_back(std::make_unique(ranks_slices.back(), stream, mr)); return make_output(out_cols_lhs.back()->view(), out_cols_rhs.back()->view()); From c09ab64a330137b160bca84e2f9dd7981a3ff2c2 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Thu, 11 May 2023 18:34:28 -0700 Subject: [PATCH 8/8] Use default `mr` for temporary buffer --- cpp/src/table/row_operators.cu | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/cpp/src/table/row_operators.cu b/cpp/src/table/row_operators.cu index 596d27ec8f3..770a7c775b4 100644 --- a/cpp/src/table/row_operators.cu +++ b/cpp/src/table/row_operators.cu @@ -586,8 +586,10 @@ transform_lists_of_structs(column_view const& lhs, stream, rmm::mr::get_current_device_resource()); - auto const ranks = - compute_ranks(concatenated_children->view(), column_null_order, stream, mr); + auto const ranks = compute_ranks(concatenated_children->view(), + column_null_order, + stream, + rmm::mr::get_current_device_resource()); auto const ranks_slices = cudf::detail::slice( ranks->view(), {0, child_lhs.size(), child_lhs.size(), child_lhs.size() + child_rhs.size()},