From 0146f743987a6f2a51aab08f34771eb4d3531afc Mon Sep 17 00:00:00 2001 From: Mike Wilson Date: Wed, 17 Mar 2021 01:40:50 -0400 Subject: [PATCH] Add explode_outer and explode_outer_position (#7499) This code adds support for explode_outer and explode_outer_position. These differ from explode and explode_position by the way null and empty lists are handled. Explode discards null and empty lists and as such, lifts the child column directly out of the list column. Explode_outer must find these null and empty lists and make space for a null entry in the child column. This means we need to gather both the table and the exploded column. Further, we must make a pass on the exploded column to count these entries initially as we do not know the required size of the gather maps until we have this information and it isn't just the null count. If there are no null or empty lists in the input, the normal explode function is called as it is simpler, but it does come at the cost of marching the offsets looking for duplicates, which indicate null or empty lists. 
closes #7466 Authors: - Mike Wilson (@hyperbolic2346) Approvers: - AJ Schmidt (@ajschmidt8) - Jake Hemstad (@jrhemstad) - Nghia Truong (@ttnghia) URL: https://github.com/rapidsai/cudf/pull/7499 --- conda/recipes/libcudf/meta.yaml | 1 + cpp/CMakeLists.txt | 2 +- cpp/include/cudf/lists/explode.hpp | 200 +++++++ cpp/include/cudf/reshape.hpp | 86 --- cpp/include/cudf/table/table.hpp | 26 +- cpp/include/cudf/table/table_view.hpp | 19 + cpp/src/lists/explode.cu | 314 ++++++++++ cpp/src/reshape/explode.cu | 178 ------ cpp/src/table/table.cpp | 8 - cpp/src/table/table_view.cpp | 6 +- cpp/tests/CMakeLists.txt | 2 +- cpp/tests/lists/explode_tests.cpp | 819 ++++++++++++++++++++++++++ cpp/tests/reshape/explode_tests.cpp | 530 ----------------- 13 files changed, 1381 insertions(+), 810 deletions(-) create mode 100644 cpp/include/cudf/lists/explode.hpp create mode 100644 cpp/src/lists/explode.cu delete mode 100644 cpp/src/reshape/explode.cu create mode 100644 cpp/tests/lists/explode_tests.cpp delete mode 100644 cpp/tests/reshape/explode_tests.cpp diff --git a/conda/recipes/libcudf/meta.yaml b/conda/recipes/libcudf/meta.yaml index e709824721c..5657d21889f 100644 --- a/conda/recipes/libcudf/meta.yaml +++ b/conda/recipes/libcudf/meta.yaml @@ -134,6 +134,7 @@ test: - test -f $PREFIX/include/cudf/lists/detail/copying.hpp - test -f $PREFIX/include/cudf/lists/detail/sorting.hpp - test -f $PREFIX/include/cudf/lists/count_elements.hpp + - test -f $PREFIX/include/cudf/lists/explode.hpp - test -f $PREFIX/include/cudf/lists/drop_list_duplicates.hpp - test -f $PREFIX/include/cudf/lists/extract.hpp - test -f $PREFIX/include/cudf/lists/contains.hpp diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index 103b163e260..39acc362450 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -260,6 +260,7 @@ add_library(cudf src/lists/copying/gather.cu src/lists/copying/segmented_gather.cu src/lists/count_elements.cu + src/lists/explode.cu src/lists/extract.cu src/lists/drop_list_duplicates.cu 
src/lists/lists_column_factories.cu @@ -289,7 +290,6 @@ add_library(cudf src/replace/nulls.cu src/replace/replace.cu src/reshape/byte_cast.cu - src/reshape/explode.cu src/reshape/interleave_columns.cu src/reshape/tile.cu src/rolling/grouped_rolling.cu diff --git a/cpp/include/cudf/lists/explode.hpp b/cpp/include/cudf/lists/explode.hpp new file mode 100644 index 00000000000..156d4b9275d --- /dev/null +++ b/cpp/include/cudf/lists/explode.hpp @@ -0,0 +1,200 @@ +/* + * Copyright (c) 2021, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include +#include +#include +#include + +namespace cudf { + +/** + * @brief Explodes a list column's elements. + * + * Any list is exploded, which means the elements of the list in each row are expanded into new rows + * in the output. The corresponding rows for other columns in the input are duplicated. Example: + * ``` + * [[5,10,15], 100], + * [[20,25], 200], + * [[30], 300], + * returns + * [5, 100], + * [10, 100], + * [15, 100], + * [20, 200], + * [25, 200], + * [30, 300], + * ``` + * + * Nulls and empty lists propagate in different ways depending on what is null or empty. + * ``` + * [[5,null,15], 100], + * [null, 200], + * [[], 300], + * returns + * [5, 100], + * [null, 100], + * [15, 100], + * ``` + * Note that null lists and empty lists are not included in the resulting table, but nulls inside + * lists will be represented with a null entry for that column in that row. 
+ * + * @param input_table Table to explode. + * @param explode_column_idx Column index to explode inside the table. + * @param mr Device memory resource used to allocate the returned column's device memory. + * + * @return A new table with explode_col exploded. + */ +std::unique_ptr explode( + table_view const& input_table, + size_type explode_column_idx, + rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + +/** + * @brief Explodes a list column's elements and includes a position column. + * + * Any list is exploded, which means the elements of the list in each row are expanded into new rows + * in the output. The corresponding rows for other columns in the input are duplicated. A position + * column is added that has the index inside the original list for each row. Example: + * ``` + * [[5,10,15], 100], + * [[20,25], 200], + * [[30], 300], + * returns + * [0, 5, 100], + * [1, 10, 100], + * [2, 15, 100], + * [0, 20, 200], + * [1, 25, 200], + * [0, 30, 300], + * ``` + * + * Nulls and empty lists propagate in different ways depending on what is null or empty. + * ``` + * [[5,null,15], 100], + * [null, 200], + * [[], 300], + * returns + * [0, 5, 100], + * [1, null, 100], + * [2, 15, 100], + * ``` + * Note that null lists and empty lists are not included in the resulting table, but nulls inside + * lists will be represented with a null entry for that column in that row. + * + * @param input_table Table to explode. + * @param explode_column_idx Column index to explode inside the table. + * @param mr Device memory resource used to allocate the returned column's device memory. + * + * @return A new table with exploded value and position. The column order of return table is + * [cols before explode_input, explode_position, explode_value, cols after explode_input]. + */ +std::unique_ptr
explode_position( + table_view const& input_table, + size_type explode_column_idx, + rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + +/** + * @brief Explodes a list column's elements retaining any null entries or empty lists inside. + * + * Any list is exploded, which means the elements of the list in each row are expanded into new rows + * in the output. The corresponding rows for other columns in the input are duplicated. Example: + * ``` + * [[5,10,15], 100], + * [[20,25], 200], + * [[30], 300], + * returns + * [5, 100], + * [10, 100], + * [15, 100], + * [20, 200], + * [25, 200], + * [30, 300], + * ``` + * + * Nulls and empty lists propagate as null entries in the result. + *``` + * [[5,null,15], 100], + * [null, 200], + * [[], 300], + * returns + * [5, 100], + * [null, 100], + * [15, 100], + * [null, 200], + * [null, 300], + * ``` + * + * @param input_table Table to explode. + * @param explode_column_idx Column index to explode inside the table. + * @param mr Device memory resource used to allocate the returned column's device memory. + * + * @return A new table with explode_col exploded. + */ +std::unique_ptr
explode_outer( + table_view const& input_table, + size_type explode_column_idx, + rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + +/** + * @brief Explodes a list column's elements retaining any null entries or empty lists and includes a + * position column. + * + * Any list is exploded, which means the elements of the list in each row are expanded into new rows + * in the output. The corresponding rows for other columns in the input are duplicated. A position + * column is added that has the index inside the original list for each row. Example: + * ``` + * [[5,10,15], 100], + * [[20,25], 200], + * [[30], 300], + * returns + * [0, 5, 100], + * [1, 10, 100], + * [2, 15, 100], + * [0, 20, 200], + * [1, 25, 200], + * [0, 30, 300], + * ``` + * + * Nulls and empty lists propagate as null entries in the result. + * ``` + * [[5,null,15], 100], + * [null, 200], + * [[], 300], + * returns + * [0, 5, 100], + * [1, null, 100], + * [2, 15, 100], + * [0, null, 200], + * [0, null, 300], + * ``` + * + * @param input_table Table to explode. + * @param explode_column_idx Column index to explode inside the table. + * @param mr Device memory resource used to allocate the returned column's device memory. + * + * @return A new table with explode_col exploded and a position column inserted before it. + */ +std::unique_ptr
explode_outer_position( + table_view const& input_table, + size_type explode_column_idx, + rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + +/** @} */ // end of group + +} // namespace cudf diff --git a/cpp/include/cudf/reshape.hpp b/cpp/include/cudf/reshape.hpp index a6030f31e6d..74e4ebb8d05 100644 --- a/cpp/include/cudf/reshape.hpp +++ b/cpp/include/cudf/reshape.hpp @@ -97,92 +97,6 @@ std::unique_ptr byte_cast( flip_endianness endian_configuration, rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); -/** - * @brief Explodes a list column's elements. - * - * Any list is exploded, which means the elements of the list in each row are expanded into new rows - * in the output. The corresponding rows for other columns in the input are duplicated. Example: - * ``` - * [[5,10,15], 100], - * [[20,25], 200], - * [[30], 300], - * returns - * [5, 100], - * [10, 100], - * [15, 100], - * [20, 200], - * [25, 200], - * [30, 300], - * ``` - * - * Nulls and empty lists propagate in different ways depending on what is null or empty. - *``` - * [[5,null,15], 100], - * [null, 200], - * [[], 300], - * returns - * [5, 100], - * [null, 100], - * [15, 100], - * ``` - * Note that null lists are not included in the resulting table, but nulls inside - * lists and empty lists will be represented with a null entry for that column in that row. - * - * @param input_table Table to explode. - * @param explode_column_idx Column index to explode inside the table. - * @param mr Device memory resource used to allocate the returned column's device memory. - * - * @return A new table with explode_col exploded. - */ -std::unique_ptr
explode( - table_view const& input_table, - size_type explode_column_idx, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); - -/** - * @brief Explodes a list column's elements and includes a position column. - * - * Any list is exploded, which means the elements of the list in each row are expanded into new rows - * in the output. The corresponding rows for other columns in the input are duplicated. A position - * column is added that has the index inside the original list for each row. Example: - * ``` - * [[5,10,15], 100], - * [[20,25], 200], - * [[30], 300], - * returns - * [0, 5, 100], - * [1, 10, 100], - * [2, 15, 100], - * [0, 20, 200], - * [1, 25, 200], - * [0, 30, 300], - * ``` - * - * Nulls and empty lists propagate in different ways depending on what is null or empty. - *``` - * [[5,null,15], 100], - * [null, 200], - * [[], 300], - * returns - * [0, 5, 100], - * [1, null, 100], - * [2, 15, 100], - * ``` - * Note that null lists are not included in the resulting table, but nulls inside - * lists and empty lists will be represented with a null entry for that column in that row. - * - * @param input_table Table to explode. - * @param explode_column_idx Column index to explode inside the table. - * @param mr Device memory resource used to allocate the returned column's device memory. - * - * @return A new table with exploded value and position. The column order of return table is - * [cols before explode_input, explode_position, explode_value, cols after explode_input]. - */ -std::unique_ptr
explode_position( - table_view const& input_table, - size_type explode_column_idx, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); - /** @} */ // end of group } // namespace cudf diff --git a/cpp/include/cudf/table/table.hpp b/cpp/include/cudf/table/table.hpp index 553cf5e9096..4571362076c 100644 --- a/cpp/include/cudf/table/table.hpp +++ b/cpp/include/cudf/table/table.hpp @@ -110,6 +110,27 @@ class table { */ std::vector> release(); + /** + * @brief Returns a table_view built from a range of column indices. + * + * @throws std::out_of_range + * If any index is outside [0, num_columns()) + * + * @param begin Beginning of the range + * @param end Ending of the range + * @return A table_view consisting of columns from the original table + * specified by the elements of `column_indices` + */ + + template + table_view select(InputIterator begin, InputIterator end) const + { + std::vector columns(std::distance(begin, end)); + std::transform( + begin, end, columns.begin(), [this](auto index) { return _columns.at(index)->view(); }); + return table_view(columns); + } + /** * @brief Returns a table_view with set of specified columns. * @@ -120,7 +141,10 @@ class table { * @return A table_view consisting of columns from the original table * specified by the elements of `column_indices` */ - table_view select(std::vector const& column_indices) const; + table_view select(std::vector const& column_indices) const + { + return select(column_indices.begin(), column_indices.end()); + }; /** * @brief Returns a reference to the specified column diff --git a/cpp/include/cudf/table/table_view.hpp b/cpp/include/cudf/table/table_view.hpp index 22f2073f73c..083366cc310 100644 --- a/cpp/include/cudf/table/table_view.hpp +++ b/cpp/include/cudf/table/table_view.hpp @@ -174,6 +174,25 @@ class table_view : public detail::table_view_base { */ table_view(std::vector const& views); + /** + * @brief Returns a table_view built from a range of column indices. 
+ * + * @throws std::out_of_range + * If any index is outside [0, num_columns()) + * + * @param begin Beginning of the range + * @param end Ending of the range + * @return A table_view consisting of columns from the original table + * specified by the elements of `column_indices` + */ + template + table_view select(InputIterator begin, InputIterator end) const + { + std::vector columns(std::distance(begin, end)); + std::transform(begin, end, columns.begin(), [this](auto index) { return this->column(index); }); + return table_view(columns); + } + /** * @brief Returns a table_view with set of specified columns. * diff --git a/cpp/src/lists/explode.cu b/cpp/src/lists/explode.cu new file mode 100644 index 00000000000..336aabde15e --- /dev/null +++ b/cpp/src/lists/explode.cu @@ -0,0 +1,314 @@ +/* + * Copyright (c) 2021, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#include +#include +#include + +#include +#include + +namespace cudf { +namespace detail { +namespace { + +std::unique_ptr
build_table( + table_view const& input_table, + size_type const explode_column_idx, + column_view const& sliced_child, + cudf::device_span gather_map, + thrust::optional> explode_col_gather_map, + thrust::optional> position_array, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) +{ + auto select_iter = thrust::make_transform_iterator( + thrust::make_counting_iterator(0), + [explode_column_idx](size_type i) { return i >= explode_column_idx ? i + 1 : i; }); + + auto gathered_table = + detail::gather(input_table.select(select_iter, select_iter + input_table.num_columns() - 1), + gather_map.begin(), + gather_map.end(), + cudf::out_of_bounds_policy::DONT_CHECK, + stream, + mr); + + std::vector> columns = gathered_table.release()->release(); + + columns.insert(columns.begin() + explode_column_idx, + explode_col_gather_map + ? std::move(detail::gather(table_view({sliced_child}), + explode_col_gather_map->begin(), + explode_col_gather_map->end(), + cudf::out_of_bounds_policy::NULLIFY, + stream, + mr) + ->release()[0]) + : std::make_unique(sliced_child, stream, mr)); + + if (position_array) { + size_type position_size = position_array->size(); + columns.insert(columns.begin() + explode_column_idx, + std::make_unique( + data_type(type_to_id()), position_size, position_array->release())); + } + + return std::make_unique
(std::move(columns)); +} +} // namespace + +std::unique_ptr
explode(table_view const& input_table, + size_type const explode_column_idx, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) +{ + lists_column_view explode_col{input_table.column(explode_column_idx)}; + auto sliced_child = explode_col.get_sliced_child(stream); + rmm::device_uvector gather_map(sliced_child.size(), stream); + + // Sliced columns may require rebasing of the offsets. + auto offsets = explode_col.offsets_begin(); + // offsets + 1 here to skip the 0th offset, which removes a - 1 operation later. + auto offsets_minus_one = thrust::make_transform_iterator( + thrust::next(offsets), [offsets] __device__(auto i) { return (i - offsets[0]) - 1; }); + auto counting_iter = thrust::make_counting_iterator(0); + + // This looks like an off-by-one bug, but what is going on here is that we need to reduce each + // result from `lower_bound` by 1 to build the correct gather map. This can be accomplished by + // skipping the first entry and using the result of `lower_bound` directly. + thrust::lower_bound(rmm::exec_policy(stream), + offsets_minus_one, + offsets_minus_one + explode_col.size(), + counting_iter, + counting_iter + gather_map.size(), + gather_map.begin()); + + return build_table(input_table, + explode_column_idx, + sliced_child, + gather_map, + thrust::nullopt, + thrust::nullopt, + stream, + mr); +} + +std::unique_ptr
explode_position(table_view const& input_table, + size_type const explode_column_idx, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) +{ + lists_column_view explode_col{input_table.column(explode_column_idx)}; + auto sliced_child = explode_col.get_sliced_child(stream); + rmm::device_uvector gather_map(sliced_child.size(), stream); + + // Sliced columns may require rebasing of the offsets. + auto offsets = explode_col.offsets_begin(); + // offsets + 1 here to skip the 0th offset, which removes a - 1 operation later. + auto offsets_minus_one = thrust::make_transform_iterator( + offsets + 1, [offsets] __device__(auto i) { return (i - offsets[0]) - 1; }); + auto counting_iter = thrust::make_counting_iterator(0); + + rmm::device_uvector pos(sliced_child.size(), stream, mr); + + // This looks like an off-by-one bug, but what is going on here is that we need to reduce each + // result from `lower_bound` by 1 to build the correct gather map. This can be accomplished by + // skipping the first entry and using the result of `lower_bound` directly. + thrust::transform( + rmm::exec_policy(stream), + counting_iter, + counting_iter + gather_map.size(), + gather_map.begin(), + [position_array = pos.data(), + offsets_minus_one, + offsets, + offset_size = explode_col.size()] __device__(auto idx) -> size_type { + auto lb_idx = thrust::distance( + offsets_minus_one, + thrust::lower_bound(thrust::seq, offsets_minus_one, offsets_minus_one + offset_size, idx)); + position_array[idx] = idx - (offsets[lb_idx] - offsets[0]); + return lb_idx; + }); + + return build_table(input_table, + explode_column_idx, + sliced_child, + gather_map, + thrust::nullopt, + std::move(pos), + stream, + mr); +} + +std::unique_ptr
explode_outer(table_view const& input_table, + size_type const explode_column_idx, + bool include_position, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) +{ + lists_column_view explode_col{input_table.column(explode_column_idx)}; + auto sliced_child = explode_col.get_sliced_child(stream); + auto counting_iter = thrust::make_counting_iterator(0); + auto offsets = explode_col.offsets_begin(); + + // number of nulls or empty lists found so far in the explode column + rmm::device_uvector null_or_empty_offset(explode_col.size(), stream); + + auto null_or_empty = thrust::make_transform_iterator( + thrust::make_counting_iterator(0), + [offsets, offsets_size = explode_col.size() - 1] __device__(int idx) { + return (idx > offsets_size || (offsets[idx + 1] != offsets[idx])) ? 0 : 1; + }); + thrust::inclusive_scan(rmm::exec_policy(stream), + null_or_empty, + null_or_empty + sliced_child.size(), + null_or_empty_offset.begin()); + + auto null_or_empty_count = + null_or_empty_offset.size() > 0 ? null_or_empty_offset.back_element(stream) : 0; + if (null_or_empty_count == 0) { + // performance penalty to run the below loop if there are no nulls or empty lists. + // run simple explode instead + return include_position ? explode_position(input_table, explode_column_idx, stream, mr) + : explode(input_table, explode_column_idx, stream, mr); + } + + auto gather_map_size = sliced_child.size() + null_or_empty_count; + + rmm::device_uvector gather_map(gather_map_size, stream); + rmm::device_uvector explode_col_gather_map(gather_map_size, stream); + rmm::device_uvector pos(include_position ? gather_map_size : 0, stream, mr); + + // offsets + 1 here to skip the 0th offset, which removes a - 1 operation later. 
+ auto offsets_minus_one = thrust::make_transform_iterator( + thrust::next(offsets), [offsets] __device__(auto i) { return (i - offsets[0]) - 1; }); + // Fill in gather map with all the child column's entries + thrust::for_each(rmm::exec_policy(stream), + counting_iter, + counting_iter + sliced_child.size(), + [offsets_minus_one, + gather_map = gather_map.begin(), + explode_col_gather_map = explode_col_gather_map.begin(), + position_array = pos.begin(), + include_position, + offsets, + null_or_empty_offset = null_or_empty_offset.begin(), + null_or_empty, + offset_size = explode_col.offsets().size() - 1] __device__(auto idx) { + auto lb_idx = thrust::distance( + offsets_minus_one, + thrust::lower_bound( + thrust::seq, offsets_minus_one, offsets_minus_one + (offset_size), idx)); + auto index_to_write = null_or_empty_offset[lb_idx] + idx; + gather_map[index_to_write] = lb_idx; + explode_col_gather_map[index_to_write] = idx; + if (include_position) { + position_array[index_to_write] = idx - (offsets[lb_idx] - offsets[0]); + } + if (null_or_empty[idx]) { + auto invalid_index = null_or_empty_offset[idx] == 0 + ? offsets[idx] + : offsets[idx] + null_or_empty_offset[idx] - 1; + gather_map[invalid_index] = idx; + + // negative one to indicate a null value + explode_col_gather_map[invalid_index] = -1; + + if (include_position) { position_array[invalid_index] = 0; } + } + }); + + return build_table( + input_table, + explode_column_idx, + sliced_child, + gather_map, + explode_col_gather_map, + include_position ? std::move(pos) : thrust::optional>{}, + stream, + mr); +} + +} // namespace detail + +/** + * @copydoc cudf::explode(input_table,explode_column_idx,rmm::mr::device_memory_resource) + */ +std::unique_ptr
explode(table_view const& input_table, + size_type explode_column_idx, + rmm::mr::device_memory_resource* mr) +{ + CUDF_FUNC_RANGE(); + CUDF_EXPECTS(input_table.column(explode_column_idx).type().id() == type_id::LIST, + "Unsupported non-list column"); + return detail::explode(input_table, explode_column_idx, rmm::cuda_stream_default, mr); +} + +/** + * @copydoc cudf::explode_position(input_table,explode_column_idx,rmm::mr::device_memory_resource) + */ +std::unique_ptr
explode_position(table_view const& input_table, + size_type explode_column_idx, + rmm::mr::device_memory_resource* mr) +{ + CUDF_FUNC_RANGE(); + CUDF_EXPECTS(input_table.column(explode_column_idx).type().id() == type_id::LIST, + "Unsupported non-list column"); + return detail::explode_position(input_table, explode_column_idx, rmm::cuda_stream_default, mr); +} + +/** + * @copydoc cudf::explode_outer(input_table,explode_column_idx,rmm::mr::device_memory_resource) + */ +std::unique_ptr
explode_outer(table_view const& input_table, + size_type explode_column_idx, + rmm::mr::device_memory_resource* mr) +{ + CUDF_FUNC_RANGE(); + CUDF_EXPECTS(input_table.column(explode_column_idx).type().id() == type_id::LIST, + "Unsupported non-list column"); + return detail::explode_outer( + input_table, explode_column_idx, false, rmm::cuda_stream_default, mr); +} + +/** + * @copydoc + * cudf::explode_outer_position(input_table,explode_column_idx,rmm::mr::device_memory_resource) + */ +std::unique_ptr
explode_outer_position(table_view const& input_table, + size_type explode_column_idx, + rmm::mr::device_memory_resource* mr) +{ + CUDF_FUNC_RANGE(); + CUDF_EXPECTS(input_table.column(explode_column_idx).type().id() == type_id::LIST, + "Unsupported non-list column"); + return detail::explode_outer(input_table, explode_column_idx, true, rmm::cuda_stream_default, mr); +} + +} // namespace cudf diff --git a/cpp/src/reshape/explode.cu b/cpp/src/reshape/explode.cu deleted file mode 100644 index 34d7d8fe31d..00000000000 --- a/cpp/src/reshape/explode.cu +++ /dev/null @@ -1,178 +0,0 @@ -/* - * Copyright (c) 2021, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include - -#include -#include -#include - -#include -#include - -namespace cudf { -namespace detail { -namespace { -/** - * @brief Function object for exploding a column. - */ -struct explode_functor { - /** - * @brief Function object for exploding a column. - */ - template - std::unique_ptr
operator()(table_view const& input_table, - size_type const explode_column_idx, - bool include_pos, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) const - { - CUDF_FAIL("Unsupported non-list column"); - - return std::make_unique
(); - } -}; - -template <> -std::unique_ptr
explode_functor::operator()( - table_view const& input_table, - size_type const explode_column_idx, - bool include_pos, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) const -{ - lists_column_view lc{input_table.column(explode_column_idx)}; - auto sliced_child = lc.get_sliced_child(stream); - rmm::device_uvector gather_map_indices(sliced_child.size(), stream); - - // Sliced columns may require rebasing of the offsets. - auto offsets = lc.offsets_begin(); - // offsets + 1 here to skip the 0th offset, which removes a - 1 operation later. - auto offsets_minus_one = thrust::make_transform_iterator( - offsets + 1, [offsets] __device__(auto i) { return (i - offsets[0]) - 1; }); - auto counting_iter = thrust::make_counting_iterator(0); - - rmm::device_uvector pos(include_pos ? sliced_child.size() : 0, stream, mr); - - // This looks like an off-by-one bug, but what is going on here is that we need to reduce each - // result from `lower_bound` by 1 to build the correct gather map. This can be accomplished by - // skipping the first entry and using the result of `lower_bound` directly. - if (include_pos) { - thrust::transform( - rmm::exec_policy(stream), - counting_iter, - counting_iter + gather_map_indices.size(), - gather_map_indices.begin(), - [position_array = pos.data(), offsets_minus_one, offsets, offset_size = lc.size()] __device__( - auto idx) -> size_type { - auto lb_idx = thrust::lower_bound( - thrust::seq, offsets_minus_one, offsets_minus_one + offset_size, idx) - - offsets_minus_one; - position_array[idx] = idx - (offsets[lb_idx] - offsets[0]); - return lb_idx; - }); - } else { - thrust::lower_bound(rmm::exec_policy(stream), - offsets_minus_one, - offsets_minus_one + lc.size(), - counting_iter, - counting_iter + gather_map_indices.size(), - gather_map_indices.begin()); - } - - auto select_iter = thrust::make_transform_iterator( - thrust::make_counting_iterator(0), - [explode_column_idx](size_type i) { return i >= explode_column_idx ? 
i + 1 : i; }); - std::vector selected_columns(select_iter, select_iter + input_table.num_columns() - 1); - - auto gathered_table = cudf::detail::gather(input_table.select(selected_columns), - gather_map_indices.begin(), - gather_map_indices.end(), - cudf::out_of_bounds_policy::DONT_CHECK, - stream, - mr); - - std::vector> columns = gathered_table.release()->release(); - - columns.insert(columns.begin() + explode_column_idx, - std::make_unique(sliced_child, stream, mr)); - - if (include_pos) { - columns.insert(columns.begin() + explode_column_idx, - std::make_unique( - data_type(type_to_id()), sliced_child.size(), pos.release())); - } - - return std::make_unique
(std::move(columns)); -} -} // namespace - -/** - * @copydoc - * cudf::explode(input_table,explode_column_idx,rmm::mr::device_memory_resource) - * - * @param stream CUDA stream used for device memory operations and kernel launches. - */ -std::unique_ptr
explode(table_view const& input_table, - size_type explode_column_idx, - bool include_pos, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) -{ - return type_dispatcher(input_table.column(explode_column_idx).type(), - explode_functor{}, - input_table, - explode_column_idx, - include_pos, - stream, - mr); -} - -} // namespace detail - -/** - * @copydoc cudf::explode(input_table,explode_column_idx,rmm::mr::device_memory_resource) - */ -std::unique_ptr
explode(table_view const& input_table, - size_type explode_column_idx, - rmm::mr::device_memory_resource* mr) -{ - CUDF_FUNC_RANGE(); - return detail::explode(input_table, explode_column_idx, false, rmm::cuda_stream_default, mr); -} - -/** - * @copydoc cudf::explode_position(input_table,explode_column_idx,rmm::mr::device_memory_resource) - */ -std::unique_ptr
explode_position(table_view const& input_table, - size_type explode_column_idx, - rmm::mr::device_memory_resource* mr) -{ - CUDF_FUNC_RANGE(); - return detail::explode(input_table, explode_column_idx, true, rmm::cuda_stream_default, mr); -} - -} // namespace cudf diff --git a/cpp/src/table/table.cpp b/cpp/src/table/table.cpp index afda6313254..4cd85fc5e7e 100644 --- a/cpp/src/table/table.cpp +++ b/cpp/src/table/table.cpp @@ -81,12 +81,4 @@ std::vector> table::release() return std::move(_columns); } -// Returns a table_view with set of specified columns -table_view table::select(std::vector const& column_indices) const -{ - std::vector columns; - for (auto index : column_indices) { columns.push_back(_columns.at(index)->view()); } - return table_view(columns); -} - } // namespace cudf diff --git a/cpp/src/table/table_view.cpp b/cpp/src/table/table_view.cpp index 9c421f6fd36..c64bf5b2823 100644 --- a/cpp/src/table/table_view.cpp +++ b/cpp/src/table/table_view.cpp @@ -63,11 +63,7 @@ template class table_view_base; // Returns a table_view with set of specified columns table_view table_view::select(std::vector const& column_indices) const { - std::vector columns(column_indices.size()); - std::transform(column_indices.begin(), column_indices.end(), columns.begin(), [this](auto index) { - return this->column(index); - }); - return table_view(columns); + return select(column_indices.begin(), column_indices.end()); } // Convert mutable view to immutable view diff --git a/cpp/tests/CMakeLists.txt b/cpp/tests/CMakeLists.txt index b94e9587fc0..e95aab16098 100644 --- a/cpp/tests/CMakeLists.txt +++ b/cpp/tests/CMakeLists.txt @@ -287,7 +287,6 @@ ConfigureTest(SEARCH_TEST search/search_test.cpp) # - reshape test ---------------------------------------------------------------------------------- ConfigureTest(RESHAPE_TEST reshape/byte_cast_tests.cpp - reshape/explode_tests.cpp reshape/interleave_columns_tests.cpp reshape/tile_tests.cpp) @@ -390,6 +389,7 @@ ConfigureTest(AST_TEST 
ast/transform_tests.cpp) ConfigureTest(LISTS_TEST lists/contains_tests.cpp lists/count_elements_tests.cpp + lists/explode_tests.cpp lists/drop_list_duplicates_tests.cpp lists/extract_tests.cpp lists/sort_lists_tests.cpp) diff --git a/cpp/tests/lists/explode_tests.cpp b/cpp/tests/lists/explode_tests.cpp new file mode 100644 index 00000000000..2ec9294d118 --- /dev/null +++ b/cpp/tests/lists/explode_tests.cpp @@ -0,0 +1,819 @@ +/* + * Copyright (c) 2021, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include +#include +#include +#include + +#include +#include + +using namespace cudf::test; +using FCW = fixed_width_column_wrapper; +using LCW = lists_column_wrapper; + +class ExplodeTest : public cudf::test::BaseFixture { +}; + +class ExplodeOuterTest : public cudf::test::BaseFixture { +}; + +template +class ExplodeTypedTest : public cudf::test::BaseFixture { +}; + +template +class ExplodeOuterTypedTest : public cudf::test::BaseFixture { +}; + +TYPED_TEST_CASE(ExplodeTypedTest, cudf::test::FixedPointTypes); + +TYPED_TEST_CASE(ExplodeOuterTypedTest, cudf::test::FixedPointTypes); + +TEST_F(ExplodeTest, Empty) +{ + cudf::table_view t({LCW{}, FCW{}}); + + auto ret = cudf::explode(t, 0); + + cudf::table_view expected({FCW{}, FCW{}}); + + CUDF_TEST_EXPECT_TABLES_EQUAL(ret->view(), expected); + + auto pos_ret = cudf::explode_position(t, 0); + + cudf::table_view pos_expected({FCW{}, FCW{}, FCW{}}); + + CUDF_TEST_EXPECT_TABLES_EQUAL(pos_ret->view(), pos_expected); +} + +TEST_F(ExplodeTest, NonList) +{ + cudf::table_view t({FCW{100, 200, 300}, FCW{100, 200, 300}}); + + EXPECT_THROW(cudf::explode(t, 1), cudf::logic_error); + EXPECT_THROW(cudf::explode_position(t, 1), cudf::logic_error); +} + +TEST_F(ExplodeTest, Basics) +{ + // a b c + // 100 [1, 2, 7] string0 + // 200 [5, 6] string1 + // 300 [0, 3] string2 + + FCW a{100, 200, 300}; + LCW b{LCW{1, 2, 7}, LCW{5, 6}, LCW{0, 3}}; + strings_column_wrapper c{"string0", "string1", "string2"}; + + FCW expected_a{100, 100, 100, 200, 200, 300, 300}; + FCW expected_b{1, 2, 7, 5, 6, 0, 3}; + strings_column_wrapper expected_c{ + "string0", "string0", "string0", "string1", "string1", "string2", "string2"}; + + cudf::table_view t({a, b, c}); + cudf::table_view expected({expected_a, expected_b, expected_c}); + + auto ret = cudf::explode(t, 1); + + CUDF_TEST_EXPECT_TABLES_EQUAL(ret->view(), expected); + + FCW expected_pos_col{0, 1, 2, 0, 1, 0, 1}; + cudf::table_view pos_expected({expected_a, expected_pos_col, expected_b, 
expected_c}); + + auto pos_ret = cudf::explode_position(t, 1); + CUDF_TEST_EXPECT_TABLES_EQUAL(pos_ret->view(), pos_expected); +} + +TEST_F(ExplodeTest, SingleNull) +{ + // a b + // [1, 2, 7] 100 + // [5, 6] 200 + // [] 300 + // [0, 3] 400 + + auto first_invalid = + cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i == 0 ? false : true; }); + + LCW a({LCW{1, 2, 7}, LCW{5, 6}, LCW{}, LCW{0, 3}}, first_invalid); + FCW b({100, 200, 300, 400}); + + FCW expected_a{5, 6, 0, 3}; + FCW expected_b{200, 200, 400, 400}; + + cudf::table_view t({a, b}); + cudf::table_view expected({expected_a, expected_b}); + + auto ret = cudf::explode(t, 0); + + CUDF_TEST_EXPECT_TABLES_EQUAL(ret->view(), expected); + + FCW expected_pos_col{0, 1, 0, 1}; + cudf::table_view pos_expected({expected_pos_col, expected_a, expected_b}); + + auto pos_ret = cudf::explode_position(t, 0); + CUDF_TEST_EXPECT_TABLES_EQUAL(pos_ret->view(), pos_expected); +} + +TEST_F(ExplodeTest, Nulls) +{ + // a b + // [1, 2, 7] 100 + // [5, 6] 200 + // [0, 3] 300 + + auto valids = cudf::detail::make_counting_transform_iterator( + 0, [](auto i) { return i % 2 == 0 ? 
true : false; }); + auto always_valid = + cudf::detail::make_counting_transform_iterator(0, [](auto i) { return true; }); + + LCW a({LCW{1, 2, 7}, LCW{5, 6}, LCW{0, 3}}, valids); + FCW b({100, 200, 300}, valids); + + FCW expected_a({1, 2, 7, 0, 3}); + FCW expected_b({100, 100, 100, 300, 300}, always_valid); + + cudf::table_view t({a, b}); + cudf::table_view expected({expected_a, expected_b}); + + auto ret = cudf::explode(t, 0); + + CUDF_TEST_EXPECT_TABLES_EQUAL(ret->view(), expected); + + FCW expected_pos_col{0, 1, 2, 0, 1}; + cudf::table_view pos_expected({expected_pos_col, expected_a, expected_b}); + + auto pos_ret = cudf::explode_position(t, 0); + CUDF_TEST_EXPECT_TABLES_EQUAL(pos_ret->view(), pos_expected); +} + +TEST_F(ExplodeTest, NullsInList) +{ + // a b + // [1, 2, 7] 100 + // [5, 6, 0, 9] 200 + // [] 300 + // [0, 3, 8] 400 + + auto valids = cudf::detail::make_counting_transform_iterator( + 0, [](auto i) { return i % 2 == 0 ? true : false; }); + + LCW a{LCW({1, 2, 7}, valids), LCW({5, 6, 0, 9}, valids), LCW{}, LCW({0, 3, 8}, valids)}; + FCW b{100, 200, 300, 400}; + + FCW expected_a({1, 2, 7, 5, 6, 0, 9, 0, 3, 8}, {1, 0, 1, 1, 0, 1, 0, 1, 0, 1}); + FCW expected_b{100, 100, 100, 200, 200, 200, 200, 400, 400, 400}; + + cudf::table_view t({a, b}); + cudf::table_view expected({expected_a, expected_b}); + + auto ret = cudf::explode(t, 0); + + CUDF_TEST_EXPECT_TABLES_EQUAL(ret->view(), expected); + + FCW expected_pos_col{0, 1, 2, 0, 1, 2, 3, 0, 1, 2}; + cudf::table_view pos_expected({expected_pos_col, expected_a, expected_b}); + + auto pos_ret = cudf::explode_position(t, 0); + CUDF_TEST_EXPECT_TABLES_EQUAL(pos_ret->view(), pos_expected); +} + +TEST_F(ExplodeTest, Nested) +{ + // a b + // [[1, 2], [7, 6, 5]] 100 + // [[5, 6]] 200 + // [[0, 3],[],[5],[2, 1]] 300 + + LCW a{LCW{LCW{1, 2}, LCW{7, 6, 5}}, LCW{LCW{5, 6}}, LCW{LCW{0, 3}, LCW{}, LCW{5}, LCW{2, 1}}}; + FCW b{100, 200, 300}; + + LCW expected_a{LCW{1, 2}, LCW{7, 6, 5}, LCW{5, 6}, LCW{0, 3}, LCW{}, LCW{5}, 
LCW{2, 1}}; + FCW expected_b{100, 100, 200, 300, 300, 300, 300}; + + cudf::table_view t({a, b}); + cudf::table_view expected({expected_a, expected_b}); + + auto ret = cudf::explode(t, 0); + + CUDF_TEST_EXPECT_TABLES_EQUAL(ret->view(), expected); + + FCW expected_pos_col{0, 1, 0, 0, 1, 2, 3}; + cudf::table_view pos_expected({expected_pos_col, expected_a, expected_b}); + + auto pos_ret = cudf::explode_position(t, 0); + CUDF_TEST_EXPECT_TABLES_EQUAL(pos_ret->view(), pos_expected); +} + +TEST_F(ExplodeTest, NestedNulls) +{ + // a b + // [[1, 2], [7, 6, 5]] 100 + // [[5, 6]] 200 + // [[0, 3],[5],[2, 1]] 300 + + auto valids = cudf::detail::make_counting_transform_iterator( + 0, [](auto i) { return i % 2 == 0 ? true : false; }); + auto always_valid = + cudf::detail::make_counting_transform_iterator(0, [](auto i) { return true; }); + + LCW a({LCW{LCW{1, 2}, LCW{7, 6, 5}}, LCW{LCW{5, 6}}, LCW{LCW{0, 3}, LCW{5}, LCW{2, 1}}}, valids); + FCW b({100, 200, 300}, valids); + + LCW expected_a{LCW{1, 2}, LCW{7, 6, 5}, LCW{0, 3}, LCW{5}, LCW{2, 1}}; + FCW expected_b({100, 100, 300, 300, 300}, always_valid); + + cudf::table_view t({a, b}); + cudf::table_view expected({expected_a, expected_b}); + + auto ret = cudf::explode(t, 0); + + CUDF_TEST_EXPECT_TABLES_EQUAL(ret->view(), expected); + + FCW expected_pos_col{0, 1, 0, 1, 2}; + cudf::table_view pos_expected({expected_pos_col, expected_a, expected_b}); + + auto pos_ret = cudf::explode_position(t, 0); + CUDF_TEST_EXPECT_TABLES_EQUAL(pos_ret->view(), pos_expected); +} + +TEST_F(ExplodeTest, NullsInNested) +{ + // a b + // [[1, 2], [7, 6, 5]] 100 + // [[5, 6]] 200 + // [[0, 3],[5],[2, 1]] 300 + + auto valids = cudf::detail::make_counting_transform_iterator( + 0, [](auto i) { return i % 2 == 0 ? 
true : false; }); + + LCW a({LCW{LCW({1, 2}, valids), LCW{7, 6, 5}}, + LCW{LCW{5, 6}}, + LCW{LCW{0, 3}, LCW{5}, LCW({2, 1}, valids)}}); + FCW b({100, 200, 300}); + + LCW expected_a{ + LCW({1, 2}, valids), LCW{7, 6, 5}, LCW{5, 6}, LCW{0, 3}, LCW{5}, LCW({2, 1}, valids)}; + FCW expected_b{100, 100, 200, 300, 300, 300}; + + cudf::table_view t({a, b}); + cudf::table_view expected({expected_a, expected_b}); + + auto ret = cudf::explode(t, 0); + + CUDF_TEST_EXPECT_TABLES_EQUAL(ret->view(), expected); + + FCW expected_pos_col{0, 1, 0, 0, 1, 2}; + cudf::table_view pos_expected({expected_pos_col, expected_a, expected_b}); + + auto pos_ret = cudf::explode_position(t, 0); + CUDF_TEST_EXPECT_TABLES_EQUAL(pos_ret->view(), pos_expected); +} + +TEST_F(ExplodeTest, NullsInNestedDoubleExplode) +{ + // a b + // [[1, 2], [], [7, 6, 5]] 100 + // [[5, 6]] 200 + // [[0, 3],[5],[2, 1]] 300 + + auto valids = cudf::detail::make_counting_transform_iterator( + 0, [](auto i) { return i % 2 == 0 ? true : false; }); + + LCW a{LCW{LCW({1, 2}, valids), LCW{}, LCW{7, 6, 5}}, + LCW{LCW{5, 6}}, + LCW{LCW{0, 3}, LCW{5}, LCW({2, 1}, valids)}}; + FCW b{100, 200, 300}; + + FCW expected_a({1, 2, 7, 6, 5, 5, 6, 0, 3, 5, 2, 1}, {1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0}); + FCW expected_b{100, 100, 100, 100, 100, 200, 200, 300, 300, 300, 300, 300}; + + cudf::table_view t({a, b}); + cudf::table_view expected({expected_a, expected_b}); + + auto first_explode_ret = cudf::explode(t, 0); + auto ret = cudf::explode(first_explode_ret->view(), 0); + + CUDF_TEST_EXPECT_TABLES_EQUAL(ret->view(), expected); + + FCW expected_pos_col{0, 1, 0, 1, 2, 0, 1, 0, 1, 0, 0, 1}; + cudf::table_view pos_expected({expected_pos_col, expected_a, expected_b}); + + auto pos_ret = cudf::explode_position(first_explode_ret->view(), 0); + CUDF_TEST_EXPECT_TABLES_EQUAL(pos_ret->view(), pos_expected); +} + +TEST_F(ExplodeTest, NestedStructs) +{ + // a b + // [[1, 2], [7, 6, 5]] {100, "100"} + // [[5, 6]] {200, "200"} + // [[0, 3],[5],[2, 1]] 
{300, "300"} + + auto valids = cudf::detail::make_counting_transform_iterator( + 0, [](auto i) { return i % 2 == 0 ? true : false; }); + + LCW a({LCW{LCW({1, 2}, valids), LCW{7, 6, 5}}, + LCW{LCW{5, 6}}, + LCW{LCW{0, 3}, LCW{5}, LCW({2, 1}, valids)}}); + FCW b1({100, 200, 300}); + strings_column_wrapper b2{"100", "200", "300"}; + structs_column_wrapper b({b1, b2}); + + LCW expected_a{ + LCW({1, 2}, valids), LCW{7, 6, 5}, LCW{5, 6}, LCW{0, 3}, LCW{5}, LCW({2, 1}, valids)}; + FCW expected_b1{100, 100, 200, 300, 300, 300}; + strings_column_wrapper expected_b2{"100", "100", "200", "300", "300", "300"}; + structs_column_wrapper expected_b({expected_b1, expected_b2}); + + cudf::table_view t({a, b}); + cudf::table_view expected({expected_a, expected_b}); + + auto ret = cudf::explode(t, 0); + + CUDF_TEST_EXPECT_TABLES_EQUAL(ret->view(), expected); + + FCW expected_pos_col{0, 1, 0, 0, 1, 2}; + cudf::table_view pos_expected({expected_pos_col, expected_a, expected_b}); + + auto pos_ret = cudf::explode_position(t, 0); + CUDF_TEST_EXPECT_TABLES_EQUAL(pos_ret->view(), pos_expected); +} + +TYPED_TEST(ExplodeTypedTest, ListOfStructs) +{ + // a b + // [{70, "70"}, {75, "75"}] 100 + // [{50, "50"}, {55, "55"}] 200 + // [{35, "35"}, {45, "45"}] 300 + // [{25, "25"}, {30, "30"}] 400 + // [{15, "15"}, {20, "20"}] 500 + + auto numeric_col = + fixed_width_column_wrapper{{70, 75, 50, 55, 35, 45, 25, 30, 15, 20}}; + strings_column_wrapper string_col{"70", "75", "50", "55", "35", "45", "25", "30", "15", "20"}; + auto struct_col = structs_column_wrapper{{numeric_col, string_col}}.release(); + auto a = cudf::make_lists_column( + 5, FCW{0, 2, 4, 6, 8, 10}.release(), std::move(struct_col), cudf::UNKNOWN_NULL_COUNT, {}); + + FCW b{100, 200, 300, 400, 500}; + + cudf::table_view t({a->view(), b}); + auto ret = cudf::explode(t, 0); + + auto expected_numeric_col = + fixed_width_column_wrapper{{70, 75, 50, 55, 35, 45, 25, 30, 15, 20}}; + strings_column_wrapper expected_string_col{ + "70", "75", "50", 
"55", "35", "45", "25", "30", "15", "20"}; + + auto expected_a = structs_column_wrapper{{expected_numeric_col, expected_string_col}}.release(); + FCW expected_b{100, 100, 200, 200, 300, 300, 400, 400, 500, 500}; + + cudf::table_view expected({expected_a->view(), expected_b}); + + CUDF_TEST_EXPECT_TABLES_EQUAL(ret->view(), expected); + + FCW expected_pos_col{0, 1, 0, 1, 0, 1, 0, 1, 0, 1}; + cudf::table_view pos_expected({expected_pos_col, expected_a->view(), expected_b}); + + auto pos_ret = cudf::explode_position(t, 0); + CUDF_TEST_EXPECT_TABLES_EQUAL(pos_ret->view(), pos_expected); +} + +TEST_F(ExplodeTest, SlicedList) +{ + // a b + // [[1, 2],[7, 6, 5]] 100 + // [[5, 6]] 200 + // [[0, 3],[5],[2, 1]] 300 + // [[8, 3],[],[4, 3, 1, 2]] 400 + // [[2, 3, 4],[9, 8]] 500 + + // slicing the top 2 rows and the bottom row off + + auto valids = cudf::detail::make_counting_transform_iterator( + 0, [](auto i) { return i % 2 == 0 ? true : false; }); + + LCW a({LCW{LCW({1, 2}, valids), LCW{7, 6, 5}}, + LCW{LCW{5, 6}}, + LCW{LCW{0, 3}, LCW{5}, LCW({2, 1}, valids)}, + LCW{LCW{8, 3}, LCW{}, LCW({4, 3, 1, 2}, valids)}, + LCW{LCW{2, 3, 4}, LCW{9, 8}}}); + FCW b({100, 200, 300, 400, 500}); + + LCW expected_a{ + LCW{0, 3}, LCW{5}, LCW({2, 1}, valids), LCW{8, 3}, LCW{}, LCW({4, 3, 1, 2}, valids)}; + FCW expected_b{300, 300, 300, 400, 400, 400}; + + cudf::table_view t({a, b}); + auto sliced_t = cudf::slice(t, {2, 4}); + cudf::table_view expected({expected_a, expected_b}); + + auto ret = cudf::explode(sliced_t[0], 0); + + CUDF_TEST_EXPECT_TABLES_EQUAL(ret->view(), expected); + + FCW expected_pos_col{0, 1, 2, 0, 1, 2}; + cudf::table_view pos_expected({expected_pos_col, expected_a, expected_b}); + + auto pos_ret = cudf::explode_position(sliced_t[0], 0); + CUDF_TEST_EXPECT_TABLES_EQUAL(pos_ret->view(), pos_expected); +} + +TEST_F(ExplodeOuterTest, Empty) +{ + LCW a{}; + FCW b{}; + + cudf::table_view t({LCW{}, FCW{}}); + + auto ret = cudf::explode_outer(t, 0); + + FCW expected_a{}; + FCW 
expected_b{}; + cudf::table_view expected({FCW{}, FCW{}}); + + CUDF_TEST_EXPECT_TABLES_EQUAL(ret->view(), expected); +} + +TEST_F(ExplodeOuterTest, NonList) +{ + cudf::table_view t({FCW{100, 200, 300}, FCW{100, 200, 300}}); + + EXPECT_THROW(cudf::explode_outer(t, 1), cudf::logic_error); + EXPECT_THROW(cudf::explode_outer_position(t, 1), cudf::logic_error); +} + +TEST_F(ExplodeOuterTest, Basics) +{ + // a b c + // 100 [1, 2, 7] string0 + // 200 [5, 6] string1 + // 300 [0, 3] string2 + + FCW a{100, 200, 300}; + LCW b{LCW{1, 2, 7}, LCW{5, 6}, LCW{0, 3}}; + strings_column_wrapper c{"string0", "string1", "string2"}; + + FCW expected_a{100, 100, 100, 200, 200, 300, 300}; + FCW expected_b{1, 2, 7, 5, 6, 0, 3}; + strings_column_wrapper expected_c{ + "string0", "string0", "string0", "string1", "string1", "string2", "string2"}; + + cudf::table_view t({a, b, c}); + cudf::table_view expected({expected_a, expected_b, expected_c}); + + auto ret = cudf::explode_outer(t, 1); + CUDF_TEST_EXPECT_TABLES_EQUAL(ret->view(), expected); + + FCW expected_pos_col{0, 1, 2, 0, 1, 0, 1}; + cudf::table_view pos_expected({expected_a, expected_pos_col, expected_b, expected_c}); + + auto pos_ret = cudf::explode_outer_position(t, 1); + CUDF_TEST_EXPECT_TABLES_EQUAL(pos_ret->view(), pos_expected); +} + +TEST_F(ExplodeOuterTest, SingleNull) +{ + // a b + // [1, 2, 7] 100 + // [5, 6] 200 + // [] 300 + // [0, 3] 400 + + auto first_invalid = + cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i == 0 ? 
false : true; }); + + LCW a({LCW{1, 2, 7}, LCW{5, 6}, LCW{}, LCW{0, 3}}, first_invalid); + FCW b({100, 200, 300, 400}); + + FCW expected_a{{0, 5, 6, 0, 0, 3}, {0, 1, 1, 0, 1, 1}}; + FCW expected_b{100, 200, 200, 300, 400, 400}; + + cudf::table_view t({a, b}); + cudf::table_view expected({expected_a, expected_b}); + + auto ret = cudf::explode_outer(t, 0); + CUDF_TEST_EXPECT_TABLES_EQUAL(ret->view(), expected); + + FCW expected_pos_col{0, 0, 1, 0, 0, 1}; + cudf::table_view pos_expected({expected_pos_col, expected_a, expected_b}); + + auto pos_ret = cudf::explode_outer_position(t, 0); + CUDF_TEST_EXPECT_TABLES_EQUAL(pos_ret->view(), pos_expected); +} + +TEST_F(ExplodeOuterTest, Nulls) +{ + // a b + // [1, 2, 7] 100 + // [5, 6] 200 + // [0, 3] 300 + + auto valids = cudf::detail::make_counting_transform_iterator( + 0, [](auto i) { return i % 2 == 0 ? true : false; }); + + LCW a({LCW{1, 2, 7}, LCW{5, 6}, LCW{0, 3}}, valids); + FCW b({100, 200, 300}, valids); + + FCW expected_a({1, 2, 7, 0, 0, 3}, {1, 1, 1, 0, 1, 1}); + FCW expected_b({100, 100, 100, 200, 300, 300}, {1, 1, 1, 0, 1, 1}); + + cudf::table_view t({a, b}); + cudf::table_view expected({expected_a, expected_b}); + + auto ret = cudf::explode_outer(t, 0); + CUDF_TEST_EXPECT_TABLES_EQUAL(ret->view(), expected); + + FCW expected_pos_col{0, 1, 2, 0, 0, 1}; + cudf::table_view pos_expected({expected_pos_col, expected_a, expected_b}); + + auto pos_ret = cudf::explode_outer_position(t, 0); + CUDF_TEST_EXPECT_TABLES_EQUAL(pos_ret->view(), pos_expected); +} + +TEST_F(ExplodeOuterTest, NullsInList) +{ + // a b + // [1, 2, 7] 100 + // [5, 6, 0, 9] 200 + // [] 300 + // [0, 3, 8] 400 + + auto valids = cudf::detail::make_counting_transform_iterator( + 0, [](auto i) { return i % 2 == 0 ? 
true : false; }); + + LCW a{LCW({1, 2, 7}, valids), LCW({5, 6, 0, 9}, valids), LCW{}, LCW({0, 3, 8}, valids)}; + FCW b{100, 200, 300, 400}; + + FCW expected_a({1, 2, 7, 5, 6, 0, 9, 0, 0, 3, 8}, {1, 0, 1, 1, 0, 1, 0, 0, 1, 0, 1}); + FCW expected_b{100, 100, 100, 200, 200, 200, 200, 300, 400, 400, 400}; + + cudf::table_view t({a, b}); + cudf::table_view expected({expected_a, expected_b}); + + auto ret = cudf::explode_outer(t, 0); + + CUDF_TEST_EXPECT_TABLES_EQUAL(ret->view(), expected); + + FCW expected_pos_col{0, 1, 2, 0, 1, 2, 3, 0, 0, 1, 2}; + cudf::table_view pos_expected({expected_pos_col, expected_a, expected_b}); + + auto pos_ret = cudf::explode_outer_position(t, 0); + CUDF_TEST_EXPECT_TABLES_EQUAL(pos_ret->view(), pos_expected); +} + +TEST_F(ExplodeOuterTest, Nested) +{ + // a b + // [[1, 2], [7, 6, 5]] 100 + // [[5, 6]] 200 + // [[0, 3],[],[5],[2, 1]] 300 + + LCW a{LCW{LCW{1, 2}, LCW{7, 6, 5}}, LCW{LCW{5, 6}}, LCW{LCW{0, 3}, LCW{}, LCW{5}, LCW{2, 1}}}; + FCW b{100, 200, 300}; + + LCW expected_a{LCW{1, 2}, LCW{7, 6, 5}, LCW{5, 6}, LCW{0, 3}, LCW{}, LCW{5}, LCW{2, 1}}; + FCW expected_b{100, 100, 200, 300, 300, 300, 300}; + + cudf::table_view t({a, b}); + cudf::table_view expected({expected_a, expected_b}); + + auto ret = cudf::explode_outer(t, 0); + + CUDF_TEST_EXPECT_TABLES_EQUAL(ret->view(), expected); + + FCW expected_pos_col{0, 1, 0, 0, 1, 2, 3}; + cudf::table_view pos_expected({expected_pos_col, expected_a, expected_b}); + + auto pos_ret = cudf::explode_outer_position(t, 0); + CUDF_TEST_EXPECT_TABLES_EQUAL(pos_ret->view(), pos_expected); +} + +TEST_F(ExplodeOuterTest, NestedNulls) +{ + // a b + // [[1, 2], [7, 6, 5]] 100 + // [[5, 6]] 200 + // [[0, 3],[5],[2, 1]] 300 + + auto valids = cudf::detail::make_counting_transform_iterator( + 0, [](auto i) { return i % 2 == 0 ? 
true : false; }); + + LCW a({LCW{LCW{1, 2}, LCW{7, 6, 5}}, LCW{LCW{5, 6}}, LCW{LCW{0, 3}, LCW{5}, LCW{2, 1}}}, valids); + FCW b({100, 200, 300}); + + auto expected_valids = + cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i == 2 ? false : true; }); + LCW expected_a({LCW{1, 2}, LCW{7, 6, 5}, LCW{}, LCW{0, 3}, LCW{5}, LCW{2, 1}}, expected_valids); + FCW expected_b({100, 100, 200, 300, 300, 300}); + cudf::table_view t({a, b}); + cudf::table_view expected({expected_a, expected_b}); + + auto ret = cudf::explode_outer(t, 0); + CUDF_TEST_EXPECT_TABLES_EQUAL(ret->view(), expected); + + FCW expected_pos_col{0, 1, 0, 0, 1, 2}; + cudf::table_view pos_expected({expected_pos_col, expected_a, expected_b}); + + auto pos_ret = cudf::explode_outer_position(t, 0); + CUDF_TEST_EXPECT_TABLES_EQUAL(pos_ret->view(), pos_expected); +} + +TEST_F(ExplodeOuterTest, NullsInNested) +{ + // a b + // [[1, 2], [7, 6, 5]] 100 + // [[5, 6]] 200 + // [[0, 3],[5],[2, 1]] 300 + + auto valids = cudf::detail::make_counting_transform_iterator( + 0, [](auto i) { return i % 2 == 0 ? 
true : false; }); + + LCW a({LCW{LCW({1, 2}, valids), LCW{7, 6, 5}}, + LCW{LCW{5, 6}}, + LCW{LCW{0, 3}, LCW{5}, LCW({2, 1}, valids)}}); + FCW b({100, 200, 300}); + + LCW expected_a{ + LCW({1, 2}, valids), LCW{7, 6, 5}, LCW{5, 6}, LCW{0, 3}, LCW{5}, LCW({2, 1}, valids)}; + FCW expected_b{100, 100, 200, 300, 300, 300}; + + cudf::table_view t({a, b}); + cudf::table_view expected({expected_a, expected_b}); + + auto ret = cudf::explode_outer(t, 0); + + CUDF_TEST_EXPECT_TABLES_EQUAL(ret->view(), expected); + + FCW expected_pos_col{0, 1, 0, 0, 1, 2}; + cudf::table_view pos_expected({expected_pos_col, expected_a, expected_b}); + + auto pos_ret = cudf::explode_outer_position(t, 0); + CUDF_TEST_EXPECT_TABLES_EQUAL(pos_ret->view(), pos_expected); +} + +TEST_F(ExplodeOuterTest, NullsInNestedDoubleExplode) +{ + // a b + // [[1, 2], [], [7, 6, 5]] 100 + // [[5, 6]] 200 + // [[0, 3],[5],[2, 1]] 300 + + auto valids = cudf::detail::make_counting_transform_iterator( + 0, [](auto i) { return i % 2 == 0 ? true : false; }); + + LCW a{LCW{LCW({1, 2}, valids), LCW{}, LCW{7, 6, 5}}, + LCW{LCW{5, 6}}, + LCW{LCW{0, 3}, LCW{5}, LCW({2, 1}, valids)}}; + FCW b{100, 200, 300}; + + FCW expected_a({1, 2, 0, 7, 6, 5, 5, 6, 0, 3, 5, 2, 1}, {1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0}); + FCW expected_b{100, 100, 100, 100, 100, 100, 200, 200, 300, 300, 300, 300, 300}; + + cudf::table_view t({a, b}); + cudf::table_view expected({expected_a, expected_b}); + + auto first_explode_ret = cudf::explode_outer(t, 0); + auto ret = cudf::explode_outer(first_explode_ret->view(), 0); + + CUDF_TEST_EXPECT_TABLES_EQUAL(ret->view(), expected); + + FCW expected_pos_col{0, 1, 0, 0, 1, 2, 0, 1, 0, 1, 0, 0, 1}; + cudf::table_view pos_expected({expected_pos_col, expected_a, expected_b}); + + auto pos_ret = cudf::explode_outer_position(first_explode_ret->view(), 0); + CUDF_TEST_EXPECT_TABLES_EQUAL(pos_ret->view(), pos_expected); +} + +TEST_F(ExplodeOuterTest, NestedStructs) +{ + // a b + // [[1, 2], [7, 6, 5]] {100, "100"} + 
// [[5, 6]] {200, "200"} + // [[0, 3],[5],[2, 1]] {300, "300"} + + auto valids = cudf::detail::make_counting_transform_iterator( + 0, [](auto i) { return i % 2 == 0 ? true : false; }); + + LCW a({LCW{LCW({1, 2}, valids), LCW{7, 6, 5}}, + LCW{LCW{5, 6}}, + LCW{LCW{0, 3}, LCW{5}, LCW({2, 1}, valids)}}); + FCW b1({100, 200, 300}); + strings_column_wrapper b2{"100", "200", "300"}; + structs_column_wrapper b({b1, b2}); + + LCW expected_a{ + LCW({1, 2}, valids), LCW{7, 6, 5}, LCW{5, 6}, LCW{0, 3}, LCW{5}, LCW({2, 1}, valids)}; + FCW expected_b1{100, 100, 200, 300, 300, 300}; + strings_column_wrapper expected_b2{"100", "100", "200", "300", "300", "300"}; + structs_column_wrapper expected_b({expected_b1, expected_b2}); + + cudf::table_view t({a, b}); + cudf::table_view expected({expected_a, expected_b}); + + auto ret = cudf::explode_outer(t, 0); + + CUDF_TEST_EXPECT_TABLES_EQUAL(ret->view(), expected); + + FCW expected_pos_col{0, 1, 0, 0, 1, 2}; + cudf::table_view pos_expected({expected_pos_col, expected_a, expected_b}); + + auto pos_ret = cudf::explode_outer_position(t, 0); + CUDF_TEST_EXPECT_TABLES_EQUAL(pos_ret->view(), pos_expected); +} + +TYPED_TEST(ExplodeOuterTypedTest, ListOfStructs) +{ + // a b + // [{70, "70"}, {75, "75"}] 100 + // [{50, "50"}, {55, "55"}] 200 + // [{35, "35"}, {45, "45"}] 300 + // [{25, "25"}, {30, "30"}] 400 + // [{15, "15"}, {20, "20"}] 500 + + auto numeric_col = + fixed_width_column_wrapper{{70, 75, 50, 55, 35, 45, 25, 30, 15, 20}}; + strings_column_wrapper string_col{"70", "75", "50", "55", "35", "45", "25", "30", "15", "20"}; + auto struct_col = structs_column_wrapper{{numeric_col, string_col}}.release(); + auto a = cudf::make_lists_column( + 5, FCW{0, 2, 4, 6, 8, 10}.release(), std::move(struct_col), cudf::UNKNOWN_NULL_COUNT, {}); + + FCW b{100, 200, 300, 400, 500}; + + cudf::table_view t({a->view(), b}); + auto ret = cudf::explode_outer(t, 0); + + auto expected_numeric_col = + fixed_width_column_wrapper{{70, 75, 50, 55, 35, 45, 25, 30, 
15, 20}}; + strings_column_wrapper expected_string_col{ + "70", "75", "50", "55", "35", "45", "25", "30", "15", "20"}; + + auto expected_a = structs_column_wrapper{{expected_numeric_col, expected_string_col}}.release(); + FCW expected_b{100, 100, 200, 200, 300, 300, 400, 400, 500, 500}; + + cudf::table_view expected({expected_a->view(), expected_b}); + + CUDF_TEST_EXPECT_TABLES_EQUAL(ret->view(), expected); + + FCW expected_pos_col{0, 1, 0, 1, 0, 1, 0, 1, 0, 1}; + cudf::table_view pos_expected({expected_pos_col, expected_a->view(), expected_b}); + + auto pos_ret = cudf::explode_outer_position(t, 0); + CUDF_TEST_EXPECT_TABLES_EQUAL(pos_ret->view(), pos_expected); +} + +TEST_F(ExplodeOuterTest, SlicedList) +{ + // a b + // [[1, 2],[7, 6, 5]] 100 + // [[5, 6]] 200 + // [[0, 3],[5],[2, 1]] 300 + // [[8, 3],[],[4, 3, 1, 2]] 400 + // [[2, 3, 4],[9, 8]] 500 + + // slicing the top 2 rows and the bottom row off + + auto valids = cudf::detail::make_counting_transform_iterator( + 0, [](auto i) { return i % 2 == 0 ? 
true : false; }); + + LCW a({LCW{LCW({1, 2}, valids), LCW{7, 6, 5}}, + LCW{LCW{5, 6}}, + LCW{LCW{0, 3}, LCW{5}, LCW({2, 1}, valids)}, + LCW{LCW{8, 3}, LCW{}, LCW({4, 3, 1, 2}, valids)}, + LCW{LCW{2, 3, 4}, LCW{9, 8}}}); + FCW b({100, 200, 300, 400, 500}); + + LCW expected_a{ + LCW{0, 3}, LCW{5}, LCW({2, 1}, valids), LCW{8, 3}, LCW{}, LCW({4, 3, 1, 2}, valids)}; + FCW expected_b{300, 300, 300, 400, 400, 400}; + + cudf::table_view t({a, b}); + auto sliced_t = cudf::slice(t, {2, 4}); + cudf::table_view expected({expected_a, expected_b}); + + auto ret = cudf::explode_outer(sliced_t[0], 0); + + CUDF_TEST_EXPECT_TABLES_EQUAL(ret->view(), expected); + + FCW expected_pos_col{0, 1, 2, 0, 1, 2}; + cudf::table_view pos_expected({expected_pos_col, expected_a, expected_b}); + + auto pos_ret = cudf::explode_outer_position(sliced_t[0], 0); + CUDF_TEST_EXPECT_TABLES_EQUAL(pos_ret->view(), pos_expected); +} diff --git a/cpp/tests/reshape/explode_tests.cpp b/cpp/tests/reshape/explode_tests.cpp deleted file mode 100644 index 5f3237ce46d..00000000000 --- a/cpp/tests/reshape/explode_tests.cpp +++ /dev/null @@ -1,530 +0,0 @@ -/* - * Copyright (c) 2021, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include -#include - -#include -#include -#include -#include - -using namespace cudf::test; - -class ExplodeTest : public cudf::test::BaseFixture { -}; - -template -class ExplodeTypedTest : public cudf::test::BaseFixture { -}; - -TYPED_TEST_CASE(ExplodeTypedTest, cudf::test::FixedPointTypes); - -TEST_F(ExplodeTest, Empty) -{ - lists_column_wrapper a{}; - fixed_width_column_wrapper b{}; - - cudf::table_view t({a, b}); - - auto ret = cudf::explode(t, 0); - - fixed_width_column_wrapper expected_a{}; - fixed_width_column_wrapper expected_b{}; - cudf::table_view expected({expected_a, expected_b}); - - CUDF_TEST_EXPECT_TABLES_EQUAL(ret->view(), expected); - - auto pos_ret = cudf::explode_position(t, 0); - - fixed_width_column_wrapper expected_c{}; - cudf::table_view pos_expected({expected_a, expected_b, expected_c}); - - CUDF_TEST_EXPECT_TABLES_EQUAL(pos_ret->view(), pos_expected); -} - -TEST_F(ExplodeTest, NonList) -{ - fixed_width_column_wrapper a{100, 200, 300}; - fixed_width_column_wrapper b{100, 200, 300}; - - cudf::table_view t({a, b}); - - EXPECT_THROW(cudf::explode(t, 1), cudf::logic_error); - EXPECT_THROW(cudf::explode_position(t, 1), cudf::logic_error); -} - -TEST_F(ExplodeTest, Basics) -{ - /* - a b - [1, 2, 7] 100 - [5, 6] 200 - [0, 3] 300 - */ - - fixed_width_column_wrapper a{100, 200, 300}; - lists_column_wrapper b{lists_column_wrapper{1, 2, 7}, - lists_column_wrapper{5, 6}, - lists_column_wrapper{0, 3}}; - strings_column_wrapper c{"string0", "string1", "string2"}; - - fixed_width_column_wrapper expected_a{100, 100, 100, 200, 200, 300, 300}; - fixed_width_column_wrapper expected_b{1, 2, 7, 5, 6, 0, 3}; - strings_column_wrapper expected_c{ - "string0", "string0", "string0", "string1", "string1", "string2", "string2"}; - - cudf::table_view t({a, b, c}); - cudf::table_view expected({expected_a, expected_b, expected_c}); - - auto ret = cudf::explode(t, 1); - - CUDF_TEST_EXPECT_TABLES_EQUAL(ret->view(), expected); - - fixed_width_column_wrapper 
expected_pos_col{0, 1, 2, 0, 1, 0, 1}; - cudf::table_view pos_expected({expected_a, expected_pos_col, expected_b, expected_c}); - - auto pos_ret = cudf::explode_position(t, 1); - CUDF_TEST_EXPECT_TABLES_EQUAL(pos_ret->view(), pos_expected); -} - -TEST_F(ExplodeTest, SingleNull) -{ - /* - a b - [1, 2, 7] 100 - [5, 6] 200 - [] 300 - [0, 3] 400 - */ - - auto first_invalid = - cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i == 0 ? false : true; }); - - lists_column_wrapper a({lists_column_wrapper{1, 2, 7}, - lists_column_wrapper{5, 6}, - lists_column_wrapper{}, - lists_column_wrapper{0, 3}}, - first_invalid); - fixed_width_column_wrapper b({100, 200, 300, 400}); - - fixed_width_column_wrapper expected_a{5, 6, 0, 3}; - fixed_width_column_wrapper expected_b{200, 200, 400, 400}; - - cudf::table_view t({a, b}); - cudf::table_view expected({expected_a, expected_b}); - - auto ret = cudf::explode(t, 0); - - CUDF_TEST_EXPECT_TABLES_EQUAL(ret->view(), expected); - - fixed_width_column_wrapper expected_pos_col{0, 1, 0, 1}; - cudf::table_view pos_expected({expected_pos_col, expected_a, expected_b}); - - auto pos_ret = cudf::explode_position(t, 0); - CUDF_TEST_EXPECT_TABLES_EQUAL(pos_ret->view(), pos_expected); -} - -TEST_F(ExplodeTest, Nulls) -{ - /* - a b - [1, 2, 7] 100 - [5, 6] 200 - [0, 3] 300 - */ - - auto valids = cudf::detail::make_counting_transform_iterator( - 0, [](auto i) { return i % 2 == 0 ? 
true : false; }); - auto always_valid = - cudf::detail::make_counting_transform_iterator(0, [](auto i) { return true; }); - - lists_column_wrapper a({lists_column_wrapper{1, 2, 7}, - lists_column_wrapper{5, 6}, - lists_column_wrapper{0, 3}}, - valids); - fixed_width_column_wrapper b({100, 200, 300}, valids); - - fixed_width_column_wrapper expected_a({1, 2, 7, 0, 3}); - fixed_width_column_wrapper expected_b({100, 100, 100, 300, 300}, always_valid); - - cudf::table_view t({a, b}); - cudf::table_view expected({expected_a, expected_b}); - - auto ret = cudf::explode(t, 0); - - CUDF_TEST_EXPECT_TABLES_EQUAL(ret->view(), expected); - - fixed_width_column_wrapper expected_pos_col{0, 1, 2, 0, 1}; - cudf::table_view pos_expected({expected_pos_col, expected_a, expected_b}); - - auto pos_ret = cudf::explode_position(t, 0); - CUDF_TEST_EXPECT_TABLES_EQUAL(pos_ret->view(), pos_expected); -} - -TEST_F(ExplodeTest, NullsInList) -{ - /* - a b - [1, 2, 7] 100 - [5, 6, 0, 9] 200 - [] 300 - [0, 3, 8] 400 - */ - - auto valids = cudf::detail::make_counting_transform_iterator( - 0, [](auto i) { return i % 2 == 0 ? 
true : false; }); - - lists_column_wrapper a{lists_column_wrapper({1, 2, 7}, valids), - lists_column_wrapper({5, 6, 0, 9}, valids), - lists_column_wrapper{}, - lists_column_wrapper({0, 3, 8}, valids)}; - fixed_width_column_wrapper b{100, 200, 300, 400}; - - fixed_width_column_wrapper expected_a({1, 2, 7, 5, 6, 0, 9, 0, 3, 8}, - {1, 0, 1, 1, 0, 1, 0, 1, 0, 1}); - fixed_width_column_wrapper expected_b{100, 100, 100, 200, 200, 200, 200, 400, 400, 400}; - - cudf::table_view t({a, b}); - cudf::table_view expected({expected_a, expected_b}); - - auto ret = cudf::explode(t, 0); - - CUDF_TEST_EXPECT_TABLES_EQUAL(ret->view(), expected); - - fixed_width_column_wrapper expected_pos_col{0, 1, 2, 0, 1, 2, 3, 0, 1, 2}; - cudf::table_view pos_expected({expected_pos_col, expected_a, expected_b}); - - auto pos_ret = cudf::explode_position(t, 0); - CUDF_TEST_EXPECT_TABLES_EQUAL(pos_ret->view(), pos_expected); -} - -TEST_F(ExplodeTest, Nested) -{ - /* - a b - [[1, 2], [7, 6, 5]] 100 - [[5, 6]] 200 - [[0, 3],[],[5],[2, 1]] 300 - */ - - lists_column_wrapper a{ - lists_column_wrapper{lists_column_wrapper{1, 2}, - lists_column_wrapper{7, 6, 5}}, - lists_column_wrapper{lists_column_wrapper{5, 6}}, - lists_column_wrapper{lists_column_wrapper{0, 3}, - lists_column_wrapper{}, - lists_column_wrapper{5}, - lists_column_wrapper{2, 1}}}; - fixed_width_column_wrapper b{100, 200, 300}; - - lists_column_wrapper expected_a{lists_column_wrapper{1, 2}, - lists_column_wrapper{7, 6, 5}, - lists_column_wrapper{5, 6}, - lists_column_wrapper{0, 3}, - lists_column_wrapper{}, - lists_column_wrapper{5}, - lists_column_wrapper{2, 1}}; - fixed_width_column_wrapper expected_b{100, 100, 200, 300, 300, 300, 300}; - - cudf::table_view t({a, b}); - cudf::table_view expected({expected_a, expected_b}); - - auto ret = cudf::explode(t, 0); - - CUDF_TEST_EXPECT_TABLES_EQUAL(ret->view(), expected); - - fixed_width_column_wrapper expected_pos_col{0, 1, 0, 0, 1, 2, 3}; - cudf::table_view pos_expected({expected_pos_col, 
expected_a, expected_b}); - - auto pos_ret = cudf::explode_position(t, 0); - CUDF_TEST_EXPECT_TABLES_EQUAL(pos_ret->view(), pos_expected); -} - -TEST_F(ExplodeTest, NestedNulls) -{ - /* - a b - [[1, 2], [7, 6, 5]] 100 - [[5, 6]] 200 - [[0, 3],[5],[2, 1]] 300 - */ - - auto valids = cudf::detail::make_counting_transform_iterator( - 0, [](auto i) { return i % 2 == 0 ? true : false; }); - auto always_valid = - cudf::detail::make_counting_transform_iterator(0, [](auto i) { return true; }); - - lists_column_wrapper a( - {lists_column_wrapper{lists_column_wrapper{1, 2}, - lists_column_wrapper{7, 6, 5}}, - lists_column_wrapper{lists_column_wrapper{5, 6}}, - lists_column_wrapper{lists_column_wrapper{0, 3}, - lists_column_wrapper{5}, - lists_column_wrapper{2, 1}}}, - valids); - fixed_width_column_wrapper b({100, 200, 300}, valids); - - lists_column_wrapper expected_a{lists_column_wrapper{1, 2}, - lists_column_wrapper{7, 6, 5}, - lists_column_wrapper{0, 3}, - lists_column_wrapper{5}, - lists_column_wrapper{2, 1}}; - fixed_width_column_wrapper expected_b({100, 100, 300, 300, 300}, always_valid); - - cudf::table_view t({a, b}); - cudf::table_view expected({expected_a, expected_b}); - - auto ret = cudf::explode(t, 0); - - CUDF_TEST_EXPECT_TABLES_EQUAL(ret->view(), expected); - - fixed_width_column_wrapper expected_pos_col{0, 1, 0, 1, 2}; - cudf::table_view pos_expected({expected_pos_col, expected_a, expected_b}); - - auto pos_ret = cudf::explode_position(t, 0); - CUDF_TEST_EXPECT_TABLES_EQUAL(pos_ret->view(), pos_expected); -} - -TEST_F(ExplodeTest, NullsInNested) -{ - /* - a b - [[1, 2], [7, 6, 5]] 100 - [[5, 6]] 200 - [[0, 3],[5],[2, 1]] 300 - */ - - auto valids = cudf::detail::make_counting_transform_iterator( - 0, [](auto i) { return i % 2 == 0 ? 
true : false; }); - - lists_column_wrapper a( - {lists_column_wrapper{lists_column_wrapper({1, 2}, valids), - lists_column_wrapper{7, 6, 5}}, - lists_column_wrapper{lists_column_wrapper{5, 6}}, - lists_column_wrapper{lists_column_wrapper{0, 3}, - lists_column_wrapper{5}, - lists_column_wrapper({2, 1}, valids)}}); - fixed_width_column_wrapper b({100, 200, 300}); - - lists_column_wrapper expected_a{lists_column_wrapper({1, 2}, valids), - lists_column_wrapper{7, 6, 5}, - lists_column_wrapper{5, 6}, - lists_column_wrapper{0, 3}, - lists_column_wrapper{5}, - lists_column_wrapper({2, 1}, valids)}; - fixed_width_column_wrapper expected_b{100, 100, 200, 300, 300, 300}; - - cudf::table_view t({a, b}); - cudf::table_view expected({expected_a, expected_b}); - - auto ret = cudf::explode(t, 0); - - CUDF_TEST_EXPECT_TABLES_EQUAL(ret->view(), expected); - - fixed_width_column_wrapper expected_pos_col{0, 1, 0, 0, 1, 2}; - cudf::table_view pos_expected({expected_pos_col, expected_a, expected_b}); - - auto pos_ret = cudf::explode_position(t, 0); - CUDF_TEST_EXPECT_TABLES_EQUAL(pos_ret->view(), pos_expected); -} - -TEST_F(ExplodeTest, NullsInNestedDoubleExplode) -{ - /* - a b - [[1, 2], [], [7, 6, 5]] 100 - [[5, 6]] 200 - [[0, 3],[5],[2, 1]] 300 - */ - - auto valids = cudf::detail::make_counting_transform_iterator( - 0, [](auto i) { return i % 2 == 0 ? 
true : false; }); - - lists_column_wrapper a{ - lists_column_wrapper{lists_column_wrapper({1, 2}, valids), - lists_column_wrapper{}, - lists_column_wrapper{7, 6, 5}}, - lists_column_wrapper{lists_column_wrapper{5, 6}}, - lists_column_wrapper{lists_column_wrapper{0, 3}, - lists_column_wrapper{5}, - lists_column_wrapper({2, 1}, valids)}}; - fixed_width_column_wrapper b{100, 200, 300}; - - fixed_width_column_wrapper expected_a({1, 2, 7, 6, 5, 5, 6, 0, 3, 5, 2, 1}, - {1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0}); - fixed_width_column_wrapper expected_b{ - 100, 100, 100, 100, 100, 200, 200, 300, 300, 300, 300, 300}; - - cudf::table_view t({a, b}); - cudf::table_view expected({expected_a, expected_b}); - - auto first_explode_ret = cudf::explode(t, 0); - auto ret = cudf::explode(first_explode_ret->view(), 0); - - CUDF_TEST_EXPECT_TABLES_EQUAL(ret->view(), expected); - - fixed_width_column_wrapper expected_pos_col{0, 1, 0, 1, 2, 0, 1, 0, 1, 0, 0, 1}; - cudf::table_view pos_expected({expected_pos_col, expected_a, expected_b}); - - auto pos_ret = cudf::explode_position(first_explode_ret->view(), 0); - CUDF_TEST_EXPECT_TABLES_EQUAL(pos_ret->view(), pos_expected); -} - -TEST_F(ExplodeTest, NestedStructs) -{ - /* - a b - [[1, 2], [7, 6, 5]] {100, "100"} - [[5, 6]] {200, "200"} - [[0, 3],[5],[2, 1]] {300, "300"} - */ - - auto valids = cudf::detail::make_counting_transform_iterator( - 0, [](auto i) { return i % 2 == 0 ? 
true : false; }); - - lists_column_wrapper a( - {lists_column_wrapper{lists_column_wrapper({1, 2}, valids), - lists_column_wrapper{7, 6, 5}}, - lists_column_wrapper{lists_column_wrapper{5, 6}}, - lists_column_wrapper{lists_column_wrapper{0, 3}, - lists_column_wrapper{5}, - lists_column_wrapper({2, 1}, valids)}}); - fixed_width_column_wrapper b1({100, 200, 300}); - strings_column_wrapper b2{"100", "200", "300"}; - structs_column_wrapper b({b1, b2}); - - lists_column_wrapper expected_a{lists_column_wrapper({1, 2}, valids), - lists_column_wrapper{7, 6, 5}, - lists_column_wrapper{5, 6}, - lists_column_wrapper{0, 3}, - lists_column_wrapper{5}, - lists_column_wrapper({2, 1}, valids)}; - fixed_width_column_wrapper expected_b1{100, 100, 200, 300, 300, 300}; - strings_column_wrapper expected_b2{"100", "100", "200", "300", "300", "300"}; - structs_column_wrapper expected_b({expected_b1, expected_b2}); - - cudf::table_view t({a, b}); - cudf::table_view expected({expected_a, expected_b}); - - auto ret = cudf::explode(t, 0); - - CUDF_TEST_EXPECT_TABLES_EQUAL(ret->view(), expected); - - fixed_width_column_wrapper expected_pos_col{0, 1, 0, 0, 1, 2}; - cudf::table_view pos_expected({expected_pos_col, expected_a, expected_b}); - - auto pos_ret = cudf::explode_position(t, 0); - CUDF_TEST_EXPECT_TABLES_EQUAL(pos_ret->view(), pos_expected); -} - -TYPED_TEST(ExplodeTypedTest, ListOfStructs) -{ - /* - a b - [{70, "70"}, {75, "75"}] 100 - [{50, "50"}, {55, "55"}] 200 - [{35, "35"}, {45, "45"}] 300 - [{25, "25"}, {30, "30"}] 400 - [{15, "15"}, {20, "20"}] 500 -*/ - - auto numeric_col = - fixed_width_column_wrapper{{70, 75, 50, 55, 35, 45, 25, 30, 15, 20}}; - strings_column_wrapper string_col{"70", "75", "50", "55", "35", "45", "25", "30", "15", "20"}; - auto struct_col = structs_column_wrapper{{numeric_col, string_col}}.release(); - auto a = cudf::make_lists_column(5, - fixed_width_column_wrapper{0, 2, 4, 6, 8, 10}.release(), - std::move(struct_col), - cudf::UNKNOWN_NULL_COUNT, - {}); - - 
fixed_width_column_wrapper b{100, 200, 300, 400, 500}; - - cudf::table_view t({a->view(), b}); - auto ret = cudf::explode(t, 0); - - auto expected_numeric_col = - fixed_width_column_wrapper{{70, 75, 50, 55, 35, 45, 25, 30, 15, 20}}; - strings_column_wrapper expected_string_col{ - "70", "75", "50", "55", "35", "45", "25", "30", "15", "20"}; - - auto expected_a = structs_column_wrapper{{expected_numeric_col, expected_string_col}}.release(); - fixed_width_column_wrapper expected_b{100, 100, 200, 200, 300, 300, 400, 400, 500, 500}; - - cudf::table_view expected({expected_a->view(), expected_b}); - - CUDF_TEST_EXPECT_TABLES_EQUAL(ret->view(), expected); - - fixed_width_column_wrapper expected_pos_col{0, 1, 0, 1, 0, 1, 0, 1, 0, 1}; - cudf::table_view pos_expected({expected_pos_col, expected_a->view(), expected_b}); - - auto pos_ret = cudf::explode_position(t, 0); - CUDF_TEST_EXPECT_TABLES_EQUAL(pos_ret->view(), pos_expected); -} - -TEST_F(ExplodeTest, SlicedList) -{ - /* - a b - [[1, 2],[7, 6, 5]] 100 - [[5, 6]] 200 - [[0, 3],[5],[2, 1]] 300 - [[8, 3],[],[4, 3, 1, 2]] 400 - [[2, 3, 4],[9, 8]] 500 - - slicing the top 2 rows and the bottom row off - */ - - auto valids = cudf::detail::make_counting_transform_iterator( - 0, [](auto i) { return i % 2 == 0 ? 
true : false; }); - - lists_column_wrapper a( - {lists_column_wrapper{lists_column_wrapper({1, 2}, valids), - lists_column_wrapper{7, 6, 5}}, - lists_column_wrapper{lists_column_wrapper{5, 6}}, - lists_column_wrapper{lists_column_wrapper{0, 3}, - lists_column_wrapper{5}, - lists_column_wrapper({2, 1}, valids)}, - lists_column_wrapper{lists_column_wrapper{8, 3}, - lists_column_wrapper{}, - lists_column_wrapper({4, 3, 1, 2}, valids)}, - lists_column_wrapper{lists_column_wrapper{2, 3, 4}, - lists_column_wrapper{9, 8}}}); - fixed_width_column_wrapper b({100, 200, 300, 400, 500}); - - lists_column_wrapper expected_a{lists_column_wrapper{0, 3}, - lists_column_wrapper{5}, - lists_column_wrapper({2, 1}, valids), - lists_column_wrapper{8, 3}, - lists_column_wrapper{}, - lists_column_wrapper({4, 3, 1, 2}, valids)}; - fixed_width_column_wrapper expected_b{300, 300, 300, 400, 400, 400}; - - cudf::table_view t({a, b}); - auto sliced_t = cudf::slice(t, {2, 4}); - cudf::table_view expected({expected_a, expected_b}); - - auto ret = cudf::explode(sliced_t[0], 0); - - CUDF_TEST_EXPECT_TABLES_EQUAL(ret->view(), expected); - - fixed_width_column_wrapper expected_pos_col{0, 1, 2, 0, 1, 2}; - cudf::table_view pos_expected({expected_pos_col, expected_a, expected_b}); - - auto pos_ret = cudf::explode_position(sliced_t[0], 0); - CUDF_TEST_EXPECT_TABLES_EQUAL(pos_ret->view(), pos_expected); -}