diff --git a/CHANGELOG.md b/CHANGELOG.md index e0a7411cd13..565881d5afb 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -67,6 +67,7 @@ - PR #5811 Add `nvtext::edit_distance` API - PR #5789 Add groupby support for duration types - PR #5810 Make Cython subdirs packages and simplify package_data +- PR #5807 Initial support for struct columns - PR #5817 Enable more `fixed_point` unit tests by introducing "scale-less" constructor - PR #5822 Add `cudf_kafka` to `custreamz` run time conda dependency and fix bash syntax issue - PR #5845 Add support for `mask_to_bools` diff --git a/conda/recipes/libcudf/meta.yaml b/conda/recipes/libcudf/meta.yaml index e37008c3778..3a45d222992 100644 --- a/conda/recipes/libcudf/meta.yaml +++ b/conda/recipes/libcudf/meta.yaml @@ -153,6 +153,8 @@ test: - test -f $PREFIX/include/cudf/strings/substring.hpp - test -f $PREFIX/include/cudf/strings/translate.hpp - test -f $PREFIX/include/cudf/strings/wrap.hpp + - test -f $PREFIX/include/cudf/structs/structs_column_view.hpp + - test -f $PREFIX/include/cudf/structs/struct_view.hpp - test -f $PREFIX/include/cudf/table/table.hpp - test -f $PREFIX/include/cudf/table/table_view.hpp - test -f $PREFIX/include/cudf/transform.hpp diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index e3f832a79cb..0f53461b0c3 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -567,6 +567,8 @@ add_library(cudf src/lists/lists_column_view.cu src/lists/copying/concatenate.cu src/lists/copying/gather.cu + src/structs/structs_column_view.cu + src/structs/structs_column_factories.cu src/text/detokenize.cu src/text/edit_distance.cu src/text/generate_ngrams.cu diff --git a/cpp/include/cudf/column/column_device_view.cuh b/cpp/include/cudf/column/column_device_view.cuh index 2522143d5fa..fd225aec472 100644 --- a/cpp/include/cudf/column/column_device_view.cuh +++ b/cpp/include/cudf/column/column_device_view.cuh @@ -21,6 +21,7 @@ #include #include #include +#include #include #include #include diff --git 
a/cpp/include/cudf/column/column_factories.hpp b/cpp/include/cudf/column/column_factories.hpp index 3fc4c60b51c..f41ef2b5c7c 100644 --- a/cpp/include/cudf/column/column_factories.hpp +++ b/cpp/include/cudf/column/column_factories.hpp @@ -502,6 +502,37 @@ std::unique_ptr make_lists_column( cudaStream_t stream = 0, rmm::mr::device_memory_resource* mr = rmm::mr::get_default_resource()); +/** + * @brief Constructs a STRUCT column using specified child columns as members. + * + * Specified child/member columns and null_mask are adopted by resultant + * struct column. + * + * A struct column requires that all specified child columns have the same + * number of rows. A struct column's row count equals that of any/all + * of its child columns. A single struct row at any index is comprised of + * all the individual child column values at the same index, in the order + * specified in the list of child columns. + * + * The specified null mask governs which struct row has a null value. This + * is orthogonal to the null values of individual child columns. + * + * @param num_rows The number of struct values in the struct column. + * @param child_columns The list of child/members that the struct is comprised of. + * @param null_count The number of null values in the struct column. + * @param null_mask The bits specifying the null struct values in the column. + * @param stream Optional stream for use with all memory allocation and device kernels. + * @param mr Optional resource to use for device memory allocation. + * + */ +std::unique_ptr make_structs_column( + size_type num_rows, + std::vector>&& child_columns, + size_type null_count, + rmm::device_buffer&& null_mask, + cudaStream_t stream = 0, + rmm::mr::device_memory_resource* mr = rmm::mr::get_default_resource()); + /** * @brief Return a column with size elements that are all equal to the * given scalar. 
diff --git a/cpp/include/cudf/detail/gather.cuh b/cpp/include/cudf/detail/gather.cuh index 64a0b44f79b..1be0db71101 100644 --- a/cpp/include/cudf/detail/gather.cuh +++ b/cpp/include/cudf/detail/gather.cuh @@ -358,6 +358,19 @@ struct column_gatherer_impl { } }; +template +struct column_gatherer_impl { + std::unique_ptr operator()(column_view const& column, + MapItRoot gather_map_begin, + MapItRoot gather_map_end, + bool nullify_out_of_bounds, + cudaStream_t stream, + rmm::mr::device_memory_resource* mr) + { + CUDF_FAIL("Gather not yet supported on struct_view."); + } +}; + /** * @brief Function object for gathering a type-erased * column. To be used with the cudf::type_dispatcher. diff --git a/cpp/include/cudf/detail/null_mask.hpp b/cpp/include/cudf/detail/null_mask.hpp index bc8e7e450f3..1c3046735e7 100644 --- a/cpp/include/cudf/detail/null_mask.hpp +++ b/cpp/include/cudf/detail/null_mask.hpp @@ -26,8 +26,8 @@ namespace detail { * * @param[in] stream CUDA stream used for device memory operations and kernel launches. */ -std::vector segmented_count_set_bits(bitmask_type const* bitmask, - std::vector const& indices, +std::vector segmented_count_set_bits(bitmask_type const *bitmask, + std::vector const &indices, cudaStream_t stream = 0); /** @@ -35,10 +35,45 @@ std::vector segmented_count_set_bits(bitmask_type const* bitmask, * * @param[in] stream CUDA stream used for device memory operations and kernel launches. 
*/ -std::vector segmented_count_unset_bits(bitmask_type const* bitmask, - std::vector const& indices, +std::vector segmented_count_unset_bits(bitmask_type const *bitmask, + std::vector const &indices, cudaStream_t stream = 0); +/** + * @brief Returns a bitwise AND of the specified bitmasks + * + * @param masks The list of data pointers of the bitmasks to be ANDed + * @param begin_bits The bit offsets from which each mask is to be ANDed + * @param mask_size The number of bits to be ANDed in each mask + * @param stream CUDA stream used for device memory operations and kernel launches + * @param mr Device memory resource used to allocate the returned device_buffer + * @return rmm::device_buffer Output bitmask + */ +rmm::device_buffer bitmask_and(std::vector const &masks, + std::vector const &begin_bits, + size_type mask_size, + cudaStream_t stream, + rmm::mr::device_memory_resource *mr); + +/** + * @brief Performs a bitwise AND of the specified bitmasks, + * and writes in place to destination + * + * @param dest_mask Destination to which the AND result is written + * @param masks The list of data pointers of the bitmasks to be ANDed + * @param begin_bits The bit offsets from which each mask is to be ANDed + * @param mask_size The number of bits to be ANDed in each mask + * @param stream CUDA stream used for device memory operations and kernel launches + * @param mr Device memory resource used to allocate the returned device_buffer + * @return rmm::device_buffer Output bitmask + */ +void inplace_bitmask_and(bitmask_type *dest_mask, + std::vector const &masks, + std::vector const &begin_bits, + size_type mask_size, + cudaStream_t stream, + rmm::mr::device_memory_resource *mr); + } // namespace detail } // namespace cudf diff --git a/cpp/include/cudf/structs/struct_view.hpp b/cpp/include/cudf/structs/struct_view.hpp new file mode 100644 index 00000000000..6240feb009e --- /dev/null +++ b/cpp/include/cudf/structs/struct_view.hpp @@ -0,0 +1,34 @@ +/* + * Copyright (c) 2020, 
NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#pragma once + +/** + * @file struct_view.cuh + * @brief Class definition for cudf::struct_view. + */ + +namespace cudf { + +/** + * @brief A non-owning, immutable view of device data that represents + * a struct with fields of arbitrary types (including primitives, lists, + * and other structs) + * + */ +class struct_view { +}; + +} // namespace cudf diff --git a/cpp/include/cudf/structs/structs_column_view.hpp b/cpp/include/cudf/structs/structs_column_view.hpp new file mode 100644 index 00000000000..8e10ef0e1ea --- /dev/null +++ b/cpp/include/cudf/structs/structs_column_view.hpp @@ -0,0 +1,44 @@ +/* + * Copyright (c) 2020, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#pragma once + +#include +#include + +namespace cudf { + +class structs_column_view : private column_view { + public: + // Foundation members: + structs_column_view(structs_column_view const&) = default; + structs_column_view(structs_column_view&&) = default; + ~structs_column_view() = default; + structs_column_view& operator=(structs_column_view const&) = default; + structs_column_view& operator=(structs_column_view&&) = default; + + explicit structs_column_view(column_view const& rhs); + + using column_view::child_begin; + using column_view::child_end; + using column_view::has_nulls; + using column_view::null_count; + using column_view::null_mask; + using column_view::offset; + using column_view::size; + +}; // class structs_column_view; + +} // namespace cudf diff --git a/cpp/include/cudf/types.hpp b/cpp/include/cudf/types.hpp index 596ca4840db..5cb18cd52d6 100644 --- a/cpp/include/cudf/types.hpp +++ b/cpp/include/cudf/types.hpp @@ -59,6 +59,7 @@ class column_view; class mutable_column_view; class string_view; class list_view; +class struct_view; class scalar; template @@ -87,6 +88,8 @@ class duration_scalar_device_view; class list_scalar; +class struct_scalar; + class table; class table_view; class mutable_table_view; @@ -217,6 +220,7 @@ enum class type_id : int32_t { LIST, ///< List elements DECIMAL32, ///< Fixed-point type with int32_t DECIMAL64, ///< Fixed-point type with int64_t + STRUCT, ///< Struct elements // `NUM_TYPE_IDS` must be last! 
NUM_TYPE_IDS ///< Total number of type ids }; diff --git a/cpp/include/cudf/utilities/traits.hpp b/cpp/include/cudf/utilities/traits.hpp index d99d5daf10a..8ae5e6b2f3f 100644 --- a/cpp/include/cudf/utilities/traits.hpp +++ b/cpp/include/cudf/utilities/traits.hpp @@ -24,6 +24,7 @@ #include #include +#include "cudf/structs/struct_view.hpp" namespace cudf { @@ -488,7 +489,7 @@ template constexpr inline bool is_compound() { return std::is_same::value or std::is_same::value or - std::is_same::value; + std::is_same::value or std::is_same::value; } struct is_compound_impl { @@ -530,7 +531,7 @@ constexpr inline bool is_compound(data_type type) template constexpr inline bool is_nested() { - return std::is_same::value; + return std::is_same::value || std::is_same::value; } struct is_nested_impl { diff --git a/cpp/include/cudf/utilities/type_dispatcher.hpp b/cpp/include/cudf/utilities/type_dispatcher.hpp index 47041084ba9..fbf540c3d12 100644 --- a/cpp/include/cudf/utilities/type_dispatcher.hpp +++ b/cpp/include/cudf/utilities/type_dispatcher.hpp @@ -136,6 +136,7 @@ CUDF_TYPE_MAPPING(dictionary32, type_id::DICTIONARY32); CUDF_TYPE_MAPPING(cudf::list_view, type_id::LIST); CUDF_TYPE_MAPPING(numeric::decimal32, type_id::DECIMAL32); CUDF_TYPE_MAPPING(numeric::decimal64, type_id::DECIMAL64); +CUDF_TYPE_MAPPING(cudf::struct_view, type_id::STRUCT); template struct type_to_scalar_type_impl { @@ -200,6 +201,12 @@ struct type_to_scalar_type_impl { // using ScalarDeviceType = cudf::list_scalar_device_view; }; +template <> // TODO: Ditto, likewise. +struct type_to_scalar_type_impl { + using ScalarType = cudf::struct_scalar; + // using ScalarDeviceType = cudf::struct_scalar_device_view; // CALEB: TODO! 
+}; + #ifndef MAP_TIMESTAMP_SCALAR #define MAP_TIMESTAMP_SCALAR(Type) \ template <> \ @@ -422,6 +429,9 @@ CUDA_HOST_DEVICE_CALLABLE constexpr decltype(auto) type_dispatcher(cudf::data_ty case type_id::DECIMAL64: return f.template operator()::type>( std::forward(args)...); + case type_id::STRUCT: + return f.template operator()::type>( + std::forward(args)...); default: { #ifndef __CUDA_ARCH__ CUDF_FAIL("Unsupported type_id."); diff --git a/cpp/src/bitmask/null_mask.cu b/cpp/src/bitmask/null_mask.cu index 34817650012..0fada446dea 100644 --- a/cpp/src/bitmask/null_mask.cu +++ b/cpp/src/bitmask/null_mask.cu @@ -341,43 +341,6 @@ __global__ void offset_bitmask_and(bitmask_type *__restrict__ destination, } } -// Bitwise AND of the masks -rmm::device_buffer bitmask_and(std::vector const &masks, - std::vector const &begin_bits, - size_type mask_size, - cudaStream_t stream, - rmm::mr::device_memory_resource *mr) -{ - CUDF_EXPECTS(std::all_of(begin_bits.begin(), begin_bits.end(), [](auto b) { return b >= 0; }), - "Invalid range."); - CUDF_EXPECTS(mask_size > 0, "Invalid bit range."); - CUDF_EXPECTS(std::all_of(masks.begin(), masks.end(), [](auto p) { return p != nullptr; }), - "Mask pointer cannot be null"); - - rmm::device_buffer dest_mask{}; - auto num_bytes = bitmask_allocation_size_bytes(mask_size); - - auto number_of_mask_words = num_bitmask_words(mask_size); - - dest_mask = rmm::device_buffer{num_bytes, stream, mr}; - - rmm::device_vector d_masks(masks); - rmm::device_vector d_begin_bits(begin_bits); - - cudf::detail::grid_1d config(number_of_mask_words, 256); - offset_bitmask_and<<>>( - static_cast(dest_mask.data()), - d_masks.data().get(), - d_begin_bits.data().get(), - d_masks.size(), - mask_size, - number_of_mask_words); - - CHECK_CUDA(stream); - - return dest_mask; -} - // convert [first_bit_index,last_bit_index) to // [first_word_index,last_word_index) struct to_word_index : public thrust::unary_function { @@ -407,6 +370,59 @@ struct to_word_index : public 
thrust::unary_function { } // namespace namespace detail { + +// Inplace Bitwise AND of the masks +void inplace_bitmask_and(bitmask_type *dest_mask, + std::vector const &masks, + std::vector const &begin_bits, + size_type mask_size, + cudaStream_t stream, + rmm::mr::device_memory_resource *mr) +{ + CUDF_EXPECTS(std::all_of(begin_bits.begin(), begin_bits.end(), [](auto b) { return b >= 0; }), + "Invalid range."); + CUDF_EXPECTS(mask_size > 0, "Invalid bit range."); + CUDF_EXPECTS(std::all_of(masks.begin(), masks.end(), [](auto p) { return p != nullptr; }), + "Mask pointer cannot be null"); + + auto num_bytes = bitmask_allocation_size_bytes(mask_size); + + auto number_of_mask_words = num_bitmask_words(mask_size); + + rmm::device_vector d_masks(masks); + rmm::device_vector d_begin_bits(begin_bits); + + cudf::detail::grid_1d config(number_of_mask_words, 256); + offset_bitmask_and<<>>( + dest_mask, + d_masks.data().get(), + d_begin_bits.data().get(), + d_masks.size(), + mask_size, + number_of_mask_words); + + CHECK_CUDA(stream); +} + +// Bitwise AND of the masks +rmm::device_buffer bitmask_and(std::vector const &masks, + std::vector const &begin_bits, + size_type mask_size, + cudaStream_t stream, + rmm::mr::device_memory_resource *mr) +{ + rmm::device_buffer dest_mask{}; + auto num_bytes = bitmask_allocation_size_bytes(mask_size); + + auto number_of_mask_words = num_bitmask_words(mask_size); + + dest_mask = rmm::device_buffer{num_bytes, stream, mr}; + inplace_bitmask_and( + static_cast(dest_mask.data()), masks, begin_bits, mask_size, stream, mr); + + return dest_mask; +} + cudf::size_type count_set_bits(bitmask_type const *bitmask, size_type start, size_type stop, @@ -662,7 +678,9 @@ rmm::device_buffer bitmask_and(table_view const &view, } } - if (masks.size() > 0) { return bitmask_and(masks, offsets, view.num_rows(), stream, mr); } + if (masks.size() > 0) { + return cudf::detail::bitmask_and(masks, offsets, view.num_rows(), stream, mr); + } return null_mask; } diff 
--git a/cpp/src/column/column.cu b/cpp/src/column/column.cu index 3380fda5836..edb0941dbc0 100644 --- a/cpp/src/column/column.cu +++ b/cpp/src/column/column.cu @@ -235,6 +235,13 @@ struct create_column_from_view { { CUDF_FAIL("list_view not supported yet"); } + + template ::value> * = nullptr> + std::unique_ptr operator()() + { + CUDF_FAIL("struct_view not supported yet"); + } }; } // anonymous namespace diff --git a/cpp/src/column/column_factories.cpp b/cpp/src/column/column_factories.cpp index 1d4fc7ed849..8038ab356ab 100644 --- a/cpp/src/column/column_factories.cpp +++ b/cpp/src/column/column_factories.cpp @@ -192,6 +192,16 @@ std::unique_ptr column_from_scalar_dispatch::operator() +std::unique_ptr column_from_scalar_dispatch::operator()( + scalar const& value, + size_type size, + rmm::mr::device_memory_resource* mr, + cudaStream_t stream) const +{ + CUDF_FAIL("TODO. struct_view currently not supported."); +} + std::unique_ptr make_column_from_scalar(scalar const& s, size_type size, rmm::mr::device_memory_resource* mr, diff --git a/cpp/src/copying/copy.cu b/cpp/src/copying/copy.cu index 2def3471729..8d576e11182 100644 --- a/cpp/src/copying/copy.cu +++ b/cpp/src/copying/copy.cu @@ -116,6 +116,21 @@ struct copy_if_else_functor_impl { } }; +template +struct copy_if_else_functor_impl { + std::unique_ptr operator()(Left const& lhs, + Right const& rhs, + size_type size, + bool left_nullable, + bool right_nullable, + Filter filter, + rmm::mr::device_memory_resource* mr, + cudaStream_t stream) + { + CUDF_FAIL("copy_if_else not supported for struct_view yet"); + } +}; + /** * @brief Specialization of copy_if_else_functor for decimal32. 
*/ diff --git a/cpp/src/copying/get_element.cu b/cpp/src/copying/get_element.cu index 6408d1c7720..c8784765a6c 100644 --- a/cpp/src/copying/get_element.cu +++ b/cpp/src/copying/get_element.cu @@ -134,6 +134,16 @@ struct get_element_functor { { CUDF_FAIL("get_element_functor not supported for decimal64"); } + + template ::value> *p = nullptr> + std::unique_ptr operator()( + column_view const &input, + size_type index, + cudaStream_t stream = 0, + rmm::mr::device_memory_resource *mr = rmm::mr::get_default_resource()) + { + CUDF_FAIL("get_element_functor not supported for struct_view"); + } }; } // namespace diff --git a/cpp/src/copying/scatter.cu b/cpp/src/copying/scatter.cu index 75564359b90..c1ec86337f5 100644 --- a/cpp/src/copying/scatter.cu +++ b/cpp/src/copying/scatter.cu @@ -28,6 +28,7 @@ #include #include #include +#include #include #include @@ -188,6 +189,19 @@ struct column_scalar_scatterer_impl { } }; +template +struct column_scalar_scatterer_impl { + std::unique_ptr operator()(std::unique_ptr const& source, + MapIterator scatter_iter, + size_type scatter_rows, + column_view const& target, + rmm::mr::device_memory_resource* mr, + cudaStream_t stream) const + { + CUDF_FAIL("scatter scalar to struct_view not implemented"); + } +}; + template struct column_scalar_scatterer { template diff --git a/cpp/src/dictionary/search.cu b/cpp/src/dictionary/search.cu index d3cd71360dd..8f1871bd2fc 100644 --- a/cpp/src/dictionary/search.cu +++ b/cpp/src/dictionary/search.cu @@ -38,7 +38,8 @@ namespace detail { struct find_index_fn { template ::value and - not std::is_same::value>* = nullptr> + not std::is_same::value and + not std::is_same::value>* = nullptr> std::unique_ptr> operator()(dictionary_column_view const& input, scalar const& key, rmm::mr::device_memory_resource* mr, @@ -78,6 +79,16 @@ struct find_index_fn { { CUDF_FAIL("list_view column cannot be the keys column of a dictionary"); } + + template ::value>* = nullptr> + std::unique_ptr> 
operator()(dictionary_column_view const& input, + scalar const& key, + rmm::mr::device_memory_resource* mr, + cudaStream_t stream) const + { + CUDF_FAIL("struct_view column cannot be the keys column of a dictionary"); + } }; std::unique_ptr> get_index(dictionary_column_view const& dictionary, diff --git a/cpp/src/filling/fill.cu b/cpp/src/filling/fill.cu index 65c912ba2f5..aa5a6d7bf40 100644 --- a/cpp/src/filling/fill.cu +++ b/cpp/src/filling/fill.cu @@ -114,6 +114,16 @@ std::unique_ptr out_of_place_fill_range_dispatch::operator() +std::unique_ptr out_of_place_fill_range_dispatch::operator()( + cudf::size_type begin, + cudf::size_type end, + rmm::mr::device_memory_resource* mr, + cudaStream_t stream) +{ + CUDF_FAIL("struct_view not supported yet"); +} + template <> std::unique_ptr out_of_place_fill_range_dispatch::operator()( cudf::size_type begin, diff --git a/cpp/src/io/csv/csv_gpu.cu b/cpp/src/io/csv/csv_gpu.cu index 9d91ebb95f0..888ed8ff23b 100644 --- a/cpp/src/io/csv/csv_gpu.cu +++ b/cpp/src/io/csv/csv_gpu.cu @@ -425,6 +425,17 @@ __inline__ __device__ numeric::decimal64 decode_value(const char *data, return numeric::decimal64{}; } +// The purpose of this is merely to allow compilation ONLY +// TODO : make this work for csv +template <> +__inline__ __device__ cudf::struct_view decode_value(const char *data, + long start, + long end, + ParseOptions const &opts) +{ + return cudf::struct_view{}; +} + /** * @brief Functor for converting CSV raw data to typed value. 
*/ diff --git a/cpp/src/io/json/json_gpu.cu b/cpp/src/io/json/json_gpu.cu index 1347f2ecef5..5e70d2598b9 100644 --- a/cpp/src/io/json/json_gpu.cu +++ b/cpp/src/io/json/json_gpu.cu @@ -271,6 +271,14 @@ __inline__ __device__ cudf::list_view decode_value(const char *data, { return cudf::list_view{}; } +template <> +__inline__ __device__ cudf::struct_view decode_value(const char *data, + uint64_t start, + uint64_t end, + ParseOptions const &opts) +{ + return cudf::struct_view{}; +} template <> __inline__ __device__ numeric::decimal32 decode_value(const char *data, diff --git a/cpp/src/reductions/simple.cuh b/cpp/src/reductions/simple.cuh index f39ad0b27b8..f69c38bb6cb 100644 --- a/cpp/src/reductions/simple.cuh +++ b/cpp/src/reductions/simple.cuh @@ -21,6 +21,7 @@ #include #include #include +#include "cudf/structs/struct_view.hpp" namespace cudf { namespace reduction { @@ -82,7 +83,8 @@ struct result_type_dispatcher { std::is_same::value || std::is_same::value || cudf::is_fixed_point()) && - !std::is_same::value; + !std::is_same::value && + !std::is_same::value; } public: @@ -118,8 +120,9 @@ struct element_type_dispatcher { return !((std::is_same::value && !(std::is_same::value || std::is_same::value)) - // disable for list views - || std::is_same::value); + // disable for list/struct views + || std::is_same::value || + std::is_same::value); } public: diff --git a/cpp/src/replace/clamp.cu b/cpp/src/replace/clamp.cu index a47892baf39..e7628f966fc 100644 --- a/cpp/src/replace/clamp.cu +++ b/cpp/src/replace/clamp.cu @@ -239,6 +239,19 @@ std::enable_if_t::value, std::unique_ptr +std::enable_if_t::value, std::unique_ptr> clamper( + column_view const& input, + ScalarIterator const& lo_itr, + ScalarIterator const& lo_replace_itr, + ScalarIterator const& hi_itr, + ScalarIterator const& hi_replace_itr, + rmm::mr::device_memory_resource* mr, + cudaStream_t stream) +{ + CUDF_FAIL("struct_view type not supported"); +} + } // namespace template @@ -312,6 +325,18 @@ std::unique_ptr 
dispatch_clamp::operator()( CUDF_FAIL("clamp for decimal32 not supported"); } +template <> +std::unique_ptr dispatch_clamp::operator()(column_view const& input, + scalar const& lo, + scalar const& lo_replace, + scalar const& hi, + scalar const& hi_replace, + rmm::mr::device_memory_resource* mr, + cudaStream_t stream) +{ + CUDF_FAIL("clamp for struct_view not supported"); +} + /** * @copydoc cudf::clamp(column_view const& input, scalar const& lo, diff --git a/cpp/src/scalar/scalar_factories.cpp b/cpp/src/scalar/scalar_factories.cpp index 6bff618c815..a0d7082c905 100644 --- a/cpp/src/scalar/scalar_factories.cpp +++ b/cpp/src/scalar/scalar_factories.cpp @@ -101,6 +101,12 @@ std::unique_ptr default_scalar_functor::operator()() CUDF_FAIL("list_view type not supported"); } +template <> +std::unique_ptr default_scalar_functor::operator()() +{ + CUDF_FAIL("struct_view type not supported"); +} + } // namespace std::unique_ptr make_default_constructed_scalar(data_type type) diff --git a/cpp/src/search/search.cu b/cpp/src/search/search.cu index 1e0a256076b..e31efdc54a7 100644 --- a/cpp/src/search/search.cu +++ b/cpp/src/search/search.cu @@ -187,6 +187,15 @@ bool contains_scalar_dispatch::operator()(column_view const& co CUDF_FAIL("list_view type not supported yet"); } +template <> +bool contains_scalar_dispatch::operator()(column_view const& col, + scalar const& value, + cudaStream_t stream, + rmm::mr::device_memory_resource* mr) +{ + CUDF_FAIL("struct_view type not supported yet"); +} + } // namespace namespace detail { @@ -279,6 +288,16 @@ std::unique_ptr multi_contains_dispatch::operator()( CUDF_FAIL("list_view type not supported"); } +template <> +std::unique_ptr multi_contains_dispatch::operator()( + column_view const& haystack, + column_view const& needles, + rmm::mr::device_memory_resource* mr, + cudaStream_t stream) +{ + CUDF_FAIL("struct_view type not supported"); +} + std::unique_ptr contains(column_view const& haystack, column_view const& needles, 
rmm::mr::device_memory_resource* mr, diff --git a/cpp/src/structs/structs_column_factories.cu b/cpp/src/structs/structs_column_factories.cu new file mode 100644 index 00000000000..2e239fce5f3 --- /dev/null +++ b/cpp/src/structs/structs_column_factories.cu @@ -0,0 +1,112 @@ +/* + * Copyright (c) 2020, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include +#include +#include +#include "cudf/types.hpp" +#include "thrust/iterator/counting_iterator.h" + +namespace cudf { +namespace { +// Helper function to superimpose validity of parent struct +// over the specified member (child) column. +void superimpose_parent_nullmask(bitmask_type const* parent_null_mask, + std::size_t parent_null_mask_size, + size_type parent_null_count, + column& child, + cudaStream_t stream, + rmm::mr::device_memory_resource* mr) +{ + if (!child.nullable()) { + // Child currently has no null mask. Copy parent's null mask. + child.set_null_mask(rmm::device_buffer{parent_null_mask, parent_null_mask_size, stream, mr}); + child.set_null_count(parent_null_count); + } else { + // Child should have a null mask. + // `AND` the child's null mask with the parent's. 
+ + auto data_type{child.type()}; + auto num_rows{child.size()}; + + auto current_child_mask = child.mutable_view().null_mask(); + + cudf::detail::inplace_bitmask_and(current_child_mask, + {reinterpret_cast(parent_null_mask), + reinterpret_cast(current_child_mask)}, + {0, 0}, + child.size(), + stream, + mr); + child.set_null_count(UNKNOWN_NULL_COUNT); + } + + // If the child is also a struct, repeat for all grandchildren. + if (child.type().id() == cudf::type_id::STRUCT) { + const auto current_child_mask = child.mutable_view().null_mask(); + std::for_each(thrust::make_counting_iterator(0), + thrust::make_counting_iterator(child.num_children()), + [¤t_child_mask, &child, parent_null_mask_size, stream, mr](auto i) { + superimpose_parent_nullmask(current_child_mask, + parent_null_mask_size, + UNKNOWN_NULL_COUNT, + child.child(i), + stream, + mr); + }); + } +} +} // namespace + +/// Column factory that adopts child columns. +std::unique_ptr make_structs_column( + size_type num_rows, + std::vector>&& child_columns, + size_type null_count, + rmm::device_buffer&& null_mask, + cudaStream_t stream, + rmm::mr::device_memory_resource* mr) +{ + CUDF_EXPECTS(null_count <= 0 || !null_mask.is_empty(), + "Struct column with nulls must be nullable."); + + CUDF_EXPECTS(std::all_of(child_columns.begin(), + child_columns.end(), + [&](auto const& child_col) { return num_rows == child_col->size(); }), + "Child columns must have the same number of rows as the Struct column."); + + if (!null_mask.is_empty()) { + for (auto& child : child_columns) { + superimpose_parent_nullmask(static_cast(null_mask.data()), + null_mask.size(), + null_count, + *child, + stream, + mr); + } + } + + return std::make_unique( + cudf::data_type{type_id::STRUCT}, + num_rows, + rmm::device_buffer{0, stream, mr}, // Empty data buffer. Structs hold no data. 
+ null_mask, + null_count, + std::move(child_columns)); +} + +} // namespace cudf diff --git a/cpp/src/structs/structs_column_view.cu b/cpp/src/structs/structs_column_view.cu new file mode 100644 index 00000000000..f9cb345de6f --- /dev/null +++ b/cpp/src/structs/structs_column_view.cu @@ -0,0 +1,28 @@ +/* + * Copyright (c) 2020, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include +#include "cudf/utilities/error.hpp" + +namespace cudf { + +structs_column_view::structs_column_view(column_view const& rhs) : column_view{rhs} +{ + CUDF_EXPECTS(type().id() == type_id::STRUCT, "structs_column_view only supports struct columns"); +} + +} // namespace cudf \ No newline at end of file diff --git a/cpp/tests/CMakeLists.txt b/cpp/tests/CMakeLists.txt index 9f02aaf38e4..c772857683b 100644 --- a/cpp/tests/CMakeLists.txt +++ b/cpp/tests/CMakeLists.txt @@ -537,6 +537,14 @@ set(STRINGS_TEST_SRC ConfigureTest(STRINGS_TEST "${STRINGS_TEST_SRC}") +################################################################################################### +# - structs test ---------------------------------------------------------------------------------- + +set(STRUCTS_TEST_SRC + "${CMAKE_CURRENT_SOURCE_DIR}/structs/structs_column_tests.cu") + +ConfigureTest(STRUCTS_TEST "${STRUCTS_TEST_SRC}") + ################################################################################################### # - nvtext test 
---------------------------------------------------------------------------------- diff --git a/cpp/tests/structs/structs_column_tests.cu b/cpp/tests/structs/structs_column_tests.cu new file mode 100644 index 00000000000..a33170340f9 --- /dev/null +++ b/cpp/tests/structs/structs_column_tests.cu @@ -0,0 +1,595 @@ +/* + * Copyright (c) 2020, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "cudf/column/column_factories.hpp" +#include "cudf/detail/utilities/device_operators.cuh" +#include "cudf/null_mask.hpp" +#include "cudf/structs/structs_column_view.hpp" +#include "cudf/table/table_view.hpp" +#include "cudf/types.hpp" +#include "cudf/utilities/error.hpp" +#include "gtest/gtest.h" +#include "rmm/device_buffer.hpp" +#include "thrust/host_vector.h" +#include "thrust/iterator/counting_iterator.h" +#include "thrust/scan.h" +#include "thrust/sequence.h" + +using vector_of_columns = std::vector>; +using cudf::size_type; + +struct StructColumnWrapperTest : public cudf::test::BaseFixture { +}; + +template +struct TypedStructColumnWrapperTest : public cudf::test::BaseFixture { +}; + +using FixedWidthTypesNotBool = cudf::test::Concat; + +TYPED_TEST_CASE(TypedStructColumnWrapperTest, FixedWidthTypesNotBool); + +// Test simple struct construction without nullmask, through column factory. 
+// Columns must retain their originally set values. +TYPED_TEST(TypedStructColumnWrapperTest, TestColumnFactoryConstruction) +{ + auto names_col = + cudf::test::strings_column_wrapper{ + "Samuel Vimes", "Carrot Ironfoundersson", "Angua von Uberwald"} + .release(); + + int num_rows{names_col->size()}; + + auto ages_col = cudf::test::fixed_width_column_wrapper{{48, 27, 25}}.release(); + + auto is_human_col = cudf::test::fixed_width_column_wrapper{{true, true, false}}.release(); + + vector_of_columns cols; + cols.push_back(std::move(names_col)); + cols.push_back(std::move(ages_col)); + cols.push_back(std::move(is_human_col)); + + auto struct_col = cudf::make_structs_column(num_rows, std::move(cols), 0, {}); + + EXPECT_EQ(num_rows, struct_col->size()); + + auto struct_col_view{struct_col->view()}; + EXPECT_TRUE(std::all_of(struct_col_view.child_begin(), + struct_col_view.child_end(), + [&](auto const& child) { return child.size() == num_rows; })); + + // Check child columns for exactly correct values. + vector_of_columns expected_children; + expected_children.emplace_back(cudf::test::strings_column_wrapper{ + "Samuel Vimes", "Carrot Ironfoundersson", "Angua von Uberwald"} + .release()); + expected_children.emplace_back( + cudf::test::fixed_width_column_wrapper{48, 27, 25}.release()); + expected_children.emplace_back( + cudf::test::fixed_width_column_wrapper{true, true, false}.release()); + + std::for_each(thrust::make_counting_iterator(0), + thrust::make_counting_iterator(0) + expected_children.size(), + [&](auto idx) { + cudf::test::expect_columns_equivalent(struct_col_view.child(idx), + expected_children[idx]->view()); + }); +} + +// Test simple struct construction with nullmasks, through column wrappers. +// When the struct row is null, the child column value must be null. 
+TYPED_TEST(TypedStructColumnWrapperTest, TestColumnWrapperConstruction) +{ + std::initializer_list names = {"Samuel Vimes", + "Carrot Ironfoundersson", + "Angua von Uberwald", + "Cheery Littlebottom", + "Detritus", + "Mr Slant"}; + + auto num_rows{std::distance(names.begin(), names.end())}; + + auto names_col = cudf::test::strings_column_wrapper{names.begin(), names.end()}; + + auto ages_col = cudf::test::fixed_width_column_wrapper{{48, 27, 25, 31, 351, 351}, + {1, 1, 1, 1, 1, 0}}; + + auto is_human_col = cudf::test::fixed_width_column_wrapper{ + {true, true, false, false, false, false}, {1, 1, 0, 1, 1, 0}}; + + auto struct_col = + cudf::test::structs_column_wrapper{{names_col, ages_col, is_human_col}, {1, 1, 1, 0, 1, 1}} + .release(); + + EXPECT_EQ(num_rows, struct_col->size()); + + auto struct_col_view{struct_col->view()}; + EXPECT_TRUE(std::all_of(struct_col_view.child_begin(), + struct_col_view.child_end(), + [&](auto const& child) { return child.size() == num_rows; })); + + // Check child columns for exactly correct values. 
+ vector_of_columns expected_children; + expected_children.emplace_back( + cudf::test::strings_column_wrapper{names, {1, 1, 1, 0, 1, 1}}.release()); + expected_children.emplace_back(cudf::test::fixed_width_column_wrapper{ + {48, 27, 25, 31, 351, 351}, + {1, 1, 1, 0, 1, 0}}.release()); + expected_children.emplace_back(cudf::test::fixed_width_column_wrapper{ + {true, true, false, false, false, false}, + {1, 1, 0, 0, 1, 0}}.release()); + + std::for_each(thrust::make_counting_iterator(0), + thrust::make_counting_iterator(0) + expected_children.size(), + [&](auto idx) { + cudf::test::expect_columns_equivalent(struct_col_view.child(idx), + expected_children[idx]->view()); + }); + + auto expected_struct_col = + cudf::test::structs_column_wrapper{std::move(expected_children), {1, 1, 1, 0, 1, 1}}.release(); + + cudf::test::expect_columns_equivalent(struct_col_view, expected_struct_col->view()); +} + +TYPED_TEST(TypedStructColumnWrapperTest, TestStructsContainingLists) +{ + // Test structs with two members: + // 1. Name: String + // 2. List: List + + std::initializer_list names = {"Samuel Vimes", + "Carrot Ironfoundersson", + "Angua von Uberwald", + "Cheery Littlebottom", + "Detritus", + "Mr Slant"}; + + auto num_rows{std::distance(names.begin(), names.end())}; + + // `Name` column has all valid values. + auto names_col = cudf::test::strings_column_wrapper{names.begin(), names.end()}; + + // `List` member. + auto lists_col = + cudf::test::lists_column_wrapper{{1, 2, 3}, {4}, {5, 6}, {}, {7, 8}, {9}}; + + // Construct a Struct column of 6 rows, with the last two values set to null. + auto struct_col = + cudf::test::structs_column_wrapper{{names_col, lists_col}, {1, 1, 1, 1, 0, 0}}.release(); + + // Check that the last two rows are null for all members. + + // For `Name` member, indices 4 and 5 are null. 
+ auto expected_names_col = cudf::test::strings_column_wrapper{ + names.begin(), names.end(), cudf::test::make_counting_transform_iterator(0, [](auto i) { + return i < 4; + })}.release(); + + cudf::test::expect_columns_equivalent(struct_col->view().child(0), expected_names_col->view()); + + // For the `List` member, indices 4, 5 should be null. + // FIXME: The way list columns are currently compared is not ideal for testing + // structs' list members. Rather than comparing for equivalence, + // column_comparator_impl currently checks that list's data (child) + // and offsets match perfectly. + // This causes two "equivalent lists" to compare unequal, if the data columns + // have different values at an index where the value is null. + auto expected_last_two_lists_col = cudf::test::lists_column_wrapper{ + { + {1, 2, 3}, + {4}, + {5, 6}, + {}, + {7, 8}, // Null. + {9} // Null. + }, + cudf::test::make_counting_transform_iterator(0, [](auto i) { + return i == 0; + })}.release(); + + // FIXME: Uncomment after list comparison is fixed. + // cudf::test::expect_columns_equivalent( + // struct_col->view().child(1), + // expected_last_two_lists_col->view()); +} + +TYPED_TEST(TypedStructColumnWrapperTest, StructOfStructs) +{ + // Struct> + + auto names = {"Samuel Vimes", + "Carrot Ironfoundersson", + "Angua von Uberwald", + "Cheery Littlebottom", + "Detritus", + "Mr Slant"}; + + auto num_rows{std::distance(names.begin(), names.end())}; + + // `Name` column has all valid values. 
+ auto names_col = cudf::test::strings_column_wrapper{names.begin(), names.end()}; + + auto ages_col = + cudf::test::fixed_width_column_wrapper{{48, 27, 25, 31, 351, 351}, {1, 1, 1, 1, 1, 0}}; + + auto struct_1 = cudf::test::structs_column_wrapper{{names_col, ages_col}, {1, 1, 1, 1, 0, 1}}; + + auto is_human_col = cudf::test::fixed_width_column_wrapper{ + {true, true, false, false, false, false}, {1, 1, 0, 1, 1, 0}}; + + auto struct_2 = + cudf::test::structs_column_wrapper{{is_human_col, struct_1}, {0, 1, 1, 1, 1, 1}}.release(); + + // Verify that the child/grandchild columns are as expected. + auto expected_names_col = + cudf::test::strings_column_wrapper( + names.begin(), + names.end(), + cudf::test::make_counting_transform_iterator(0, [](auto i) { return i != 0 && i != 4; })) + .release(); + + cudf::test::expect_columns_equivalent(*expected_names_col, struct_2->child(1).child(0)); + + auto expected_ages_col = cudf::test::fixed_width_column_wrapper{ + {48, 27, 25, 31, 351, 351}, + {0, 1, 1, 1, 0, 0}}.release(); + cudf::test::expect_columns_equivalent(*expected_ages_col, struct_2->child(1).child(1)); + + auto expected_bool_col = cudf::test::fixed_width_column_wrapper{ + {true, true, false, false, false, false}, + {0, 1, 0, 1, 1, 0}}.release(); + + cudf::test::expect_columns_equivalent(*expected_bool_col, struct_2->child(0)); + + // Verify that recursive struct columns may be compared + // using expect_columns_equivalent. 
+ + vector_of_columns expected_cols_1; + expected_cols_1.emplace_back(std::move(expected_names_col)); + expected_cols_1.emplace_back(std::move(expected_ages_col)); + auto expected_struct_1 = + cudf::test::structs_column_wrapper(std::move(expected_cols_1), {1, 1, 1, 1, 0, 1}).release(); + + vector_of_columns expected_cols_2; + expected_cols_2.emplace_back(std::move(expected_bool_col)); + expected_cols_2.emplace_back(std::move(expected_struct_1)); + auto expected_struct_2 = + cudf::test::structs_column_wrapper(std::move(expected_cols_2), {0, 1, 1, 1, 1, 1}).release(); + + cudf::test::expect_columns_equivalent(*expected_struct_2, *struct_2); +} + +TYPED_TEST(TypedStructColumnWrapperTest, TestNullMaskPropagationForNonNullStruct) +{ + // Struct> + + auto names = {"Samuel Vimes", + "Carrot Ironfoundersson", + "Angua von Uberwald", + "Cheery Littlebottom", + "Detritus", + "Mr Slant"}; + + auto num_rows{std::distance(names.begin(), names.end())}; + + // `Name` column has all valid values. + auto names_col = cudf::test::strings_column_wrapper{names.begin(), names.end()}; + + auto ages_col = cudf::test::fixed_width_column_wrapper{ + {48, 27, 25, 31, 351, 351}, {1, 1, 1, 1, 1, 1} // <-- No nulls in ages_col either. + }; + + auto struct_1 = cudf::test::structs_column_wrapper{ + {names_col, ages_col}, {1, 1, 1, 1, 1, 1} // <-- Non-null, bottom level struct. + }; + + auto is_human_col = cudf::test::fixed_width_column_wrapper{ + {true, true, false, false, false, false}, {1, 1, 0, 1, 1, 0}}; + + auto struct_2 = + cudf::test::structs_column_wrapper{ + {is_human_col, struct_1}, {0, 1, 1, 1, 1, 1} // <-- First row is null, for top-level struct. + } + .release(); + + // Verify that the child/grandchild columns are as expected. + + // Top-struct has 1 null (at index 0). 
+ // Bottom-level struct had no nulls, but must now report nulls + auto expected_names_col = + cudf::test::strings_column_wrapper( + names.begin(), + names.end(), + cudf::test::make_counting_transform_iterator(0, [](auto i) { return i != 0; })) + .release(); + + cudf::test::expect_columns_equivalent(*expected_names_col, struct_2->child(1).child(0)); + + auto expected_ages_col = cudf::test::fixed_width_column_wrapper{ + {48, 27, 25, 31, 351, 351}, + {0, 1, 1, 1, 1, 1}}.release(); + cudf::test::expect_columns_equivalent(*expected_ages_col, struct_2->child(1).child(1)); + + auto expected_bool_col = cudf::test::fixed_width_column_wrapper{ + {true, true, false, false, false, false}, + {0, 1, 0, 1, 1, 0}}.release(); + + cudf::test::expect_columns_equivalent(*expected_bool_col, struct_2->child(0)); + + // Verify that recursive struct columns may be compared + // using expect_columns_equivalent. + + vector_of_columns expected_cols_1; + expected_cols_1.emplace_back(std::move(expected_names_col)); + expected_cols_1.emplace_back(std::move(expected_ages_col)); + auto expected_struct_1 = + cudf::test::structs_column_wrapper(std::move(expected_cols_1), {1, 1, 1, 1, 1, 1}).release(); + + vector_of_columns expected_cols_2; + expected_cols_2.emplace_back(std::move(expected_bool_col)); + expected_cols_2.emplace_back(std::move(expected_struct_1)); + auto expected_struct_2 = + cudf::test::structs_column_wrapper(std::move(expected_cols_2), {0, 1, 1, 1, 1, 1}).release(); + + cudf::test::expect_columns_equivalent(*expected_struct_2, *struct_2); +} + +TEST_F(StructColumnWrapperTest, StructWithNoMembers) +{ + auto struct_col{cudf::test::structs_column_wrapper{}.release()}; + EXPECT_TRUE(struct_col->num_children() == 0); + EXPECT_TRUE(struct_col->null_count() == 0); + EXPECT_TRUE(struct_col->size() == 0); +} + +TYPED_TEST(TypedStructColumnWrapperTest, StructsWithMembersWithDifferentRowCounts) +{ + auto numeric_col_5 = cudf::test::fixed_width_column_wrapper{{1, 2, 3, 4, 5}}; + auto 
bool_col_4 = cudf::test::fixed_width_column_wrapper{1, 0, 1, 0}; + + EXPECT_THROW(cudf::test::structs_column_wrapper({numeric_col_5, bool_col_4}), cudf::logic_error); +} + +TYPED_TEST(TypedStructColumnWrapperTest, TestListsOfStructs) +{ + // Test a list column whose child is a struct column with two members: + // 1. Name: String + // 2. Age: fixed-width numeric, with some nulls + + std::initializer_list names = {"Samuel Vimes", + "Carrot Ironfoundersson", + "Angua von Uberwald", + "Cheery Littlebottom", + "Detritus", + "Mr Slant"}; + + auto num_rows{std::distance(names.begin(), names.end())}; + + // `Name` column has all valid values. + auto names_col = cudf::test::strings_column_wrapper{names.begin(), names.end()}; + + // Numeric column has some nulls. + auto ages_col = cudf::test::fixed_width_column_wrapper{{48, 27, 25, 31, 351, 351}, + {1, 1, 1, 1, 1, 0}}; + + auto struct_col = + cudf::test::structs_column_wrapper({names_col, ages_col}, {1, 1, 1, 0, 0, 1}).release(); + + auto expected_unchanged_struct_col = cudf::column(*struct_col); + + auto list_offsets_column = + cudf::test::fixed_width_column_wrapper{0, 2, 3, 5, 6}.release(); + auto num_list_rows = list_offsets_column->size() - 1; + + auto list_col = cudf::make_lists_column(num_list_rows, + std::move(list_offsets_column), + std::move(struct_col), + cudf::UNKNOWN_NULL_COUNT, + {}); + + // List of structs was constructed successfully. No exceptions. + // Verify that the struct child column is unchanged from what was set.
+ + cudf::test::expect_columns_equivalent(expected_unchanged_struct_col, + cudf::lists_column_view(*list_col).child()); + +#ifndef NDEBUG + std::cout << "Printing list col: \n"; + cudf::test::print(*list_col); +#endif +} + +TYPED_TEST(TypedStructColumnWrapperTest, ListOfStructOfList) +{ + using namespace cudf::test; + + auto list_col = lists_column_wrapper{ + {{0}, {1}, {}, {3}, {4}, {5, 5}, {6}, {}, {8}, {9}}, + cudf::test::make_counting_transform_iterator(0, [](auto i) { return i % 2; })}; + + // TODO: Struct cannot be compared with expect_columns_equal(), + // if the struct has null values. After lists support "equivalence" + // comparisons, the structs column needs to be modified to add nulls. + auto struct_of_lists_col = structs_column_wrapper{{list_col}}.release(); + + auto list_of_struct_of_list_validity = + make_counting_transform_iterator(0, [](auto i) { return i % 3; }); + auto list_of_struct_of_list = cudf::make_lists_column( + 5, + std::move(fixed_width_column_wrapper{0, 2, 4, 6, 8, 10}.release()), + std::move(struct_of_lists_col), + cudf::UNKNOWN_NULL_COUNT, + detail::make_null_mask(list_of_struct_of_list_validity, list_of_struct_of_list_validity + 5)); + + // Compare with expected values. 
+ + auto expected_level0_list = lists_column_wrapper{ + {{}, {1}, {}, {3}, {}, {5, 5}, {}, {}, {}, {9}}, + make_counting_transform_iterator(0, [](auto i) { return i % 2; })}; + + auto expected_level2_struct = structs_column_wrapper{{expected_level0_list}}.release(); + + expect_columns_equivalent(cudf::lists_column_view(*list_of_struct_of_list).child(), + *expected_level2_struct); + + auto expected_level3_list = cudf::make_lists_column( + 5, + std::move(fixed_width_column_wrapper{0, 2, 4, 6, 8, 10}.release()), + std::move(expected_level2_struct), + cudf::UNKNOWN_NULL_COUNT, + detail::make_null_mask(list_of_struct_of_list_validity, list_of_struct_of_list_validity + 5)); + + expect_columns_equivalent(*list_of_struct_of_list, *expected_level3_list); +} + +TYPED_TEST(TypedStructColumnWrapperTest, StructOfListOfStruct) +{ + using namespace cudf::test; + + auto ints_col = fixed_width_column_wrapper{ + {0, 1, 2, 3, 4, 5, 6, 7, 8, 9}, + make_counting_transform_iterator(0, [](auto i) { return i % 2; })}; + + auto structs_col = + structs_column_wrapper{ + {ints_col}, + make_counting_transform_iterator(0, [](auto i) { return i < 6; }) // Last 4 structs are null. + } + .release(); + + auto list_validity = make_counting_transform_iterator(0, [](auto i) { return i % 3; }); + auto lists_col = cudf::make_lists_column( + 5, + std::move(fixed_width_column_wrapper{0, 2, 4, 6, 8, 10}.release()), + std::move(structs_col), + cudf::UNKNOWN_NULL_COUNT, + detail::make_null_mask(list_validity, list_validity + 5)); + + std::vector> cols; + cols.push_back(std::move(lists_col)); + auto struct_of_list_of_struct = structs_column_wrapper{std::move(cols)}.release(); + + // Check that the struct is constructed as expected. 
+ + auto expected_ints_col = fixed_width_column_wrapper{{0, 1, 0, 3, 0, 5, 0, 0, 0, 0}, + {0, 1, 0, 1, 0, 1, 0, 0, 0, 0}}; + + auto expected_structs_col = + structs_column_wrapper{{expected_ints_col}, {1, 1, 1, 1, 1, 1, 0, 0, 0, 0}}.release(); + + auto expected_lists_col = cudf::make_lists_column( + 5, + std::move(fixed_width_column_wrapper{0, 2, 4, 6, 8, 10}.release()), + std::move(expected_structs_col), + cudf::UNKNOWN_NULL_COUNT, + detail::make_null_mask(list_validity, list_validity + 5)); + + // Test that the lists child column is as expected. + cudf::test::expect_columns_equivalent(*expected_lists_col, struct_of_list_of_struct->child(0)); + + // Test that the outer struct column is as expected. + cols.clear(); + cols.push_back(std::move(expected_lists_col)); + cudf::test::expect_columns_equivalent(*(structs_column_wrapper{std::move(cols)}.release()), + *struct_of_list_of_struct); +} + +TYPED_TEST(TypedStructColumnWrapperTest, EmptyColumnsOfStructs) +{ + using namespace cudf::test; + + { + // Empty struct column. + auto empty_struct_column = structs_column_wrapper{}.release(); + EXPECT_TRUE(empty_struct_column->num_children() == 0); + EXPECT_TRUE(empty_struct_column->size() == 0); + EXPECT_TRUE(empty_struct_column->null_count() == 0); + } + + { + // Empty struct column. 
+ auto empty_list_column = lists_column_wrapper{}; + auto struct_column = structs_column_wrapper{{empty_list_column}}.release(); + EXPECT_TRUE(struct_column->num_children() == 1); + EXPECT_TRUE(struct_column->size() == 0); + EXPECT_TRUE(struct_column->null_count() == 0); + + auto empty_list_of_structs = + cudf::make_lists_column(0, + fixed_width_column_wrapper{0}.release(), + std::move(struct_column), + cudf::UNKNOWN_NULL_COUNT, + {}); + + EXPECT_TRUE(empty_list_of_structs->size() == 0); + EXPECT_TRUE(empty_list_of_structs->null_count() == 0); + + auto child_struct_column = cudf::lists_column_view(*empty_list_of_structs).child(); + EXPECT_TRUE(child_struct_column.num_children() == 1); + EXPECT_TRUE(child_struct_column.size() == 0); + EXPECT_TRUE(child_struct_column.null_count() == 0); + } + + // TODO: Uncomment test after adding support to compare empty + // lists whose child columns may not be empty. + // { + // auto non_empty_column_of_numbers = + // fixed_width_column_wrapper{1,2,3,4,5}.release(); + // + // auto list_offsets = + // fixed_width_column_wrapper{0}.release(); + // + // auto empty_list_column = + // cudf::make_lists_column( + // 0, std::move(list_offsets), std::move(non_empty_column_of_numbers), 0, {}); + // + // expect_columns_equivalent(*lists_column_wrapper{}.release(), *empty_list_column); + // auto struct_column = structs_column_wrapper{{empty_list_column}}.release(); + // EXPECT_TRUE(struct_column->num_children() == 1); + // EXPECT_TRUE(struct_column->size() == 0); + // EXPECT_TRUE(struct_column->null_count() == 0); + // } +} + +TEST_F(StructColumnWrapperTest, SimpleTestExpectStructColumnsEqual) +{ + auto ints_col = cudf::test::fixed_width_column_wrapper{{0, 1}, {0, 0}}.release(); + + vector_of_columns cols; + cols.emplace_back(std::move(ints_col)); + auto structs_col = cudf::test::structs_column_wrapper{std::move(cols)}; + + cudf::test::expect_columns_equivalent(structs_col, structs_col); +} + +CUDF_TEST_PROGRAM_MAIN() diff --git 
a/cpp/tests/utilities/column_utilities.cu b/cpp/tests/utilities/column_utilities.cu index 91123cb686f..c1392de3909 100644 --- a/cpp/tests/utilities/column_utilities.cu +++ b/cpp/tests/utilities/column_utilities.cu @@ -15,17 +15,22 @@ */ #include "column_utilities.hpp" +#include "cudf/utilities/type_dispatcher.hpp" #include "detail/column_utilities.hpp" +#include "thrust/iterator/counting_iterator.h" #include #include #include #include #include +#include +#include #include #include #include +#include #include #include @@ -265,6 +270,26 @@ struct column_comparator_impl { } }; +template +struct column_comparator_impl { + void operator()(column_view const& lhs, + column_view const& rhs, + bool print_all_differences, + int depth) + { + std::for_each(thrust::make_counting_iterator(0), + thrust::make_counting_iterator(0) + lhs.num_children(), + [&](auto i) { + cudf::type_dispatcher(lhs.child(i).type(), + column_comparator{}, + lhs.child(i), + rhs.child(i), + print_all_differences, + depth + 1); + }); + } +}; + template struct column_comparator { template @@ -399,6 +424,19 @@ std::string get_nested_type_str(cudf::column_view const& view) lists_column_view lcv(view); return cudf::jit::get_type_name(view.type()) + "<" + (get_nested_type_str(lcv.child())) + ">"; } + + if (view.type().id() == cudf::type_id::STRUCT) { + std::ostringstream out; + + out << cudf::jit::get_type_name(view.type()) + "<"; + std::transform(view.child_begin(), + view.child_end(), + std::ostream_iterator(out, ","), + [&out](auto const col) { return get_nested_type_str(col); }); + out << ">"; + return out.str(); + } + return cudf::jit::get_type_name(view.type()); } @@ -557,6 +595,32 @@ struct column_view_printer { out.push_back(tmp); } + + template ::value>* = nullptr> + void operator()(cudf::column_view const& col, + std::vector& out, + std::string const& indent) + { + structs_column_view view{col}; + + std::ostringstream out_stream; + + out_stream << get_nested_type_str(col) << ":\n" + << indent << 
"Length : " << view.size() << ":\n"; + if (view.has_nulls()) { + out_stream << indent << "Null count: " << view.null_count() << "\n" + << detail::to_string(bitmask_to_host(col), col.size(), indent) << "\n"; + } + + std::transform( + view.child_begin(), + view.child_end(), + std::ostream_iterator(out_stream, "\n"), + [&](auto child_column) { return detail::to_string(child_column, ", ", indent + " "); }); + + out.push_back(out_stream.str()); + } }; } // namespace diff --git a/cpp/tests/utilities/column_wrapper.hpp b/cpp/tests/utilities/column_wrapper.hpp index ddb128cfe61..11e00acebb2 100644 --- a/cpp/tests/utilities/column_wrapper.hpp +++ b/cpp/tests/utilities/column_wrapper.hpp @@ -16,6 +16,7 @@ #pragma once +#include #include #include #include @@ -1043,5 +1044,149 @@ class lists_column_wrapper : public cudf::test::detail::column_wrapper { bool root = false; }; +/** + * @brief `column_wrapper` derived class for wrapping columns of structs. + */ +class structs_column_wrapper : public detail::column_wrapper { + public: + /** + * @brief Constructs a struct column from the specified list of pre-constructed child columns. + * + * The child columns are "adopted" by the struct column constructed here. + * + * Example usage: + * @code{.cpp} + * // The following constructs a column for struct< int, string >. + * auto child_int_col = fixed_width_column_wrapper{ 1, 2, 3, 4, 5 }.release(); + * auto child_string_col = string_column_wrapper {"All", "the", "leaves", "are", + * "brown"}.release(); + * + * std::vector> child_columns; + * child_columns.push_back(std::move(child_int_col)); + * child_columns.push_back(std::move(child_string_col)); + * + * struct_column_wrapper struct_column_wrapper{ + * child_cols, + * {1,0,1,0,1} // Validity. 
+ * }; + * + * auto struct_col {struct_column_wrapper.release()}; + * @endcode + * + * @param child_columns The vector of pre-constructed child columns + * @param validity The vector of bools representing the column validity values + */ + structs_column_wrapper(std::vector>&& child_columns, + std::vector const& validity = {}) + { + init(std::move(child_columns), validity); + } + + /** + * @brief Constructs a struct column from the list of column wrappers for child columns. + * + * Example usage: + * @code{.cpp} + * // The following constructs a column for struct< int, string >. + * fixed_width_column_wrapper child_int_col_wrapper{ 1, 2, 3, 4, 5 }; + * strings_column_wrapper child_string_col_wrapper {"All", "the", "leaves", "are", "brown"}; + * + * structs_column_wrapper struct_column_wrapper{ + * {child_int_col_wrapper, child_string_col_wrapper}, + * {1,0,1,0,1} // Validity. + * }; + * + * auto struct_col {struct_column_wrapper.release()}; + * @endcode + * + * @param child_column_wrappers The list of child column wrappers + * @param validity The vector of bools representing the column validity values + */ + structs_column_wrapper( + std::initializer_list> child_column_wrappers, + std::vector const& validity = {}) + { + std::vector> child_columns; + child_columns.reserve(child_column_wrappers.size()); + std::transform(child_column_wrappers.begin(), + child_column_wrappers.end(), + std::back_inserter(child_columns), + [&](auto column_wrapper) { return column_wrapper.get().release(); }); + init(std::move(child_columns), validity); + } + + /** + * @brief Constructs a struct column from the list of column wrappers for child columns, with per-row validity read from an iterator. + * + * Example usage: + * @code{.cpp} + * // The following constructs a column for struct< int, string >.
+ * fixed_width_column_wrapper child_int_col_wrapper{ 1, 2, 3, 4, 5 }; + * strings_column_wrapper child_string_col_wrapper {"All", "the", "leaves", "are", "brown"}; + * + * structs_column_wrapper struct_column_wrapper{ + * {child_int_col_wrapper, child_string_col_wrapper}, + * cudf::test::make_counting_transform_iterator(0, [](auto i){ return i%2; }) // Validity. + * }; + * + * auto struct_col {struct_column_wrapper.release()}; + * @endcode + * + * @param child_column_wrappers The list of child column wrappers + * @param validity_iter Iterator returning the per-row validity bool + */ + template + structs_column_wrapper( + std::initializer_list> child_column_wrappers, + V validity_iter) + { + std::vector> child_columns; + child_columns.reserve(child_column_wrappers.size()); + std::transform(child_column_wrappers.begin(), + child_column_wrappers.end(), + std::back_inserter(child_columns), + [&](auto column_wrapper) { return column_wrapper.get().release(); }); + init(std::move(child_columns), validity_iter); + } + + private: + void init(std::vector>&& child_columns, + std::vector const& validity) + { + size_type num_rows = child_columns.empty() ? 0 : child_columns[0]->size(); + + CUDF_EXPECTS(std::all_of(child_columns.begin(), + child_columns.end(), + [&](auto const& p_column) { return p_column->size() == num_rows; }), + "All struct member columns must have the same row count."); + + CUDF_EXPECTS(validity.size() <= 0 || static_cast(validity.size()) == num_rows, + "Validity buffer must have as many elements as rows in the struct column."); + + wrapped = cudf::make_structs_column( + num_rows, + std::move(child_columns), + validity.size() <= 0 ? 0 : cudf::UNKNOWN_NULL_COUNT, + validity.size() <= 0 ? rmm::device_buffer{0} + : detail::make_null_mask(validity.begin(), validity.end())); + } + + template + void init(std::vector>&& child_columns, V validity_iterator) + { + size_type num_rows = child_columns.empty() ?
0 : child_columns[0]->size(); + + CUDF_EXPECTS(std::all_of(child_columns.begin(), + child_columns.end(), + [&](auto const& p_column) { return p_column->size() == num_rows; }), + "All struct member columns must have the same row count."); + + std::vector validity(num_rows); + std::copy(validity_iterator, validity_iterator + num_rows, validity.begin()); + + init(std::move(child_columns), validity); + } +}; + } // namespace test } // namespace cudf diff --git a/cpp/tests/utilities/scalar_utilities.cu b/cpp/tests/utilities/scalar_utilities.cu index 8f2f9800711..841d5adb839 100644 --- a/cpp/tests/utilities/scalar_utilities.cu +++ b/cpp/tests/utilities/scalar_utilities.cu @@ -70,6 +70,13 @@ void compare_scalar_functor::operator()(cudf::scalar const& lhs CUDF_FAIL("Unsupported scalar compare type: list_view"); } +template <> +void compare_scalar_functor::operator()(cudf::scalar const& lhs, + cudf::scalar const& rhs) +{ + CUDF_FAIL("Unsupported scalar compare type: struct_view"); +} + } // anonymous namespace void expect_scalars_equal(cudf::scalar const& lhs, cudf::scalar const& rhs) diff --git a/cpp/tests/utilities_tests/lists_column_wrapper_tests.cpp b/cpp/tests/utilities_tests/lists_column_wrapper_tests.cpp index 6712a3c7d82..b0fb5aa9e44 100644 --- a/cpp/tests/utilities_tests/lists_column_wrapper_tests.cpp +++ b/cpp/tests/utilities_tests/lists_column_wrapper_tests.cpp @@ -21,6 +21,11 @@ #include #include #include +#include "cudf/column/column_factories.hpp" +#include "cudf/types.hpp" +#include "rmm/device_buffer.hpp" +#include "thrust/iterator/counting_iterator.h" +#include "thrust/iterator/transform_iterator.h" struct ListColumnWrapperTest : public cudf::test::BaseFixture { }; @@ -1358,3 +1363,220 @@ TEST_F(ListColumnWrapperTest, MismatchedHierarchies) EXPECT_THROW(expect_failure(), cudf::logic_error); } } + +TYPED_TEST(ListColumnWrapperTestTyped, ListsOfStructs) +{ + using namespace cudf; + + using T = TypeParam; + + auto num_struct_rows = 8; + auto numeric_column = 
test::fixed_width_column_wrapper{1, 2, 3, 4, 5, 6, 7, 8};
+  auto bool_column = test::fixed_width_column_wrapper{1, 1, 1, 1, 0, 0, 0, 0};
+  auto struct_column = test::structs_column_wrapper{{numeric_column, bool_column}}.release();
+  EXPECT_EQ(struct_column->size(), num_struct_rows);
+  EXPECT_TRUE(!struct_column->nullable());
+
+  auto lists_column_offsets = test::fixed_width_column_wrapper{0, 2, 4, 8}.release();
+  auto num_lists = lists_column_offsets->size() - 1;
+  auto lists_column = make_lists_column(
+    num_lists, std::move(lists_column_offsets), std::move(struct_column), UNKNOWN_NULL_COUNT, {});
+
+  // Check if child column is unchanged.
+
+  auto expected_numeric_column = test::fixed_width_column_wrapper{1, 2, 3, 4, 5, 6, 7, 8};
+  auto expected_bool_column = test::fixed_width_column_wrapper{1, 1, 1, 1, 0, 0, 0, 0};
+  auto expected_struct_column =
+    test::structs_column_wrapper{{expected_numeric_column, expected_bool_column}}.release();
+
+  cudf::test::expect_columns_equal(*expected_struct_column,
+                                   lists_column_view(*lists_column).child());
+}
+
+// Wraps an 8-row struct column (numeric child built with a second {1,1,1,1,0,0,0,0} list,
+// plus a bool child) in a lists column with offsets {0, 2, 4, 8} and a list-level null mask
+// {1, 1, 0}, then checks that the lists column's child compares equal to an identically
+// constructed struct column.
+// NOTE(review): template arguments (e.g. on fixed_width_column_wrapper) look like they were
+// stripped from this copy of the patch, which would also explain why `T` appears unused in
+// these tests -- confirm against the upstream test file.
+TYPED_TEST(ListColumnWrapperTestTyped, ListsOfStructsWithValidity)
+{
+  using namespace cudf;
+
+  using T = TypeParam;
+
+  auto num_struct_rows = 8;
+  // The second initializer list is presumably the per-row validity of the numeric child.
+  auto numeric_column =
+    test::fixed_width_column_wrapper{{1, 2, 3, 4, 5, 6, 7, 8}, {1, 1, 1, 1, 0, 0, 0, 0}};
+  auto bool_column = test::fixed_width_column_wrapper{1, 1, 1, 1, 0, 0, 0, 0};
+  auto struct_column = test::structs_column_wrapper{{numeric_column, bool_column}}.release();
+  EXPECT_EQ(struct_column->size(), num_struct_rows);
+  EXPECT_TRUE(!struct_column->nullable());  // only children were passed, no struct-level validity
+
+  auto lists_column_offsets = test::fixed_width_column_wrapper{0, 2, 4, 8}.release();
+  auto list_null_mask = {1, 1, 0};
+  // Read the size before the offsets column is moved into make_lists_column below.
+  auto num_lists = lists_column_offsets->size() - 1;
+  auto lists_column =
+    make_lists_column(num_lists,
+                      std::move(lists_column_offsets),
+                      std::move(struct_column),
+                      UNKNOWN_NULL_COUNT,
+                      test::detail::make_null_mask(list_null_mask.begin(), list_null_mask.end()));
+
+  // Check if child column is unchanged.
+
+  // struct_column was moved from above, so the expectation is rebuilt from the same literals.
+  auto expected_numeric_column =
+    test::fixed_width_column_wrapper{{1, 2, 3, 4, 5, 6, 7, 8}, {1, 1, 1, 1, 0, 0, 0, 0}};
+  auto expected_bool_column = test::fixed_width_column_wrapper{1, 1, 1, 1, 0, 0, 0, 0};
+  auto expected_struct_column =
+    test::structs_column_wrapper{{expected_numeric_column, expected_bool_column}}.release();
+
+  cudf::test::expect_columns_equal(*expected_struct_column,
+                                   lists_column_view(*lists_column).child());
+}
+
+// Nests an 8-row struct column two levels deep: the inner lists column uses offsets
+// {0, 2, 4, 8} and the outer lists column uses offsets {0, 2, 3}. An empty `{}` is passed as
+// the last argument at both levels. The innermost child must compare equal to an identically
+// constructed struct column.
+TYPED_TEST(ListColumnWrapperTestTyped, ListsOfListsOfStructs)
+{
+  using namespace cudf;
+
+  using T = TypeParam;
+
+  auto num_struct_rows = 8;
+  auto numeric_column = test::fixed_width_column_wrapper{1, 2, 3, 4, 5, 6, 7, 8};
+  auto bool_column = test::fixed_width_column_wrapper{1, 1, 1, 1, 0, 0, 0, 0};
+  auto struct_column = test::structs_column_wrapper{{numeric_column, bool_column}}.release();
+  EXPECT_EQ(struct_column->size(), num_struct_rows);
+  EXPECT_TRUE(!struct_column->nullable());
+
+  // Inner level: lists of structs.
+  auto lists_column_offsets = test::fixed_width_column_wrapper{0, 2, 4, 8}.release();
+  auto num_lists = lists_column_offsets->size() - 1;  // read before the move below
+  auto lists_column = make_lists_column(
+    num_lists, std::move(lists_column_offsets), std::move(struct_column), UNKNOWN_NULL_COUNT, {});
+
+  // Outer level: lists of the lists built above.
+  auto lists_of_lists_column_offsets =
+    test::fixed_width_column_wrapper{0, 2, 3}.release();
+  auto num_lists_of_lists = lists_of_lists_column_offsets->size() - 1;  // read before the move
+  auto lists_of_lists_of_structs_column =
+    make_lists_column(num_lists_of_lists,
+                      std::move(lists_of_lists_column_offsets),
+                      std::move(lists_column),
+                      UNKNOWN_NULL_COUNT,
+                      {});
+
+  // Check if child column is unchanged.
+
+  auto expected_numeric_column = test::fixed_width_column_wrapper{1, 2, 3, 4, 5, 6, 7, 8};
+  auto expected_bool_column = test::fixed_width_column_wrapper{1, 1, 1, 1, 0, 0, 0, 0};
+  auto expected_struct_column =
+    test::structs_column_wrapper{{expected_numeric_column, expected_bool_column}}.release();
+
+  // Two .child() hops: outer lists -> inner lists -> struct column.
+  cudf::test::expect_columns_equal(
+    *expected_struct_column,
+    lists_column_view{lists_column_view{*lists_of_lists_of_structs_column}.child()}.child());
+}
+
+// Same two-level nesting as ListsOfListsOfStructs, but the numeric child is built with a
+// second {1,1,1,1,0,0,0,0} list and both list levels receive a null mask ({1, 1, 0} for the
+// inner lists, {1, 0} for the outer lists). The innermost child must still compare equal to
+// an identically constructed struct column.
+TYPED_TEST(ListColumnWrapperTestTyped, ListsOfListsOfStructsWithValidity)
+{
+  using namespace cudf;
+
+  using T = TypeParam;
+
+  auto num_struct_rows = 8;
+  // The second initializer list is presumably the per-row validity of the numeric child.
+  auto numeric_column =
+    test::fixed_width_column_wrapper{{1, 2, 3, 4, 5, 6, 7, 8}, {1, 1, 1, 1, 0, 0, 0, 0}};
+  auto bool_column = test::fixed_width_column_wrapper{1, 1, 1, 1, 0, 0, 0, 0};
+  auto struct_column = test::structs_column_wrapper{{numeric_column, bool_column}}.release();
+  EXPECT_EQ(struct_column->size(), num_struct_rows);
+  EXPECT_TRUE(!struct_column->nullable());  // only children were passed, no struct-level validity
+
+  // Inner level: lists of structs, with a null mask.
+  auto lists_column_offsets = test::fixed_width_column_wrapper{0, 2, 4, 8}.release();
+  auto num_lists = lists_column_offsets->size() - 1;  // read before the move below
+  auto list_null_mask = {1, 1, 0};
+  auto lists_column =
+    make_lists_column(num_lists,
+                      std::move(lists_column_offsets),
+                      std::move(struct_column),
+                      UNKNOWN_NULL_COUNT,
+                      test::detail::make_null_mask(list_null_mask.begin(), list_null_mask.end()));
+
+  // Outer level: lists of the lists built above, with its own null mask.
+  auto lists_of_lists_column_offsets =
+    test::fixed_width_column_wrapper{0, 2, 3}.release();
+  auto num_lists_of_lists = lists_of_lists_column_offsets->size() - 1;  // read before the move
+  auto list_of_lists_null_mask = {1, 0};
+  auto lists_of_lists_of_structs_column = make_lists_column(
+    num_lists_of_lists,
+    std::move(lists_of_lists_column_offsets),
+    std::move(lists_column),
+    UNKNOWN_NULL_COUNT,
+    test::detail::make_null_mask(list_of_lists_null_mask.begin(), list_of_lists_null_mask.end()));
+
+  // Check if child column is unchanged.
+
+  auto expected_numeric_column =
+    test::fixed_width_column_wrapper{{1, 2, 3, 4, 5, 6, 7, 8}, {1, 1, 1, 1, 0, 0, 0, 0}};
+  auto expected_bool_column = test::fixed_width_column_wrapper{1, 1, 1, 1, 0, 0, 0, 0};
+  auto expected_struct_column =
+    test::structs_column_wrapper{{expected_numeric_column, expected_bool_column}}.release();
+
+  // Two .child() hops: outer lists -> inner lists -> struct column.
+  cudf::test::expect_columns_equal(
+    *expected_struct_column,
+    lists_column_view{lists_column_view{*lists_of_lists_of_structs_column}.child()}.child());
+}
+
+// Larger variant of the child-preservation check: a 10000-row struct column is generated from
+// iterators (with a struct-level validity vector), grouped into lists of 50 elements each, and
+// the lists column's child is compared against an identically constructed struct column.
+// The list level itself is given an empty `{}` as its last argument.
+TYPED_TEST(ListColumnWrapperTestTyped, LargeListsOfStructsWithValidity)
+{
+  using namespace cudf;
+
+  using T = TypeParam;
+
+  auto num_struct_rows = 10000;
+
+  // Creating Struct.
+  // Numeric child: counting range [0, num_struct_rows); the third iterator (i % 2 == 1) is
+  // presumably the per-row validity -- confirm against the wrapper's constructor.
+  auto numeric_column = test::fixed_width_column_wrapper{
+    thrust::make_counting_iterator(0),
+    thrust::make_counting_iterator(num_struct_rows),
+    cudf::test::make_counting_transform_iterator(0, [](auto i) { return i % 2 == 1; })};
+
+  // Bool child: true on every index divisible by 3.
+  auto bool_iterator = thrust::make_transform_iterator(thrust::make_counting_iterator(0),
+                                                       [](auto i) { return i % 3 == 0; });
+  auto bool_column =
+    test::fixed_width_column_wrapper(bool_iterator, bool_iterator + num_struct_rows);
+
+  // Struct-level validity: the predicate is true on every index divisible by 5.
+  auto struct_validity_iterator =
+    cudf::test::make_counting_transform_iterator(0, [](auto i) { return i % 5 == 0; });
+  auto struct_column =
+    test::structs_column_wrapper{
+      {numeric_column, bool_column},
+      std::vector(struct_validity_iterator, struct_validity_iterator + num_struct_rows)}
+      .release();
+
+  EXPECT_EQ(struct_column->size(), num_struct_rows);
+
+  // Now, use struct_column to create a list column.
+  // Each list has 50 elements.
+  auto num_list_rows = num_struct_rows / 50;
+  // Offsets are 0, 50, 100, ...; num_list_rows + 1 of them are taken, so the final offset is
+  // num_list_rows * 50 == num_struct_rows.
+  auto list_offset_iterator =
+    test::make_counting_transform_iterator(0, [](auto i) { return i * 50; });
+  auto list_offset_column = test::fixed_width_column_wrapper(
+                              list_offset_iterator, list_offset_iterator + num_list_rows + 1)
+                              .release();
+  auto lists_column = make_lists_column(num_list_rows,
+                                        std::move(list_offset_column),
+                                        std::move(struct_column),
+                                        cudf::UNKNOWN_NULL_COUNT,
+                                        {});
+
+  // List construction succeeded.
+  // Verify that the child is unchanged.
+
+  // struct_column was moved from above, so the expectation is rebuilt from the same iterators.
+  auto expected_numeric_column = test::fixed_width_column_wrapper{
+    thrust::make_counting_iterator(0),
+    thrust::make_counting_iterator(num_struct_rows),
+    cudf::test::make_counting_transform_iterator(0, [](auto i) { return i % 2 == 1; })};
+
+  auto expected_bool_column =
+    test::fixed_width_column_wrapper(bool_iterator, bool_iterator + num_struct_rows);
+
+  auto expected_struct_column =
+    test::structs_column_wrapper{
+      {expected_numeric_column, expected_bool_column},
+      std::vector(struct_validity_iterator, struct_validity_iterator + num_struct_rows)}
+      .release();
+
+  cudf::test::expect_columns_equal(*expected_struct_column,
+                                   lists_column_view(*lists_column).child());
+}