From 0683c40971494fa724949d750e8cd8ecd38ce3cd Mon Sep 17 00:00:00 2001 From: Mithun RK Date: Fri, 17 Jul 2020 12:08:21 -0700 Subject: [PATCH 01/20] [WIP] [struct] Initial commit --- cpp/CMakeLists.txt | 2 + .../cudf/column/column_device_view.cuh | 1 + cpp/include/cudf/column/column_factories.hpp | 8 + cpp/include/cudf/column/column_view.hpp | 3 + cpp/include/cudf/detail/gather.cuh | 13 + cpp/include/cudf/detail/null_mask.hpp | 15 + cpp/include/cudf/structs/struct_view.hpp | 35 ++ .../cudf/structs/structs_column_view.hpp | 48 +++ cpp/include/cudf/types.hpp | 4 + cpp/include/cudf/utilities/traits.hpp | 6 +- .../cudf/utilities/type_dispatcher.hpp | 10 + cpp/src/bitmask/null_mask.cu | 61 +-- cpp/src/column/column.cu | 7 + cpp/src/column/column_factories.cpp | 10 + cpp/src/copying/copy.cu | 15 + cpp/src/copying/get_element.cu | 10 + cpp/src/copying/scatter.cu | 14 + cpp/src/dictionary/search.cu | 12 +- cpp/src/filling/fill.cu | 10 + cpp/src/io/csv/csv_gpu.cu | 11 + cpp/src/io/json/json_gpu.cu | 8 + cpp/src/reductions/simple.cuh | 7 +- cpp/src/replace/clamp.cu | 26 ++ cpp/src/scalar/scalar_factories.cpp | 6 + cpp/src/search/search.cu | 20 + cpp/src/structs/structs_column_factories.cu | 120 ++++++ cpp/src/structs/structs_column_view.cu | 30 ++ cpp/tests/CMakeLists.txt | 8 + cpp/tests/structs/structs_column_tests.cu | 380 ++++++++++++++++++ cpp/tests/utilities/column_utilities.cu | 70 ++++ cpp/tests/utilities/column_wrapper.hpp | 52 +++ cpp/tests/utilities/scalar_utilities.cu | 7 + 32 files changed, 994 insertions(+), 35 deletions(-) create mode 100644 cpp/include/cudf/structs/struct_view.hpp create mode 100644 cpp/include/cudf/structs/structs_column_view.hpp create mode 100644 cpp/src/structs/structs_column_factories.cu create mode 100644 cpp/src/structs/structs_column_view.cu create mode 100644 cpp/tests/structs/structs_column_tests.cu diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index 2f84663bcf3..8fd282d2baa 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -562,6 +562,8 @@ add_library(cudf src/lists/lists_column_view.cu src/lists/copying/concatenate.cu src/lists/copying/gather.cu + src/structs/structs_column_view.cu + src/structs/structs_column_factories.cu src/text/detokenize.cu src/text/generate_ngrams.cu src/text/normalize.cu diff --git a/cpp/include/cudf/column/column_device_view.cuh b/cpp/include/cudf/column/column_device_view.cuh index 2522143d5fa..666132ff9bd 100644 --- a/cpp/include/cudf/column/column_device_view.cuh +++ b/cpp/include/cudf/column/column_device_view.cuh @@ -19,6 +19,7 @@ #include #include #include +#include #include #include #include diff --git a/cpp/include/cudf/column/column_factories.hpp b/cpp/include/cudf/column/column_factories.hpp index 8738f7ce56b..14f6b135608 100644 --- a/cpp/include/cudf/column/column_factories.hpp +++ b/cpp/include/cudf/column/column_factories.hpp @@ -502,6 +502,14 @@ std::unique_ptr make_lists_column( cudaStream_t stream = 0, rmm::mr::device_memory_resource* mr = rmm::mr::get_default_resource()); +std::unique_ptr make_structs_column( + size_type num_rows, + std::vector>&& child_column, + size_type null_count, + rmm::device_buffer&& null_mask, + cudaStream_t stream = 0, + rmm::mr::device_memory_resource* mr = rmm::mr::get_default_resource()); + /** * @brief Return a column with size elements that are all equal to the * given scalar. diff --git a/cpp/include/cudf/column/column_view.hpp b/cpp/include/cudf/column/column_view.hpp index 0afb10c6ff0..7d217de90f6 100644 --- a/cpp/include/cudf/column/column_view.hpp +++ b/cpp/include/cudf/column/column_view.hpp @@ -335,6 +335,9 @@ class column_view : public detail::column_view_base { **/ size_type num_children() const noexcept { return _children.size(); } + auto child_begin() const noexcept { return _children.begin(); } + auto child_end() const noexcept { return _children.end(); } + private: std::vector _children{}; ///< Based on element type, children ///< may contain additional data diff --git a/cpp/include/cudf/detail/gather.cuh b/cpp/include/cudf/detail/gather.cuh index 1056035c1fa..8149a6c30ed 100644 --- a/cpp/include/cudf/detail/gather.cuh +++ b/cpp/include/cudf/detail/gather.cuh @@ -357,6 +357,19 @@ struct column_gatherer_impl { } }; +template +struct column_gatherer_impl { + std::unique_ptr operator()(column_view const& column, + MapItRoot gather_map_begin, + MapItRoot gather_map_end, + bool nullify_out_of_bounds, + cudaStream_t stream, + rmm::mr::device_memory_resource* mr) + { + CUDF_FAIL("Gather not yet supported on struct_view."); + } +}; + /** * @brief Function object for gathering a type-erased * column. To be used with the cudf::type_dispatcher. diff --git a/cpp/include/cudf/detail/null_mask.hpp b/cpp/include/cudf/detail/null_mask.hpp index bc8e7e450f3..b0b9ebd0359 100644 --- a/cpp/include/cudf/detail/null_mask.hpp +++ b/cpp/include/cudf/detail/null_mask.hpp @@ -39,6 +39,21 @@ std::vector segmented_count_unset_bits(bitmask_type const* bitmask, std::vector const& indices, cudaStream_t stream = 0); +/** + * @brief Returns a bitwise AND of the specified bitmasks + * + * @param masks The list of data pointers of the bitmasks to be ANDed + * @param begin_bits The bit offsets from which each mask is to be ANDed + * @param mask_size The number of bits to be ANDed in each mask + * @param stream CUDA stream used for device memory operations and kernel launches + * @param mr Device memory resource used to allocate the returned device_buffer + * @return rmm::device_buffer Output bitmask + */ +rmm::device_buffer bitmask_and(std::vector const &masks, + std::vector const &begin_bits, + size_type mask_size, + cudaStream_t stream, + rmm::mr::device_memory_resource *mr); } // namespace detail } // namespace cudf diff --git a/cpp/include/cudf/structs/struct_view.hpp b/cpp/include/cudf/structs/struct_view.hpp new file mode 100644 index 00000000000..d9733b29406 --- /dev/null +++ b/cpp/include/cudf/structs/struct_view.hpp @@ -0,0 +1,35 @@ +/* + * Copyright (c) 2020, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#pragma once + +/** + * @file struct_view.cuh + * @brief Class definition for cudf::struct_view. + */ + +namespace cudf { + +/** + * @brief A non-owning, immutable view of device data that represents + * a struct with fields of arbitrary types (including primitives, lists, + * and other structs) + * + */ +class struct_view { + +}; + +} // namespace cudf \ No newline at end of file diff --git a/cpp/include/cudf/structs/structs_column_view.hpp b/cpp/include/cudf/structs/structs_column_view.hpp new file mode 100644 index 00000000000..31063d6aeee --- /dev/null +++ b/cpp/include/cudf/structs/structs_column_view.hpp @@ -0,0 +1,48 @@ +/* + * Copyright (c) 2020, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#pragma once + +#include +#include + +namespace cudf { + + +class structs_column_view : private column_view +{ + + public: + + // Foundation members: + structs_column_view(structs_column_view const&) = default; + structs_column_view(structs_column_view &&) = default; + ~structs_column_view() = default; + structs_column_view& operator=(structs_column_view const&) = default; + structs_column_view& operator=(structs_column_view &&) = default; + + explicit structs_column_view(column_view const& rhs); + + using column_view::has_nulls; + using column_view::null_count; + using column_view::null_mask; + using column_view::offset; + using column_view::size; + using column_view::child_begin; + using column_view::child_end; + +}; // class structs_column_view; + +} // namespace cudf; diff --git a/cpp/include/cudf/types.hpp b/cpp/include/cudf/types.hpp index 5bce5dc393c..895268a56a5 100644 --- a/cpp/include/cudf/types.hpp +++ b/cpp/include/cudf/types.hpp @@ -59,6 +59,7 @@ class column_view; class mutable_column_view; class string_view; class list_view; +class struct_view; class scalar; template @@ -79,6 +80,8 @@ class duration_scalar_device_view; class list_scalar; +class struct_scalar; + class table; class table_view; class mutable_table_view; @@ -207,6 +210,7 @@ enum class type_id : int32_t { DICTIONARY32, ///< Dictionary type using int32 indices STRING, ///< String elements LIST, ///< List elements + STRUCT, ///< Struct elements // `NUM_TYPE_IDS` must be last! NUM_TYPE_IDS ///< Total number of type ids }; diff --git a/cpp/include/cudf/utilities/traits.hpp b/cpp/include/cudf/utilities/traits.hpp index 637483ef747..b30c4580179 100644 --- a/cpp/include/cudf/utilities/traits.hpp +++ b/cpp/include/cudf/utilities/traits.hpp @@ -24,6 +24,7 @@ #include #include +#include "cudf/structs/struct_view.hpp" namespace cudf { @@ -455,7 +456,7 @@ template constexpr inline bool is_compound() { return std::is_same::value or std::is_same::value or - std::is_same::value; + std::is_same::value or std::is_same::value; } struct is_compound_impl { @@ -497,7 +498,8 @@ constexpr inline bool is_compound(data_type type) template constexpr inline bool is_nested() { - return std::is_same::value; + return std::is_same::value + || std::is_same::value; } struct is_nested_impl { diff --git a/cpp/include/cudf/utilities/type_dispatcher.hpp b/cpp/include/cudf/utilities/type_dispatcher.hpp index cb0db3492b5..6c8723eda6f 100644 --- a/cpp/include/cudf/utilities/type_dispatcher.hpp +++ b/cpp/include/cudf/utilities/type_dispatcher.hpp @@ -132,6 +132,7 @@ CUDF_TYPE_MAPPING(cudf::duration_us, type_id::DURATION_MICROSECONDS); CUDF_TYPE_MAPPING(cudf::duration_ns, type_id::DURATION_NANOSECONDS); CUDF_TYPE_MAPPING(dictionary32, type_id::DICTIONARY32); CUDF_TYPE_MAPPING(cudf::list_view, type_id::LIST); +CUDF_TYPE_MAPPING(cudf::struct_view, type_id::STRUCT); template struct type_to_scalar_type_impl { @@ -184,6 +185,12 @@ struct type_to_scalar_type_impl { // using ScalarDeviceType = cudf::list_scalar_device_view; }; +template <> // TODO: Ditto, likewise. +struct type_to_scalar_type_impl { + using ScalarType = cudf::struct_scalar; + // using ScalarDeviceType = cudf::struct_scalar_device_view; // CALEB: TODO! +}; + #ifndef MAP_TIMESTAMP_SCALAR #define MAP_TIMESTAMP_SCALAR(Type) \ template <> \ @@ -400,6 +407,9 @@ CUDA_HOST_DEVICE_CALLABLE constexpr decltype(auto) type_dispatcher(cudf::data_ty case type_id::LIST: return f.template operator()::type>( std::forward(args)...); + case type_id::STRUCT: + return f.template operator()::type>( + std::forward(args)...); default: { #ifndef __CUDA_ARCH__ CUDF_FAIL("Unsupported type_id."); diff --git a/cpp/src/bitmask/null_mask.cu b/cpp/src/bitmask/null_mask.cu index 34817650012..8f47bdeb5df 100644 --- a/cpp/src/bitmask/null_mask.cu +++ b/cpp/src/bitmask/null_mask.cu @@ -341,6 +341,36 @@ __global__ void offset_bitmask_and(bitmask_type *__restrict__ destination, } } +// convert [first_bit_index,last_bit_index) to +// [first_word_index,last_word_index) +struct to_word_index : public thrust::unary_function { + const bool _inclusive = false; + size_type const *const _d_bit_indices = nullptr; + + /** + * @brief Constructor of a functor that converts bit indices to bitmask word + * indices. + * + * @param[in] inclusive Flag that indicates whether bit indices are inclusive + * or exclusive. + * @param[in] d_bit_indices Pointer to an array of bit indices + */ + __host__ to_word_index(bool inclusive, size_type const *d_bit_indices) + : _inclusive(inclusive), _d_bit_indices(d_bit_indices) + { + } + + __device__ size_type operator()(const size_type &i) const + { + auto bit_index = _d_bit_indices[i]; + return word_index(bit_index) + ((_inclusive || intra_word_index(bit_index) == 0) ? 0 : 1); + } +}; + +} // namespace + +namespace detail { + // Bitwise AND of the masks rmm::device_buffer bitmask_and(std::vector const &masks, std::vector const &begin_bits, @@ -378,35 +408,6 @@ rmm::device_buffer bitmask_and(std::vector const &masks, return dest_mask; } -// convert [first_bit_index,last_bit_index) to -// [first_word_index,last_word_index) -struct to_word_index : public thrust::unary_function { - const bool _inclusive = false; - size_type const *const _d_bit_indices = nullptr; - - /** - * @brief Constructor of a functor that converts bit indices to bitmask word - * indices. - * - * @param[in] inclusive Flag that indicates whether bit indices are inclusive - * or exclusive. - * @param[in] d_bit_indices Pointer to an array of bit indices - */ - __host__ to_word_index(bool inclusive, size_type const *d_bit_indices) - : _inclusive(inclusive), _d_bit_indices(d_bit_indices) - { - } - - __device__ size_type operator()(const size_type &i) const - { - auto bit_index = _d_bit_indices[i]; - return word_index(bit_index) + ((_inclusive || intra_word_index(bit_index) == 0) ? 0 : 1); - } -}; - -} // namespace - -namespace detail { cudf::size_type count_set_bits(bitmask_type const *bitmask, size_type start, size_type stop, @@ -662,7 +663,7 @@ rmm::device_buffer bitmask_and(table_view const &view, } } - if (masks.size() > 0) { return bitmask_and(masks, offsets, view.num_rows(), stream, mr); } + if (masks.size() > 0) { return cudf::detail::bitmask_and(masks, offsets, view.num_rows(), stream, mr); } return null_mask; } diff --git a/cpp/src/column/column.cu b/cpp/src/column/column.cu index 3380fda5836..edb0941dbc0 100644 --- a/cpp/src/column/column.cu +++ b/cpp/src/column/column.cu @@ -235,6 +235,13 @@ struct create_column_from_view { { CUDF_FAIL("list_view not supported yet"); } + + template ::value> * = nullptr> + std::unique_ptr operator()() + { + CUDF_FAIL("struct_view not supported yet"); + } }; } // anonymous namespace diff --git a/cpp/src/column/column_factories.cpp b/cpp/src/column/column_factories.cpp index 3e0d07a28dc..70542fc1877 100644 --- a/cpp/src/column/column_factories.cpp +++ b/cpp/src/column/column_factories.cpp @@ -190,6 +190,16 @@ std::unique_ptr column_from_scalar_dispatch::operator() +std::unique_ptr column_from_scalar_dispatch::operator()( + scalar const& value, + size_type size, + rmm::mr::device_memory_resource* mr, + cudaStream_t stream) const +{ + CUDF_FAIL("TODO. struct_view currently not supported."); +} + std::unique_ptr make_column_from_scalar(scalar const& s, size_type size, rmm::mr::device_memory_resource* mr, diff --git a/cpp/src/copying/copy.cu b/cpp/src/copying/copy.cu index 1eb41ffae15..6166e89c463 100644 --- a/cpp/src/copying/copy.cu +++ b/cpp/src/copying/copy.cu @@ -115,6 +115,21 @@ struct copy_if_else_functor_impl { } }; +template +struct copy_if_else_functor_impl { + std::unique_ptr operator()(Left const& lhs, + Right const& rhs, + size_type size, + bool left_nullable, + bool right_nullable, + Filter filter, + rmm::mr::device_memory_resource* mr, + cudaStream_t stream) + { + CUDF_FAIL("copy_if_else not supported for struct_view yet"); + } +}; + /** * @brief Functor called by the `type_dispatcher` to invoke copy_if_else on combinations * of column_view and scalar diff --git a/cpp/src/copying/get_element.cu b/cpp/src/copying/get_element.cu index 04edae9366b..c9a11dfb979 100644 --- a/cpp/src/copying/get_element.cu +++ b/cpp/src/copying/get_element.cu @@ -114,6 +114,16 @@ struct get_element_functor { { CUDF_FAIL("get_element_functor not supported for list_view"); } + + template ::value> *p = nullptr> + std::unique_ptr operator()( + column_view const &input, + size_type index, + cudaStream_t stream = 0, + rmm::mr::device_memory_resource *mr = rmm::mr::get_default_resource()) + { + CUDF_FAIL("get_element_functor not supported for struct_view"); + } }; } // namespace diff --git a/cpp/src/copying/scatter.cu b/cpp/src/copying/scatter.cu index 75564359b90..66155530cc4 100644 --- a/cpp/src/copying/scatter.cu +++ b/cpp/src/copying/scatter.cu @@ -25,6 +25,7 @@ #include #include #include +#include #include #include #include @@ -188,6 +189,19 @@ struct column_scalar_scatterer_impl { } }; +template +struct column_scalar_scatterer_impl { + std::unique_ptr operator()(std::unique_ptr const& source, + MapIterator scatter_iter, + size_type scatter_rows, + column_view const& target, + rmm::mr::device_memory_resource* mr, + cudaStream_t stream) const + { + CUDF_FAIL("scatter scalar to struct_view not implemented"); + } +}; + template struct column_scalar_scatterer { template diff --git a/cpp/src/dictionary/search.cu b/cpp/src/dictionary/search.cu index d3cd71360dd..522b25e8bba 100644 --- a/cpp/src/dictionary/search.cu +++ b/cpp/src/dictionary/search.cu @@ -38,7 +38,8 @@ namespace detail { struct find_index_fn { template ::value and - not std::is_same::value>* = nullptr> + not std::is_same::value and + not std::is_same::value>* = nullptr> std::unique_ptr> operator()(dictionary_column_view const& input, scalar const& key, rmm::mr::device_memory_resource* mr, @@ -78,6 +79,15 @@ struct find_index_fn { { CUDF_FAIL("list_view column cannot be the keys column of a dictionary"); } + + template ::value>* = nullptr> + std::unique_ptr> operator()(dictionary_column_view const& input, + scalar const& key, + rmm::mr::device_memory_resource* mr, + cudaStream_t stream) const + { + CUDF_FAIL("struct_view column cannot be the keys column of a dictionary"); + } }; std::unique_ptr> get_index(dictionary_column_view const& dictionary, diff --git a/cpp/src/filling/fill.cu b/cpp/src/filling/fill.cu index 65c912ba2f5..aa5a6d7bf40 100644 --- a/cpp/src/filling/fill.cu +++ b/cpp/src/filling/fill.cu @@ -114,6 +114,16 @@ std::unique_ptr out_of_place_fill_range_dispatch::operator() +std::unique_ptr out_of_place_fill_range_dispatch::operator()( + cudf::size_type begin, + cudf::size_type end, + rmm::mr::device_memory_resource* mr, + cudaStream_t stream) +{ + CUDF_FAIL("struct_view not supported yet"); +} + template <> std::unique_ptr out_of_place_fill_range_dispatch::operator()( cudf::size_type begin, diff --git a/cpp/src/io/csv/csv_gpu.cu b/cpp/src/io/csv/csv_gpu.cu index 3eb53f422d0..37fc475b688 100644 --- a/cpp/src/io/csv/csv_gpu.cu +++ b/cpp/src/io/csv/csv_gpu.cu @@ -402,6 +402,17 @@ __inline__ __device__ cudf::list_view decode_value(const char *data, return cudf::list_view{}; } +// The purpose of this is merely to allow compilation ONLY +// TODO : make this work for csv +template <> +__inline__ __device__ cudf::struct_view decode_value(const char *data, + long start, + long end, + ParseOptions const &opts) +{ + return cudf::struct_view{}; +} + /** * @brief Functor for converting CSV raw data to typed value. */ diff --git a/cpp/src/io/json/json_gpu.cu b/cpp/src/io/json/json_gpu.cu index d15b4465f47..75ea12c9617 100644 --- a/cpp/src/io/json/json_gpu.cu +++ b/cpp/src/io/json/json_gpu.cu @@ -268,6 +268,14 @@ __inline__ __device__ cudf::list_view decode_value(const char *data, { return cudf::list_view{}; } +template <> +__inline__ __device__ cudf::struct_view decode_value(const char *data, + uint64_t start, + uint64_t end, + ParseOptions const &opts) +{ + return cudf::struct_view{}; +} /** * @brief Functor for converting plain text data to cuDF data type value. diff --git a/cpp/src/reductions/simple.cuh b/cpp/src/reductions/simple.cuh index cf5a30b1c46..d81f662eb3d 100644 --- a/cpp/src/reductions/simple.cuh +++ b/cpp/src/reductions/simple.cuh @@ -21,6 +21,7 @@ #include #include #include +#include "cudf/structs/struct_view.hpp" namespace cudf { namespace reduction { @@ -80,7 +81,8 @@ struct result_type_dispatcher { (std::is_arithmetic::value || std::is_same::value || std::is_same::value) && - !std::is_same::value; + !std::is_same::value && + !std::is_same::value; } public: @@ -117,7 +119,8 @@ struct element_type_dispatcher { !(std::is_same::value || std::is_same::value)) // disable for list views - || std::is_same::value); + || std::is_same::value + || std::is_same::value); } public: diff --git a/cpp/src/replace/clamp.cu b/cpp/src/replace/clamp.cu index d9b2c50c081..649da52b001 100644 --- a/cpp/src/replace/clamp.cu +++ b/cpp/src/replace/clamp.cu @@ -239,6 +239,19 @@ std::enable_if_t::value, std::unique_ptr +std::enable_if_t::value, std::unique_ptr> clamper( + column_view const& input, + ScalarIterator const& lo_itr, + ScalarIterator const& lo_replace_itr, + ScalarIterator const& hi_itr, + ScalarIterator const& hi_replace_itr, + rmm::mr::device_memory_resource* mr, + cudaStream_t stream) +{ + CUDF_FAIL("struct_view type not supported"); +} + } // namespace template @@ -286,6 +299,19 @@ std::unique_ptr dispatch_clamp::operator()( CUDF_FAIL("clamp for list_view not supported"); } +template <> +std::unique_ptr dispatch_clamp::operator()( + column_view const& input, + scalar const& lo, + scalar const& lo_replace, + scalar const& hi, + scalar const& hi_replace, + rmm::mr::device_memory_resource* mr, + cudaStream_t stream) +{ + CUDF_FAIL("clamp for struct_view not supported"); +} + /** * @copydoc cudf::clamp(column_view const& input, scalar const& lo, diff --git a/cpp/src/scalar/scalar_factories.cpp b/cpp/src/scalar/scalar_factories.cpp index aa0088c4dc0..9cdd1f6beb4 100644 --- a/cpp/src/scalar/scalar_factories.cpp +++ b/cpp/src/scalar/scalar_factories.cpp @@ -101,6 +101,12 @@ std::unique_ptr default_scalar_functor::operator()() CUDF_FAIL("list_view type not supported"); } +template <> +std::unique_ptr default_scalar_functor::operator()() +{ + CUDF_FAIL("list_view type not supported"); +} + } // namespace std::unique_ptr make_default_constructed_scalar(data_type type) diff --git a/cpp/src/search/search.cu b/cpp/src/search/search.cu index 1e0a256076b..6299d237783 100644 --- a/cpp/src/search/search.cu +++ b/cpp/src/search/search.cu @@ -187,6 +187,16 @@ bool contains_scalar_dispatch::operator()(column_view const& co CUDF_FAIL("list_view type not supported yet"); } + +template <> +bool contains_scalar_dispatch::operator()(column_view const& col, + scalar const& value, + cudaStream_t stream, + rmm::mr::device_memory_resource* mr) +{ + CUDF_FAIL("struct_view type not supported yet"); +} + } // namespace namespace detail { @@ -279,6 +289,16 @@ std::unique_ptr multi_contains_dispatch::operator()( CUDF_FAIL("list_view type not supported"); } +template <> +std::unique_ptr multi_contains_dispatch::operator()( + column_view const& haystack, + column_view const& needles, + rmm::mr::device_memory_resource* mr, + cudaStream_t stream) +{ + CUDF_FAIL("list_view type not supported"); +} + std::unique_ptr contains(column_view const& haystack, column_view const& needles, rmm::mr::device_memory_resource* mr, diff --git a/cpp/src/structs/structs_column_factories.cu b/cpp/src/structs/structs_column_factories.cu new file mode 100644 index 00000000000..ae29a8246b5 --- /dev/null +++ b/cpp/src/structs/structs_column_factories.cu @@ -0,0 +1,120 @@ +/* + * Copyright (c) 2019, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include +#include + +namespace cudf +{ + namespace + { + // Helper function to superimpose validity of parent struct + // over all member fields (i.e. child columns). + void superimpose_validity( + rmm::device_buffer const& parent_null_mask, + size_type parent_null_count, + std::vector>& children, + cudaStream_t stream, + rmm::mr::device_memory_resource* mr + ) + { + if (parent_null_mask.is_empty()) { + // Struct is not nullable. Children do not need adjustment. + // Bail. + return; + } + + std::for_each( + children.begin(), + children.end(), + [&](std::unique_ptr& p_child) + { + if (!p_child->nullable()) + { + p_child->set_null_mask(std::move(rmm::device_buffer{parent_null_mask, stream, mr})); + p_child->set_null_count(parent_null_count); + } + else { + + auto data_type{p_child->type()}; + auto num_rows{p_child->size()}; + + // All this to reset the null mask. :/ + cudf::column::contents contents{p_child->release()}; + std::vector masks { + reinterpret_cast(parent_null_mask.data()), + reinterpret_cast(contents.null_mask->data())}; + + rmm::device_buffer new_child_mask = cudf::detail::bitmask_and(masks, {0, 0}, num_rows, stream, mr); + + // Recurse for struct members. + // Push down recomputed child mask to child columns of the current child. + if (data_type.id() == cudf::type_id::STRUCT) + { + superimpose_validity(new_child_mask, UNKNOWN_NULL_COUNT, contents.children, stream, mr); + } + + // Reconstitute the column. + p_child.reset( + new column( + data_type, + num_rows, + std::move(*contents.data), + std::move(new_child_mask), + UNKNOWN_NULL_COUNT, + std::move(contents.children) + ) + ); + } + } + ); + } + } + + /// Column factory that adopts child columns. + std::unique_ptr make_structs_column( + size_type num_rows, + std::vector>&& child_columns, + size_type null_count, + rmm::device_buffer&& null_mask, + cudaStream_t stream, + rmm::mr::device_memory_resource* mr) + { + if (null_count > 0) + { + CUDF_EXPECTS(!null_mask.is_empty(), "Column with nulls must be nullable."); + } + + CUDF_EXPECTS( + std::all_of(child_columns.begin(), + child_columns.end(), + [&](auto const& i){ return num_rows == i->size(); }), + "Child columns must have the same number of rows as the Struct column."); + + superimpose_validity(null_mask, null_count, child_columns, stream, mr); + + return std::make_unique( + cudf::data_type{type_id::STRUCT}, + num_rows, + rmm::device_buffer{0, stream, mr}, // Empty data buffer. Structs hold no data. + null_mask, + null_count, + std::move(child_columns) + ); + } + +} // namespace cudf; diff --git a/cpp/src/structs/structs_column_view.cu b/cpp/src/structs/structs_column_view.cu new file mode 100644 index 00000000000..4b2637b63bd --- /dev/null +++ b/cpp/src/structs/structs_column_view.cu @@ -0,0 +1,30 @@ +/* + * Copyright (c) 2019, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include +#include "cudf/utilities/error.hpp" + + namespace cudf + { + + structs_column_view::structs_column_view(column_view const& rhs) + : column_view{rhs} + { + CUDF_EXPECTS(type().id() == type_id::STRUCT, "structs_column_view only supports struct columns"); + } + + } \ No newline at end of file diff --git a/cpp/tests/CMakeLists.txt b/cpp/tests/CMakeLists.txt index b8cc3b09213..7854bde4c62 100644 --- a/cpp/tests/CMakeLists.txt +++ b/cpp/tests/CMakeLists.txt @@ -533,6 +533,14 @@ set(STRINGS_TEST_SRC ConfigureTest(STRINGS_TEST "${STRINGS_TEST_SRC}") +################################################################################################### +# - structs test ---------------------------------------------------------------------------------- + +set(STRUCTS_TEST_SRC + "${CMAKE_CURRENT_SOURCE_DIR}/structs/structs_column_tests.cu") + +ConfigureTest(STRUCTS_TEST "${STRUCTS_TEST_SRC}") + ################################################################################################### # - nvtext test ---------------------------------------------------------------------------------- diff --git a/cpp/tests/structs/structs_column_tests.cu b/cpp/tests/structs/structs_column_tests.cu new file mode 100644 index 00000000000..13627633126 --- /dev/null +++ b/cpp/tests/structs/structs_column_tests.cu @@ -0,0 +1,380 @@ +/* + * Copyright (c) 2020, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "cudf/column/column_factories.hpp" +#include "cudf/detail/utilities/device_operators.cuh" +#include "cudf/table/table_view.hpp" +#include "cudf/types.hpp" +#include "cudf/utilities/error.hpp" +#include "gtest/gtest.h" +#include "rmm/device_buffer.hpp" +#include "thrust/host_vector.h" +#include "thrust/iterator/counting_iterator.h" +#include "thrust/scan.h" +#include "thrust/sequence.h" + +using vector_of_columns = std::vector>; +using cudf::size_type; + +struct StructColumnWrapperTest : public cudf::test::BaseFixture +{}; + +template +struct TypedStructColumnWrapperTest : public cudf::test::BaseFixture +{}; + +using FixedWidthTypesNotBool = cudf::test::Concat; + +TYPED_TEST_CASE(TypedStructColumnWrapperTest, FixedWidthTypesNotBool); + +// Test simple struct construction without nullmask, through column factory. +// Columns must retain their originally set values. +TYPED_TEST(TypedStructColumnWrapperTest, TestColumnFactoryConstruction) +{ + + auto names_col = cudf::test::strings_column_wrapper{ + "Samuel Vimes", + "Carrot Ironfoundersson", + "Angua von Uberwald" + }.release(); + + int num_rows {names_col->size()}; + + auto ages_col = + cudf::test::fixed_width_column_wrapper{ + {48, 27, 25} + }.release(); + + auto is_human_col = + cudf::test::fixed_width_column_wrapper{ + {true, true, false} + }.release(); + + vector_of_columns cols; + cols.push_back(std::move(names_col)); + cols.push_back(std::move(ages_col)); + cols.push_back(std::move(is_human_col)); + + auto struct_col = cudf::make_structs_column(num_rows, std::move(cols), 0, {}); + + EXPECT_EQ(num_rows, struct_col->size()); + + auto struct_col_view {struct_col->view()}; + EXPECT_TRUE( + std::all_of( + struct_col_view.child_begin(), + struct_col_view.child_end(), + [&](auto const& child) { + return child.size() == num_rows; + } + ) + ); + + // Check child columns for exactly correct values. + vector_of_columns expected_children; + expected_children.emplace_back( + cudf::test::strings_column_wrapper{ + "Samuel Vimes", + "Carrot Ironfoundersson", + "Angua von Uberwald" + }.release() + ); + expected_children.emplace_back(cudf::test::fixed_width_column_wrapper{ + 48, 27, 25 + }.release()); + expected_children.emplace_back(cudf::test::fixed_width_column_wrapper{ + true, true, false + }.release()); + + std::for_each( + thrust::make_counting_iterator(0), + thrust::make_counting_iterator(0)+expected_children.size(), + [&](auto idx) { + cudf::test::expect_columns_equal( + struct_col_view.child(idx), + expected_children[idx]->view() + ); + } + ); +} + + +// Test simple struct construction with nullmasks, through column wrappers. +// When the struct row is null, the child column value must be null. +TYPED_TEST(TypedStructColumnWrapperTest, TestColumnWrapperConstruction) +{ + std::initializer_list names = { + "Samuel Vimes", + "Carrot Ironfoundersson", + "Angua von Uberwald", + "Cheery Littlebottom", + "Detritus", + "Mr Slant" + }; + + auto num_rows {std::distance(names.begin(), names.end())}; + + auto names_col = cudf::test::strings_column_wrapper{ + names.begin(), + names.end() + }; + + auto ages_col = + cudf::test::fixed_width_column_wrapper{ + {48, 27, 25, 31, 351, 351}, + { 1, 1, 1, 1, 1, 0} + }; + + auto is_human_col = + cudf::test::fixed_width_column_wrapper{ + {true, true, false, false, false, false}, + { 1, 1, 0, 1, 1, 0} + }; + + auto struct_col = + cudf::test::structs_column_wrapper{ + {names_col, ages_col, is_human_col}, + {1, 1, 1, 0, 1, 1} + }.release(); + + EXPECT_EQ(num_rows, struct_col->size()); + + auto struct_col_view {struct_col->view()}; + EXPECT_TRUE( + std::all_of( + struct_col_view.child_begin(), + struct_col_view.child_end(), + [&](auto const& child) { + return child.size() == num_rows; + } + ) + ); + + // Check child columns for exactly correct values. + vector_of_columns expected_children; + expected_children.emplace_back( + cudf::test::strings_column_wrapper{ + names, + {1, 1, 1, 0, 1, 1} + }.release() + ); + expected_children.emplace_back(cudf::test::fixed_width_column_wrapper{ + {48, 27, 25, 31, 351, 351}, + { 1, 1, 1, 0, 1, 0} + }.release()); + expected_children.emplace_back(cudf::test::fixed_width_column_wrapper{ + {true, true, false, false, false, false}, + { 1, 1, 0, 0, 1, 0} + }.release()); + + std::for_each( + thrust::make_counting_iterator(0), + thrust::make_counting_iterator(0)+expected_children.size(), + [&](auto idx) { + cudf::test::expect_columns_equal( + struct_col_view.child(idx), + expected_children[idx]->view() + ); + } + ); + + auto expected_struct_col = + cudf::test::structs_column_wrapper{std::move(expected_children), {1, 1, 1, 0, 1, 1}}.release(); + + cudf::test::expect_columns_equal(struct_col_view, expected_struct_col->view()); +} + + +TYPED_TEST(TypedStructColumnWrapperTest, TestStructsContainingLists) +{ + // Test structs with two members: + // 1. Name: String + // 2. List: List + + std::initializer_list names = { + "Samuel Vimes", + "Carrot Ironfoundersson", + "Angua von Uberwald", + "Cheery Littlebottom", + "Detritus", + "Mr Slant" + }; + + auto num_rows {std::distance(names.begin(), names.end())}; + + // `Name` column has all valid values. + auto names_col = cudf::test::strings_column_wrapper{names.begin(), names.end()}; + + // `List` member. + auto lists_col = cudf::test::lists_column_wrapper{ + {1,2,3}, + {4}, + {5,6}, + {}, + {7,8}, + {9} + }; + + // Construct a Struct column of 6 rows, with the last two values set to null. + auto struct_col = cudf::test::structs_column_wrapper{ + {names_col, lists_col}, + {1, 1, 1, 1, 0, 0} + }.release(); + + // Check that the last two rows are null for all members. + + // For `Name` member, indices 4 and 5 are null. + auto expected_names_col = cudf::test::strings_column_wrapper{ + names.begin(), + names.end(), + cudf::test::make_counting_transform_iterator(0, [](auto i) { return i<4; } ) + }.release(); + + cudf::test::expect_columns_equal(struct_col->view().child(0), expected_names_col->view()); + + // For the `List` member, indices 4, 5 should be null. + // FIXME: The way list columns are currently compared is not ideal for testing + // structs' list members. Rather than comparing for equivalence, + // column_comparator_impl currently checks that list's data (child) + // and offsets match perfectly. + // This causes two "equivalent lists" to compare unequal, if the data columns + // have different values at an index where the value is null. + auto expected_last_two_lists_col = cudf::test::lists_column_wrapper{ + { + {1,2,3}, + {4}, + {5,6}, + {}, + {7,8}, // Null. + {9} // Null. + }, + cudf::test::make_counting_transform_iterator(0, [](auto i) { return i==0; }) + }.release(); + + // FIXME: Uncomment after list comparison is fixed. + // cudf::test::expect_columns_equal( + // struct_col->view().child(1), + // expected_last_two_lists_col->view()); +} + + +TYPED_TEST(TypedStructColumnWrapperTest, StructOfStructs) +{ + // Struct> + + auto names = { + "Samuel Vimes", + "Carrot Ironfoundersson", + "Angua von Uberwald", + "Cheery Littlebottom", + "Detritus", + "Mr Slant" + }; + + auto num_rows {std::distance(names.begin(), names.end())}; + + // `Name` column has all valid values. + auto names_col = cudf::test::strings_column_wrapper{names.begin(), names.end()}; + + auto ages_col = + cudf::test::fixed_width_column_wrapper{ + {48, 27, 25, 31, 351, 351}, + { 1, 1, 1, 1, 1, 0} + }; + + auto struct_1 = cudf::test::structs_column_wrapper{ + {names_col, ages_col}, + {1, 1, 1, 1, 0, 1} + }; + + auto is_human_col = + cudf::test::fixed_width_column_wrapper{ + {true, true, false, false, false, false}, + { 1, 1, 0, 1, 1, 0} + }; + + auto struct_2 = cudf::test::structs_column_wrapper{ + {is_human_col, struct_1}, + {0, 1, 1, 1, 1, 1} + }.release(); + + // Verify that the child/grandchild columns are as expected. + auto expected_names_col = cudf::test::strings_column_wrapper( + names.begin(), + names.end(), + cudf::test::make_counting_transform_iterator(0, [](auto i){ return i!=0 && i!=4; })).release(); + + cudf::test::expect_columns_equal(*expected_names_col, struct_2->child(1).child(0)); + + auto expected_ages_col = cudf::test::fixed_width_column_wrapper{ + {48, 27, 25, 31, 351, 351}, + { 0, 1, 1, 1, 0, 0} + }.release(); + cudf::test::expect_columns_equal(*expected_ages_col, struct_2->child(1).child(1)); + + auto expected_bool_col = cudf::test::fixed_width_column_wrapper { + {true, true, false, false, false, false}, + { 0, 1, 0, 1, 1, 0} + }.release(); + + cudf::test::expect_columns_equal(*expected_bool_col, struct_2->child(0)); + + // Verify that recursive struct columns may be compared + // using expect_columns_equal. + + vector_of_columns expected_cols_1; + expected_cols_1.emplace_back(std::move(expected_names_col)); + expected_cols_1.emplace_back(std::move(expected_ages_col)); + auto expected_struct_1 = cudf::test::structs_column_wrapper(std::move(expected_cols_1), {1, 1, 1, 1, 0, 1}).release(); + + vector_of_columns expected_cols_2; + expected_cols_2.emplace_back(std::move(expected_bool_col)); + expected_cols_2.emplace_back(std::move(expected_struct_1)); + auto expected_struct_2 = cudf::test::structs_column_wrapper(std::move(expected_cols_2), {0, 1, 1, 1, 1, 1}).release(); + + cudf::test::expect_columns_equal(*expected_struct_2, *struct_2); +} + + +TEST_F(StructColumnWrapperTest, SimpleTestExpectStructColumnsEqual) +{ + auto ints_col = cudf::test::fixed_width_column_wrapper{{0,1}, {0,0}}.release(); + + vector_of_columns cols; + cols.emplace_back(std::move(ints_col)); + auto structs_col = cudf::test::structs_column_wrapper{std::move(cols)}; + + cudf::test::expect_columns_equal(structs_col, structs_col); +} + + +CUDF_TEST_PROGRAM_MAIN() diff --git a/cpp/tests/utilities/column_utilities.cu b/cpp/tests/utilities/column_utilities.cu index 636daf07ab7..0d0b8b4b01d 100644 --- a/cpp/tests/utilities/column_utilities.cu +++ b/cpp/tests/utilities/column_utilities.cu @@ -15,17 +15,22 @@ */ #include "column_utilities.hpp" +#include "cudf/utilities/type_dispatcher.hpp" #include "detail/column_utilities.hpp" +#include "thrust/iterator/counting_iterator.h" #include #include #include #include +#include +#include #include #include #include #include +#include #include #include @@ -260,6 +265,28 @@ struct column_comparator_impl { } }; +template +struct column_comparator_impl { + void operator()(column_view const& lhs, + column_view const& rhs, + bool print_all_differences, + int depth) + { + std::for_each( + thrust::make_counting_iterator(0), + thrust::make_counting_iterator(0) + lhs.num_children(), + [&](auto i) { + cudf::type_dispatcher( + lhs.child(i).type(), + column_comparator{}, + lhs.child(i), + rhs.child(i), + print_all_differences, depth+1); + } + ); + } +}; + template struct column_comparator { template @@ -395,6 +422,23 @@ std::string get_nested_type_str(cudf::column_view const& view) return cudf::jit::get_type_name(view.type()) + "<" + (lcv.size() > 0 ? get_nested_type_str(lcv.child()) : "") + ">"; } + + if (view.type().id() == cudf::type_id::STRUCT) { + + std::ostringstream out; + + out << cudf::jit::get_type_name(view.type()) + "<"; + // std::for_each(view.child_begin(), view.child_end(), [&out](auto col){ out << get_nested_type_str(col);}); + std::transform( + view.child_begin(), + view.child_end(), + std::ostream_iterator(out, ","), + [&out](auto const col) {return get_nested_type_str(col);} + ); + out << ">"; + return out.str(); + } + return cudf::jit::get_type_name(view.type()); } @@ -536,6 +580,32 @@ struct column_view_printer { out.push_back(tmp); } + + template ::value>* = nullptr> + void operator()(cudf::column_view const& col, + std::vector& out, + std::string const& indent) + { + structs_column_view view{col}; + + std::ostringstream out_stream; + + out_stream << get_nested_type_str(col) << ":\n" + << indent << "Length : " << view.size() << ":\n"; + if (view.has_nulls()) { + out_stream << indent << "Null count: " << view.null_count() << "\n" + << detail::to_string(bitmask_to_host(col), col.size(), indent) << "\n"; + } + + std::transform( + view.child_begin(), view.child_end(), + std::ostream_iterator(out_stream, "\n"), + [&](auto child_column) {return detail::to_string(child_column, ", ", indent + " ");} + ); + + out.push_back(out_stream.str()); + } }; } // namespace diff --git a/cpp/tests/utilities/column_wrapper.hpp b/cpp/tests/utilities/column_wrapper.hpp index bfd8a387d67..d6f95ba2a81 100644 --- a/cpp/tests/utilities/column_wrapper.hpp +++ b/cpp/tests/utilities/column_wrapper.hpp @@ -956,5 +956,57 @@ class lists_column_wrapper : public detail::column_wrapper { bool root = false; }; + +class structs_column_wrapper : public detail::column_wrapper +{ + public: + + structs_column_wrapper(std::vector>&& child_columns, std::vector const& validity = {}) + { + init(std::move(child_columns), validity); + } + + structs_column_wrapper(std::initializer_list> child_column_wrappers, std::vector const& validity = {}) + { + std::vector> child_columns; + child_columns.reserve(child_column_wrappers.size()); + std::transform( + child_column_wrappers.begin(), + child_column_wrappers.end(), + std::back_inserter(child_columns), + [&](auto column_wrapper){return column_wrapper.get().release();} + ); + init(std::move(child_columns), validity); + } + + static auto ref(detail::column_wrapper& column_wrapper) + { + return std::ref(column_wrapper); + } + + private: + + void init(std::vector>&& child_columns, std::vector const& validity) + { + size_type num_rows = child_columns.empty()? 0 : child_columns[0]->size(); + + CUDF_EXPECTS( + std::all_of(child_columns.begin(), child_columns.end(), [&](auto const& p_column) {return p_column->size() == num_rows;}), + "All struct member columns must have the same row count." + ); + + CUDF_EXPECTS( + validity.size() <= 0 || static_cast(validity.size()) == num_rows, + "Validity buffer must have as many elements as rows in the struct column." + ); + + wrapped = cudf::make_structs_column( + num_rows, + std::move(child_columns), + validity.size() <= 0? 0 : cudf::UNKNOWN_NULL_COUNT, + validity.size() <= 0? rmm::device_buffer{0} : detail::make_null_mask(validity.begin(), validity.end())); + } +}; + } // namespace test } // namespace cudf diff --git a/cpp/tests/utilities/scalar_utilities.cu b/cpp/tests/utilities/scalar_utilities.cu index 8f2f9800711..c2e273251be 100644 --- a/cpp/tests/utilities/scalar_utilities.cu +++ b/cpp/tests/utilities/scalar_utilities.cu @@ -70,6 +70,13 @@ void compare_scalar_functor::operator()(cudf::scalar const& lhs CUDF_FAIL("Unsupported scalar compare type: list_view"); } +template <> +void compare_scalar_functor::operator()(cudf::scalar const& lhs, + cudf::scalar const& rhs) +{ + CUDF_FAIL("Unsupported scalar compare type: struct_view"); +} + } // anonymous namespace void expect_scalars_equal(cudf::scalar const& lhs, cudf::scalar const& rhs) From 8ed38e32383a8cd7e7673510a0a5e68ba695af47 Mon Sep 17 00:00:00 2001 From: Mithun RK Date: Thu, 30 Jul 2020 15:57:17 -0700 Subject: [PATCH 02/20] [WIP] [struct] Review comments Changed how null-masks are retrrieved from child columns. --- cpp/src/structs/structs_column_factories.cu | 97 +++++++++++---------- 1 file changed, 50 insertions(+), 47 deletions(-) diff --git a/cpp/src/structs/structs_column_factories.cu b/cpp/src/structs/structs_column_factories.cu index ae29a8246b5..7f27ce7dcb4 100644 --- a/cpp/src/structs/structs_column_factories.cu +++ b/cpp/src/structs/structs_column_factories.cu @@ -17,71 +17,61 @@ #include #include #include +#include +#include "cudf/types.hpp" +#include "thrust/iterator/counting_iterator.h" namespace cudf { namespace { // Helper function to superimpose validity of parent struct - // over all member fields (i.e. child columns). - void superimpose_validity( + // over the specified member (child) column. + void superimpose_parent_nullmask( rmm::device_buffer const& parent_null_mask, size_type parent_null_count, - std::vector>& children, + column& child, cudaStream_t stream, rmm::mr::device_memory_resource* mr ) { - if (parent_null_mask.is_empty()) { - // Struct is not nullable. Children do not need adjustment. - // Bail. - return; + if (!child.nullable()) + { + child.set_null_mask(std::move(rmm::device_buffer{parent_null_mask, stream, mr})); + child.set_null_count(parent_null_count); } + else { - std::for_each( - children.begin(), - children.end(), - [&](std::unique_ptr& p_child) - { - if (!p_child->nullable()) - { - p_child->set_null_mask(std::move(rmm::device_buffer{parent_null_mask, stream, mr})); - p_child->set_null_count(parent_null_count); - } - else { - - auto data_type{p_child->type()}; - auto num_rows{p_child->size()}; + auto data_type{child.type()}; + auto num_rows{child.size()}; - // All this to reset the null mask. :/ - cudf::column::contents contents{p_child->release()}; - std::vector masks { - reinterpret_cast(parent_null_mask.data()), - reinterpret_cast(contents.null_mask->data())}; - - rmm::device_buffer new_child_mask = cudf::detail::bitmask_and(masks, {0, 0}, num_rows, stream, mr); + auto new_child_mask = + cudf::detail::bitmask_and( + { + reinterpret_cast(parent_null_mask.data()), + reinterpret_cast(child.mutable_view().null_mask()) + }, + {0, 0}, + child.size(), + stream, + mr + ); - // Recurse for struct members. - // Push down recomputed child mask to child columns of the current child. - if (data_type.id() == cudf::type_id::STRUCT) + if (child.type().id() == cudf::type_id::STRUCT) + { + std::for_each( + thrust::make_counting_iterator(0), + thrust::make_counting_iterator(child.num_children()), + [&new_child_mask, &child, stream, mr](auto i) { - superimpose_validity(new_child_mask, UNKNOWN_NULL_COUNT, contents.children, stream, mr); + superimpose_parent_nullmask(new_child_mask, UNKNOWN_NULL_COUNT, child.child(i), stream, mr); } - - // Reconstitute the column. - p_child.reset( - new column( - data_type, - num_rows, - std::move(*contents.data), - std::move(new_child_mask), - UNKNOWN_NULL_COUNT, - std::move(contents.children) - ) - ); - } + ); } - ); + + child.set_null_mask(std::move(new_child_mask)); + child.set_null_count(UNKNOWN_NULL_COUNT); + } } } @@ -105,7 +95,20 @@ namespace cudf [&](auto const& i){ return num_rows == i->size(); }), "Child columns must have the same number of rows as the Struct column."); - superimpose_validity(null_mask, null_count, child_columns, stream, mr); + if (!null_mask.is_empty()) + { + std::for_each( + child_columns.begin(), + child_columns.end(), + [& null_mask, + null_count, + stream, + mr + ](auto & p_child) { + superimpose_parent_nullmask(null_mask, null_count, *p_child, stream, mr); + } + ); + } return std::make_unique( cudf::data_type{type_id::STRUCT}, From ec69d741b85a645bf1955b9aa45c02e11db74618 Mon Sep 17 00:00:00 2001 From: Mithun RK Date: Fri, 31 Jul 2020 16:30:57 -0700 Subject: [PATCH 03/20] [struct] Added struct-support issue to CHANGELOG.md --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 09bbc334f52..68cbc33efb9 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -60,6 +60,7 @@ - PR #5782 Add Kafka support to custreamz - PR #5642 Add `GroupBy.groups()` - PR #5810 Make Cython subdirs packages and simplify package_data +- PR #5807 Initial support for struct columns ## Improvements From 02616170b8d1e130cf86b51eaf7bc1ca3a0f2bed Mon Sep 17 00:00:00 2001 From: Mithun RK Date: Mon, 3 Aug 2020 21:13:58 -0700 Subject: [PATCH 04/20] [struct] Switched to in-place bitmask_and() --- cpp/include/cudf/detail/null_mask.hpp | 20 ++++++ cpp/src/bitmask/null_mask.cu | 38 +++++++--- cpp/src/structs/structs_column_factories.cu | 80 ++++++++++++++------- 3 files changed, 102 insertions(+), 36 deletions(-) diff --git a/cpp/include/cudf/detail/null_mask.hpp b/cpp/include/cudf/detail/null_mask.hpp index b0b9ebd0359..c2bfb0500cc 100644 --- a/cpp/include/cudf/detail/null_mask.hpp +++ b/cpp/include/cudf/detail/null_mask.hpp @@ -54,6 +54,26 @@ rmm::device_buffer bitmask_and(std::vector const &masks, size_type mask_size, cudaStream_t stream, rmm::mr::device_memory_resource *mr); + +/** + * @brief Performs a bitwise AND of the specified bitmasks, + * and writes in place to destination + * + * @param dest_mask Destination to which the AND result is written + * @param masks The list of data pointers of the bitmasks to be ANDed + * @param begin_bits The bit offsets from which each mask is to be ANDed + * @param mask_size The number of bits to be ANDed in each mask + * @param stream CUDA stream used for device memory operations and kernel launches + * @param mr Device memory resource used to allocate the returned device_buffer + * @return rmm::device_buffer Output bitmask + */ +void inplace_bitmask_and(bitmask_type* dest_mask, + std::vector const &masks, + std::vector const &begin_bits, + size_type mask_size, + cudaStream_t stream, + rmm::mr::device_memory_resource *mr); + } // namespace detail } // namespace cudf diff --git a/cpp/src/bitmask/null_mask.cu b/cpp/src/bitmask/null_mask.cu index 8f47bdeb5df..77a51d74969 100644 --- a/cpp/src/bitmask/null_mask.cu +++ b/cpp/src/bitmask/null_mask.cu @@ -372,11 +372,12 @@ struct to_word_index : public thrust::unary_function { namespace detail { // Bitwise AND of the masks -rmm::device_buffer bitmask_and(std::vector const &masks, - std::vector const &begin_bits, - size_type mask_size, - cudaStream_t stream, - rmm::mr::device_memory_resource *mr) +void inplace_bitmask_and(bitmask_type* dest_mask, + std::vector const &masks, + std::vector const &begin_bits, + size_type mask_size, + cudaStream_t stream, + rmm::mr::device_memory_resource *mr) { CUDF_EXPECTS(std::all_of(begin_bits.begin(), begin_bits.end(), [](auto b) { return b >= 0; }), "Invalid range."); @@ -384,19 +385,16 @@ rmm::device_buffer bitmask_and(std::vector const &masks, CUDF_EXPECTS(std::all_of(masks.begin(), masks.end(), [](auto p) { return p != nullptr; }), "Mask pointer cannot be null"); - rmm::device_buffer dest_mask{}; auto num_bytes = bitmask_allocation_size_bytes(mask_size); auto number_of_mask_words = num_bitmask_words(mask_size); - dest_mask = rmm::device_buffer{num_bytes, stream, mr}; - rmm::device_vector d_masks(masks); rmm::device_vector d_begin_bits(begin_bits); cudf::detail::grid_1d config(number_of_mask_words, 256); offset_bitmask_and<<>>( - static_cast(dest_mask.data()), + dest_mask, d_masks.data().get(), d_begin_bits.data().get(), d_masks.size(), @@ -404,6 +402,28 @@ rmm::device_buffer bitmask_and(std::vector const &masks, number_of_mask_words); CHECK_CUDA(stream); +} + +// Bitwise AND of the masks +rmm::device_buffer bitmask_and(std::vector const &masks, + std::vector const &begin_bits, + size_type mask_size, + cudaStream_t stream, + rmm::mr::device_memory_resource *mr) +{ + CUDF_EXPECTS(std::all_of(begin_bits.begin(), begin_bits.end(), [](auto b) { return b >= 0; }), + "Invalid range."); + CUDF_EXPECTS(mask_size > 0, "Invalid bit range."); + CUDF_EXPECTS(std::all_of(masks.begin(), masks.end(), [](auto p) { return p != nullptr; }), + "Mask pointer cannot be null"); + + rmm::device_buffer dest_mask{}; + auto num_bytes = bitmask_allocation_size_bytes(mask_size); + + auto number_of_mask_words = num_bitmask_words(mask_size); + + dest_mask = rmm::device_buffer{num_bytes, stream, mr}; + inplace_bitmask_and(static_cast(dest_mask.data()), masks, begin_bits, mask_size, stream, mr); return dest_mask; } diff --git a/cpp/src/structs/structs_column_factories.cu b/cpp/src/structs/structs_column_factories.cu index 7f27ce7dcb4..6768c2efb24 100644 --- a/cpp/src/structs/structs_column_factories.cu +++ b/cpp/src/structs/structs_column_factories.cu @@ -28,7 +28,8 @@ namespace cudf // Helper function to superimpose validity of parent struct // over the specified member (child) column. void superimpose_parent_nullmask( - rmm::device_buffer const& parent_null_mask, + bitmask_type const* parent_null_mask, + std::size_t parent_null_mask_size, size_type parent_null_count, column& child, cudaStream_t stream, @@ -37,41 +38,60 @@ namespace cudf { if (!child.nullable()) { - child.set_null_mask(std::move(rmm::device_buffer{parent_null_mask, stream, mr})); + // Child currently has no null mask. Copy parent's null mask. + child.set_null_mask( + std::move( + rmm::device_buffer{parent_null_mask, parent_null_mask_size, stream, mr} + ) + ); child.set_null_count(parent_null_count); } else { + // Child should have a null mask. + // `AND` the child's null mask with the parent's. + auto data_type{child.type()}; auto num_rows{child.size()}; - auto new_child_mask = - cudf::detail::bitmask_and( - { - reinterpret_cast(parent_null_mask.data()), - reinterpret_cast(child.mutable_view().null_mask()) - }, - {0, 0}, - child.size(), - stream, - mr - ); - - if (child.type().id() == cudf::type_id::STRUCT) - { - std::for_each( - thrust::make_counting_iterator(0), - thrust::make_counting_iterator(child.num_children()), - [&new_child_mask, &child, stream, mr](auto i) - { - superimpose_parent_nullmask(new_child_mask, UNKNOWN_NULL_COUNT, child.child(i), stream, mr); - } - ); - } + auto current_child_mask = child.mutable_view().null_mask(); - child.set_null_mask(std::move(new_child_mask)); + cudf::detail::inplace_bitmask_and( + current_child_mask, + { + reinterpret_cast(parent_null_mask), + reinterpret_cast(current_child_mask) + }, + {0, 0}, + child.size(), + stream, + mr + ); child.set_null_count(UNKNOWN_NULL_COUNT); } + + // If the child is also a struct, repeat for all grandchildren. + if (child.type().id() == cudf::type_id::STRUCT) + { + const auto current_child_mask = child.mutable_view().null_mask(); + std::for_each( + thrust::make_counting_iterator(0), + thrust::make_counting_iterator(child.num_children()), + [¤t_child_mask, + &child, + parent_null_mask_size, + stream, + mr](auto i) + { + superimpose_parent_nullmask(current_child_mask, + parent_null_mask_size, + UNKNOWN_NULL_COUNT, + child.child(i), + stream, + mr); + } + ); + } } } @@ -105,7 +125,13 @@ namespace cudf stream, mr ](auto & p_child) { - superimpose_parent_nullmask(null_mask, null_count, *p_child, stream, mr); + superimpose_parent_nullmask( + static_cast(null_mask.data()), + null_mask.size(), + null_count, + *p_child, + stream, + mr); } ); } From 1577732e83076c80971afcb46b0e44364eb15bc3 Mon Sep 17 00:00:00 2001 From: Mithun RK Date: Mon, 3 Aug 2020 21:50:56 -0700 Subject: [PATCH 05/20] [struct] Minor cleanup. Removed redundant error checks. Also, simplified for loops. --- cpp/src/bitmask/null_mask.cu | 8 +------ cpp/src/structs/structs_column_factories.cu | 24 ++++++--------------- 2 files changed, 7 insertions(+), 25 deletions(-) diff --git a/cpp/src/bitmask/null_mask.cu b/cpp/src/bitmask/null_mask.cu index 77a51d74969..94e31551cfd 100644 --- a/cpp/src/bitmask/null_mask.cu +++ b/cpp/src/bitmask/null_mask.cu @@ -371,7 +371,7 @@ struct to_word_index : public thrust::unary_function { namespace detail { -// Bitwise AND of the masks +// Inplace Bitwise AND of the masks void inplace_bitmask_and(bitmask_type* dest_mask, std::vector const &masks, std::vector const &begin_bits, @@ -411,12 +411,6 @@ rmm::device_buffer bitmask_and(std::vector const &masks, cudaStream_t stream, rmm::mr::device_memory_resource *mr) { - CUDF_EXPECTS(std::all_of(begin_bits.begin(), begin_bits.end(), [](auto b) { return b >= 0; }), - "Invalid range."); - CUDF_EXPECTS(mask_size > 0, "Invalid bit range."); - CUDF_EXPECTS(std::all_of(masks.begin(), masks.end(), [](auto p) { return p != nullptr; }), - "Mask pointer cannot be null"); - rmm::device_buffer dest_mask{}; auto num_bytes = bitmask_allocation_size_bytes(mask_size); diff --git a/cpp/src/structs/structs_column_factories.cu b/cpp/src/structs/structs_column_factories.cu index 6768c2efb24..798cc2fba9c 100644 --- a/cpp/src/structs/structs_column_factories.cu +++ b/cpp/src/structs/structs_column_factories.cu @@ -112,28 +112,16 @@ namespace cudf CUDF_EXPECTS( std::all_of(child_columns.begin(), child_columns.end(), - [&](auto const& i){ return num_rows == i->size(); }), + [&](auto const& child_col){ return num_rows == child_col->size(); }), "Child columns must have the same number of rows as the Struct column."); if (!null_mask.is_empty()) { - std::for_each( - child_columns.begin(), - child_columns.end(), - [& null_mask, - null_count, - stream, - mr - ](auto & p_child) { - superimpose_parent_nullmask( - static_cast(null_mask.data()), - null_mask.size(), - null_count, - *p_child, - stream, - mr); - } - ); + for (auto& child : child_columns) { + superimpose_parent_nullmask( + static_cast(null_mask.data()), null_mask.size(), + null_count, *child, stream, mr); + } } return std::make_unique( From 7e9d6601b2d378028d01d146b9dcb53204aca894 Mon Sep 17 00:00:00 2001 From: Mithun RK Date: Mon, 3 Aug 2020 22:40:56 -0700 Subject: [PATCH 06/20] [struct] Added test for null-mask propagation (For non-null structs --- cpp/tests/structs/structs_column_tests.cu | 80 +++++++++++++++++++++++ 1 file changed, 80 insertions(+) diff --git a/cpp/tests/structs/structs_column_tests.cu b/cpp/tests/structs/structs_column_tests.cu index 13627633126..094d098448f 100644 --- a/cpp/tests/structs/structs_column_tests.cu +++ b/cpp/tests/structs/structs_column_tests.cu @@ -286,6 +286,86 @@ TYPED_TEST(TypedStructColumnWrapperTest, TestStructsContainingLists) // expected_last_two_lists_col->view()); } +TYPED_TEST(TypedStructColumnWrapperTest, TestNullMaskPropagationForNonNullStruct) +{ + // Struct> + + auto names = { + "Samuel Vimes", + "Carrot Ironfoundersson", + "Angua von Uberwald", + "Cheery Littlebottom", + "Detritus", + "Mr Slant" + }; + + auto num_rows {std::distance(names.begin(), names.end())}; + + // `Name` column has all valid values. + auto names_col = cudf::test::strings_column_wrapper{names.begin(), names.end()}; + + auto ages_col = + cudf::test::fixed_width_column_wrapper{ + {48, 27, 25, 31, 351, 351}, + { 1, 1, 1, 1, 1, 1} // <-- No nulls in ages_col either. + }; + + auto struct_1 = cudf::test::structs_column_wrapper{ + {names_col, ages_col}, + {1, 1, 1, 1, 1, 1} // <-- Non-null, bottom level struct. + }; + + auto is_human_col = + cudf::test::fixed_width_column_wrapper{ + {true, true, false, false, false, false}, + { 1, 1, 0, 1, 1, 0} + }; + + auto struct_2 = cudf::test::structs_column_wrapper{ + {is_human_col, struct_1}, + {0, 1, 1, 1, 1, 1} // <-- First row is null, for top-level struct. + }.release(); + + // Verify that the child/grandchild columns are as expected. + + // Top-struct has 1 null (at index 0). + // Bottom-level struct had no nulls, but must now report nulls + auto expected_names_col = cudf::test::strings_column_wrapper( + names.begin(), + names.end(), + cudf::test::make_counting_transform_iterator(0, [](auto i){ return i!=0; })).release(); + + cudf::test::expect_columns_equal(*expected_names_col, struct_2->child(1).child(0)); + + auto expected_ages_col = cudf::test::fixed_width_column_wrapper{ + {48, 27, 25, 31, 351, 351}, + { 0, 1, 1, 1, 1, 1} + }.release(); + cudf::test::expect_columns_equal(*expected_ages_col, struct_2->child(1).child(1)); + + auto expected_bool_col = cudf::test::fixed_width_column_wrapper { + {true, true, false, false, false, false}, + { 0, 1, 0, 1, 1, 0} + }.release(); + + cudf::test::expect_columns_equal(*expected_bool_col, struct_2->child(0)); + + // Verify that recursive struct columns may be compared + // using expect_columns_equal. + + vector_of_columns expected_cols_1; + expected_cols_1.emplace_back(std::move(expected_names_col)); + expected_cols_1.emplace_back(std::move(expected_ages_col)); + auto expected_struct_1 = cudf::test::structs_column_wrapper(std::move(expected_cols_1), {1, 1, 1, 1, 1, 1}).release(); + + vector_of_columns expected_cols_2; + expected_cols_2.emplace_back(std::move(expected_bool_col)); + expected_cols_2.emplace_back(std::move(expected_struct_1)); + auto expected_struct_2 = cudf::test::structs_column_wrapper(std::move(expected_cols_2), {0, 1, 1, 1, 1, 1}).release(); + + cudf::test::expect_columns_equal(*expected_struct_2, *struct_2); +} + TYPED_TEST(TypedStructColumnWrapperTest, StructOfStructs) { From 7e197d66e378a7bdf0de1a3a2bbda43a4badec9d Mon Sep 17 00:00:00 2001 From: Mithun RK Date: Tue, 4 Aug 2020 16:02:49 -0700 Subject: [PATCH 07/20] [struct] More tests and cleanup. Added tests for List. Reordered some struct tests. Removed unnecessary reference_wrapper helper functions. --- cpp/src/structs/structs_column_factories.cu | 5 + cpp/tests/structs/structs_column_tests.cu | 130 +++++++++++++++++--- cpp/tests/utilities/column_wrapper.hpp | 5 - 3 files changed, 116 insertions(+), 24 deletions(-) diff --git a/cpp/src/structs/structs_column_factories.cu b/cpp/src/structs/structs_column_factories.cu index 798cc2fba9c..80b7643e3e2 100644 --- a/cpp/src/structs/structs_column_factories.cu +++ b/cpp/src/structs/structs_column_factories.cu @@ -115,6 +115,11 @@ namespace cudf [&](auto const& child_col){ return num_rows == child_col->size(); }), "Child columns must have the same number of rows as the Struct column."); + CUDF_EXPECTS( + null_count == 0 || !null_mask.is_empty(), + "Struct column with nulls must be nullable." + ); + if (!null_mask.is_empty()) { for (auto& child : child_columns) { diff --git a/cpp/tests/structs/structs_column_tests.cu b/cpp/tests/structs/structs_column_tests.cu index 094d098448f..54b94121ea2 100644 --- a/cpp/tests/structs/structs_column_tests.cu +++ b/cpp/tests/structs/structs_column_tests.cu @@ -286,7 +286,8 @@ TYPED_TEST(TypedStructColumnWrapperTest, TestStructsContainingLists) // expected_last_two_lists_col->view()); } -TYPED_TEST(TypedStructColumnWrapperTest, TestNullMaskPropagationForNonNullStruct) + +TYPED_TEST(TypedStructColumnWrapperTest, StructOfStructs) { // Struct> @@ -307,12 +308,12 @@ TYPED_TEST(TypedStructColumnWrapperTest, TestNullMaskPropagationForNonNullStruct auto ages_col = cudf::test::fixed_width_column_wrapper{ {48, 27, 25, 31, 351, 351}, - { 1, 1, 1, 1, 1, 1} // <-- No nulls in ages_col either. + { 1, 1, 1, 1, 1, 0} }; auto struct_1 = cudf::test::structs_column_wrapper{ {names_col, ages_col}, - {1, 1, 1, 1, 1, 1} // <-- Non-null, bottom level struct. + {1, 1, 1, 1, 0, 1} }; auto is_human_col = @@ -323,23 +324,20 @@ TYPED_TEST(TypedStructColumnWrapperTest, TestNullMaskPropagationForNonNullStruct auto struct_2 = cudf::test::structs_column_wrapper{ {is_human_col, struct_1}, - {0, 1, 1, 1, 1, 1} // <-- First row is null, for top-level struct. + {0, 1, 1, 1, 1, 1} }.release(); // Verify that the child/grandchild columns are as expected. - - // Top-struct has 1 null (at index 0). - // Bottom-level struct had no nulls, but must now report nulls auto expected_names_col = cudf::test::strings_column_wrapper( names.begin(), names.end(), - cudf::test::make_counting_transform_iterator(0, [](auto i){ return i!=0; })).release(); + cudf::test::make_counting_transform_iterator(0, [](auto i){ return i!=0 && i!=4; })).release(); cudf::test::expect_columns_equal(*expected_names_col, struct_2->child(1).child(0)); auto expected_ages_col = cudf::test::fixed_width_column_wrapper{ {48, 27, 25, 31, 351, 351}, - { 0, 1, 1, 1, 1, 1} + { 0, 1, 1, 1, 0, 0} }.release(); cudf::test::expect_columns_equal(*expected_ages_col, struct_2->child(1).child(1)); @@ -356,18 +354,26 @@ TYPED_TEST(TypedStructColumnWrapperTest, TestNullMaskPropagationForNonNullStruct vector_of_columns expected_cols_1; expected_cols_1.emplace_back(std::move(expected_names_col)); expected_cols_1.emplace_back(std::move(expected_ages_col)); - auto expected_struct_1 = cudf::test::structs_column_wrapper(std::move(expected_cols_1), {1, 1, 1, 1, 1, 1}).release(); + auto expected_struct_1 = + cudf::test::structs_column_wrapper( + std::move(expected_cols_1), + {1, 1, 1, 1, 0, 1} + ).release(); vector_of_columns expected_cols_2; expected_cols_2.emplace_back(std::move(expected_bool_col)); expected_cols_2.emplace_back(std::move(expected_struct_1)); - auto expected_struct_2 = cudf::test::structs_column_wrapper(std::move(expected_cols_2), {0, 1, 1, 1, 1, 1}).release(); + auto expected_struct_2 = + cudf::test::structs_column_wrapper( + std::move(expected_cols_2), + {0, 1, 1, 1, 1, 1} + ).release(); cudf::test::expect_columns_equal(*expected_struct_2, *struct_2); } -TYPED_TEST(TypedStructColumnWrapperTest, StructOfStructs) +TYPED_TEST(TypedStructColumnWrapperTest, TestNullMaskPropagationForNonNullStruct) { // Struct> @@ -388,12 +394,12 @@ TYPED_TEST(TypedStructColumnWrapperTest, StructOfStructs) auto ages_col = cudf::test::fixed_width_column_wrapper{ {48, 27, 25, 31, 351, 351}, - { 1, 1, 1, 1, 1, 0} + { 1, 1, 1, 1, 1, 1} // <-- No nulls in ages_col either. }; auto struct_1 = cudf::test::structs_column_wrapper{ {names_col, ages_col}, - {1, 1, 1, 1, 0, 1} + {1, 1, 1, 1, 1, 1} // <-- Non-null, bottom level struct. }; auto is_human_col = @@ -404,20 +410,23 @@ TYPED_TEST(TypedStructColumnWrapperTest, StructOfStructs) auto struct_2 = cudf::test::structs_column_wrapper{ {is_human_col, struct_1}, - {0, 1, 1, 1, 1, 1} + {0, 1, 1, 1, 1, 1} // <-- First row is null, for top-level struct. }.release(); // Verify that the child/grandchild columns are as expected. + + // Top-struct has 1 null (at index 0). + // Bottom-level struct had no nulls, but must now report nulls auto expected_names_col = cudf::test::strings_column_wrapper( names.begin(), names.end(), - cudf::test::make_counting_transform_iterator(0, [](auto i){ return i!=0 && i!=4; })).release(); + cudf::test::make_counting_transform_iterator(0, [](auto i){ return i!=0; })).release(); cudf::test::expect_columns_equal(*expected_names_col, struct_2->child(1).child(0)); auto expected_ages_col = cudf::test::fixed_width_column_wrapper{ {48, 27, 25, 31, 351, 351}, - { 0, 1, 1, 1, 0, 0} + { 0, 1, 1, 1, 1, 1} }.release(); cudf::test::expect_columns_equal(*expected_ages_col, struct_2->child(1).child(1)); @@ -434,17 +443,100 @@ TYPED_TEST(TypedStructColumnWrapperTest, StructOfStructs) vector_of_columns expected_cols_1; expected_cols_1.emplace_back(std::move(expected_names_col)); expected_cols_1.emplace_back(std::move(expected_ages_col)); - auto expected_struct_1 = cudf::test::structs_column_wrapper(std::move(expected_cols_1), {1, 1, 1, 1, 0, 1}).release(); + auto expected_struct_1 = + cudf::test::structs_column_wrapper( + std::move(expected_cols_1), + {1, 1, 1, 1, 1, 1} + ).release(); vector_of_columns expected_cols_2; expected_cols_2.emplace_back(std::move(expected_bool_col)); expected_cols_2.emplace_back(std::move(expected_struct_1)); - auto expected_struct_2 = cudf::test::structs_column_wrapper(std::move(expected_cols_2), {0, 1, 1, 1, 1, 1}).release(); + auto expected_struct_2 = + cudf::test::structs_column_wrapper( + std::move(expected_cols_2), + {0, 1, 1, 1, 1, 1} + ).release(); cudf::test::expect_columns_equal(*expected_struct_2, *struct_2); } +TEST_F(StructColumnWrapperTest, StructWithNoMembers) +{ + auto struct_col {cudf::test::structs_column_wrapper{}.release()}; + EXPECT_TRUE(struct_col->num_children() == 0); + EXPECT_TRUE(struct_col->null_count() == 0); + EXPECT_TRUE(struct_col->size() == 0); +} + + +TYPED_TEST(TypedStructColumnWrapperTest, StructsWithMembersWithDifferentRowCounts) +{ + auto numeric_col_5 = cudf::test::fixed_width_column_wrapper{{1,2,3,4,5}}; + auto bool_col_4 = cudf::test::fixed_width_column_wrapper{1,0,1,0}; + + EXPECT_THROW( + cudf::test::structs_column_wrapper( + {numeric_col_5, bool_col_4} + ), + cudf::logic_error + ); +} + + +TYPED_TEST(TypedStructColumnWrapperTest, TestListsOfStructs) +{ + // Test structs with two members: + // 1. Name: String + // 2. List: List + + std::initializer_list names = { + "Samuel Vimes", + "Carrot Ironfoundersson", + "Angua von Uberwald", + "Cheery Littlebottom", + "Detritus", + "Mr Slant" + }; + + auto num_rows {std::distance(names.begin(), names.end())}; + + // `Name` column has all valid values. + auto names_col = cudf::test::strings_column_wrapper{names.begin(), names.end()}; + + // Numeric column has some nulls. + auto ages_col = + cudf::test::fixed_width_column_wrapper{ + {48, 27, 25, 31, 351, 351}, + { 1, 1, 1, 1, 1, 0} + }; + + auto struct_col = + cudf::test::structs_column_wrapper( + {names_col, ages_col}, + { 1, 1, 1, 0, 0, 1} + ).release(); + + auto expected_unchanged_struct_col = cudf::column(*struct_col); + + auto list_offsets_column = cudf::test::fixed_width_column_wrapper{0,2,3,5,6}.release(); + auto num_list_rows = list_offsets_column->size()-1; + + auto list_col = cudf::make_lists_column(num_list_rows, std::move(list_offsets_column), std::move(struct_col), cudf::UNKNOWN_NULL_COUNT, {}); + + // List of structs was constructed successfully. No exceptions. + // Verify that child columns is as it was set. + + cudf::test::expect_columns_equal(expected_unchanged_struct_col, cudf::lists_column_view(*list_col).child()); + +#ifndef NDEBUG + std::cout << "Printing list col: \n"; + cudf::test::print(*list_col); +#endif +} + + TEST_F(StructColumnWrapperTest, SimpleTestExpectStructColumnsEqual) { auto ints_col = cudf::test::fixed_width_column_wrapper{{0,1}, {0,0}}.release(); diff --git a/cpp/tests/utilities/column_wrapper.hpp b/cpp/tests/utilities/column_wrapper.hpp index 37902ba8f0e..084986a9b64 100644 --- a/cpp/tests/utilities/column_wrapper.hpp +++ b/cpp/tests/utilities/column_wrapper.hpp @@ -992,11 +992,6 @@ class structs_column_wrapper : public detail::column_wrapper init(std::move(child_columns), validity); } - static auto ref(detail::column_wrapper& column_wrapper) - { - return std::ref(column_wrapper); - } - private: void init(std::vector>&& child_columns, std::vector const& validity) From d2216ce44e8908637b549c5096ed47883dfe861d Mon Sep 17 00:00:00 2001 From: Mithun RK Date: Wed, 5 Aug 2020 00:16:50 -0700 Subject: [PATCH 08/20] [struct] Added structs to lists' tests. --- .../lists_column_wrapper_tests.cpp | 222 ++++++++++++++++++ 1 file changed, 222 insertions(+) diff --git a/cpp/tests/utilities_tests/lists_column_wrapper_tests.cpp b/cpp/tests/utilities_tests/lists_column_wrapper_tests.cpp index 4e39823fcf6..f744d8a27c4 100644 --- a/cpp/tests/utilities_tests/lists_column_wrapper_tests.cpp +++ b/cpp/tests/utilities_tests/lists_column_wrapper_tests.cpp @@ -21,6 +21,11 @@ #include #include #include +#include "cudf/column/column_factories.hpp" +#include "cudf/types.hpp" +#include "rmm/device_buffer.hpp" +#include "thrust/iterator/counting_iterator.h" +#include "thrust/iterator/transform_iterator.h" struct ListColumnWrapperTest : public cudf::test::BaseFixture { }; @@ -1307,3 +1312,220 @@ TEST_F(ListColumnWrapperTest, MismatchedHierarchies) EXPECT_THROW(expect_failure(), cudf::logic_error); } } + +TYPED_TEST(ListColumnWrapperTestTyped, ListsOfStructs) +{ + using namespace cudf; + + using T = TypeParam; + + auto num_struct_rows = 8; + auto numeric_column = test::fixed_width_column_wrapper {1,2,3,4,5,6,7,8}; + auto bool_column = test::fixed_width_column_wrapper{1,1,1,1,0,0,0,0}; + auto struct_column = test::structs_column_wrapper{{numeric_column, bool_column}}.release(); + EXPECT_EQ(struct_column->size(), num_struct_rows); + EXPECT_TRUE(!struct_column->nullable()); + + auto lists_column_offsets = test::fixed_width_column_wrapper{0,2,4,8}.release(); + auto num_lists = lists_column_offsets->size()-1; + auto lists_column = make_lists_column(num_lists, std::move(lists_column_offsets), std::move(struct_column), UNKNOWN_NULL_COUNT, {}); + + // Check if child column is unchanged. + + auto expected_numeric_column = test::fixed_width_column_wrapper {1,2,3,4,5,6,7,8}; + auto expected_bool_column = test::fixed_width_column_wrapper{1,1,1,1,0,0,0,0}; + auto expected_struct_column = test::structs_column_wrapper{{expected_numeric_column, expected_bool_column}}.release(); + + cudf::test::expect_columns_equal( + *expected_struct_column, + lists_column_view(*lists_column).child() + ); +} + +TYPED_TEST(ListColumnWrapperTestTyped, ListsOfStructsWithValidity) +{ + using namespace cudf; + + using T = TypeParam; + + auto num_struct_rows = 8; + auto numeric_column = test::fixed_width_column_wrapper{ + {1,2,3,4,5,6,7,8}, + {1,1,1,1,0,0,0,0} + }; + auto bool_column = test::fixed_width_column_wrapper{1,1,1,1,0,0,0,0}; + auto struct_column = test::structs_column_wrapper{{numeric_column, bool_column}}.release(); + EXPECT_EQ(struct_column->size(), num_struct_rows); + EXPECT_TRUE(!struct_column->nullable()); + + auto lists_column_offsets = test::fixed_width_column_wrapper{0,2,4,8}.release(); + auto list_null_mask = {1, 1, 0}; + auto num_lists = lists_column_offsets->size()-1; + auto lists_column = make_lists_column( + num_lists, + std::move(lists_column_offsets), + std::move(struct_column), + UNKNOWN_NULL_COUNT, + test::detail::make_null_mask(list_null_mask.begin(), list_null_mask.end()) + ); + + // Check if child column is unchanged. + + auto expected_numeric_column = test::fixed_width_column_wrapper{ + {1,2,3,4,5,6,7,8}, + {1,1,1,1,0,0,0,0} + }; + auto expected_bool_column = test::fixed_width_column_wrapper{1,1,1,1,0,0,0,0}; + auto expected_struct_column = test::structs_column_wrapper{ + {expected_numeric_column, expected_bool_column} + }.release(); + + cudf::test::expect_columns_equal( + *expected_struct_column, + lists_column_view(*lists_column).child() + ); +} + +TYPED_TEST(ListColumnWrapperTestTyped, ListsOfListsOfStructs) +{ + using namespace cudf; + + using T = TypeParam; + + auto num_struct_rows = 8; + auto numeric_column = test::fixed_width_column_wrapper {1,2,3,4,5,6,7,8}; + auto bool_column = test::fixed_width_column_wrapper{1,1,1,1,0,0,0,0}; + auto struct_column = test::structs_column_wrapper{{numeric_column, bool_column}}.release(); + EXPECT_EQ(struct_column->size(), num_struct_rows); + EXPECT_TRUE(!struct_column->nullable()); + + auto lists_column_offsets = test::fixed_width_column_wrapper{0,2,4,8}.release(); + auto num_lists = lists_column_offsets->size()-1; + auto lists_column = make_lists_column(num_lists, std::move(lists_column_offsets), std::move(struct_column), UNKNOWN_NULL_COUNT, {}); + + auto lists_of_lists_column_offsets = test::fixed_width_column_wrapper{0,2,3}.release(); + auto num_lists_of_lists = lists_of_lists_column_offsets->size()-1; + auto lists_of_lists_of_structs_column = + make_lists_column(num_lists_of_lists, std::move(lists_of_lists_column_offsets), std::move(lists_column), UNKNOWN_NULL_COUNT, {}); + + // Check if child column is unchanged. + + auto expected_numeric_column = test::fixed_width_column_wrapper {1,2,3,4,5,6,7,8}; + auto expected_bool_column = test::fixed_width_column_wrapper{1,1,1,1,0,0,0,0}; + auto expected_struct_column = test::structs_column_wrapper{{expected_numeric_column, expected_bool_column}}.release(); + + cudf::test::expect_columns_equal( + *expected_struct_column, + lists_column_view{lists_column_view{*lists_of_lists_of_structs_column}.child()}.child() + ); +} + +TYPED_TEST(ListColumnWrapperTestTyped, ListsOfListsOfStructsWithValidity) +{ + using namespace cudf; + + using T = TypeParam; + + auto num_struct_rows = 8; + auto numeric_column = test::fixed_width_column_wrapper{ + {1,2,3,4,5,6,7,8}, + {1,1,1,1,0,0,0,0} + }; + auto bool_column = test::fixed_width_column_wrapper{1,1,1,1,0,0,0,0}; + auto struct_column = test::structs_column_wrapper{{numeric_column, bool_column}}.release(); + EXPECT_EQ(struct_column->size(), num_struct_rows); + EXPECT_TRUE(!struct_column->nullable()); + + auto lists_column_offsets = test::fixed_width_column_wrapper{0,2,4,8}.release(); + auto num_lists = lists_column_offsets->size()-1; + auto list_null_mask = {1, 1, 0}; + auto lists_column = make_lists_column( + num_lists, + std::move(lists_column_offsets), + std::move(struct_column), + UNKNOWN_NULL_COUNT, + test::detail::make_null_mask(list_null_mask.begin(), list_null_mask.end())); + + auto lists_of_lists_column_offsets = test::fixed_width_column_wrapper{0,2,3}.release(); + auto num_lists_of_lists = lists_of_lists_column_offsets->size()-1; + auto list_of_lists_null_mask = {1,0}; + auto lists_of_lists_of_structs_column = make_lists_column( + num_lists_of_lists, + std::move(lists_of_lists_column_offsets), + std::move(lists_column), + UNKNOWN_NULL_COUNT, + test::detail::make_null_mask(list_of_lists_null_mask.begin(), list_of_lists_null_mask.end())); + + // Check if child column is unchanged. + + auto expected_numeric_column = test::fixed_width_column_wrapper{ + {1,2,3,4,5,6,7,8}, + {1,1,1,1,0,0,0,0} + }; + auto expected_bool_column = test::fixed_width_column_wrapper{1,1,1,1,0,0,0,0}; + auto expected_struct_column = test::structs_column_wrapper{{expected_numeric_column, expected_bool_column}}.release(); + + cudf::test::expect_columns_equal( + *expected_struct_column, + lists_column_view{lists_column_view{*lists_of_lists_of_structs_column}.child()}.child() + ); +} + +TYPED_TEST(ListColumnWrapperTestTyped, LargeListsOfStructsWithValidity) +{ + using namespace cudf; + + using T = TypeParam; + + auto num_struct_rows = 10000; + + // Creating Struct. + auto numeric_column = test::fixed_width_column_wrapper{ + thrust::make_counting_iterator(0), + thrust::make_counting_iterator(num_struct_rows), + cudf::test::make_counting_transform_iterator(0, [](auto i){ return i%2==1; }) + }; + + auto bool_iterator = thrust::make_transform_iterator(thrust::make_counting_iterator(0), [](auto i) { return i%3==0; }); + auto bool_column = test::fixed_width_column_wrapper(bool_iterator, bool_iterator+num_struct_rows); + + auto struct_validity_iterator = cudf::test::make_counting_transform_iterator(0, [](auto i){ return i%5 == 0; }); + auto struct_column = test::structs_column_wrapper{ + {numeric_column, bool_column}, + std::vector(struct_validity_iterator, struct_validity_iterator+num_struct_rows) + }.release(); + + EXPECT_EQ(struct_column->size(), num_struct_rows); + + // Now, use struct_column to create a list column. + // Each list has 50 elements. + auto num_list_rows = num_struct_rows/50; + auto list_offset_iterator = test::make_counting_transform_iterator(0, [](auto i){ return i*50; }); + auto list_offset_column = test::fixed_width_column_wrapper(list_offset_iterator, list_offset_iterator+num_list_rows+1).release(); + auto list_column = make_lists_column(num_list_rows, std::move(list_offset_column), std::move(struct_column), cudf::UNKNOWN_NULL_COUNT, {}); + + // List construction succeeded. + // Verify that the child is unchanged. + + auto expected_numeric_column = test::fixed_width_column_wrapper{ + thrust::make_counting_iterator(0), + thrust::make_counting_iterator(num_struct_rows), + cudf::test::make_counting_transform_iterator(0, [](auto i){ return i%2==1; }) + }; + + auto expected_bool_column = test::fixed_width_column_wrapper(bool_iterator, bool_iterator+num_struct_rows); + + auto expected_struct_column = test::structs_column_wrapper{ + {expected_numeric_column, expected_bool_column}, + std::vector(struct_validity_iterator, struct_validity_iterator+num_struct_rows) + }.release(); + + /* + + + cudf::test::expect_columns_equal( + *expected_struct_column, + lists_column_view(*lists_column).child() + ); + */ +} From 1d6b1ea3613090ebe30993f342de51bc84ca62c9 Mon Sep 17 00:00:00 2001 From: Mithun RK Date: Wed, 5 Aug 2020 00:21:42 -0700 Subject: [PATCH 09/20] [struct] Added structs to lists' tests. Removed dead code. Fixed typo in variable name. --- cpp/tests/utilities_tests/lists_column_wrapper_tests.cpp | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/cpp/tests/utilities_tests/lists_column_wrapper_tests.cpp b/cpp/tests/utilities_tests/lists_column_wrapper_tests.cpp index f744d8a27c4..eff3d67d413 100644 --- a/cpp/tests/utilities_tests/lists_column_wrapper_tests.cpp +++ b/cpp/tests/utilities_tests/lists_column_wrapper_tests.cpp @@ -1502,7 +1502,7 @@ TYPED_TEST(ListColumnWrapperTestTyped, LargeListsOfStructsWithValidity) auto num_list_rows = num_struct_rows/50; auto list_offset_iterator = test::make_counting_transform_iterator(0, [](auto i){ return i*50; }); auto list_offset_column = test::fixed_width_column_wrapper(list_offset_iterator, list_offset_iterator+num_list_rows+1).release(); - auto list_column = make_lists_column(num_list_rows, std::move(list_offset_column), std::move(struct_column), cudf::UNKNOWN_NULL_COUNT, {}); + auto lists_column = make_lists_column(num_list_rows, std::move(list_offset_column), std::move(struct_column), cudf::UNKNOWN_NULL_COUNT, {}); // List construction succeeded. // Verify that the child is unchanged. @@ -1520,12 +1520,8 @@ TYPED_TEST(ListColumnWrapperTestTyped, LargeListsOfStructsWithValidity) std::vector(struct_validity_iterator, struct_validity_iterator+num_struct_rows) }.release(); - /* - - cudf::test::expect_columns_equal( *expected_struct_column, lists_column_view(*lists_column).child() ); - */ } From 103790433c7ce41e388bec7a10fd05ceba07ed80 Mon Sep 17 00:00:00 2001 From: Mithun RK Date: Wed, 5 Aug 2020 00:33:45 -0700 Subject: [PATCH 10/20] [struct] Added doc for struct factory method. --- cpp/include/cudf/column/column_factories.hpp | 25 +++++++++++++++++++- 1 file changed, 24 insertions(+), 1 deletion(-) diff --git a/cpp/include/cudf/column/column_factories.hpp b/cpp/include/cudf/column/column_factories.hpp index 926936da4f8..454c13ef2c2 100644 --- a/cpp/include/cudf/column/column_factories.hpp +++ b/cpp/include/cudf/column/column_factories.hpp @@ -502,9 +502,32 @@ std::unique_ptr make_lists_column( cudaStream_t stream = 0, rmm::mr::device_memory_resource* mr = rmm::mr::get_default_resource()); +/** + * @brief Constructs a STRUCT column using specified child columns as members. + * + * Specified child/member columns and null_mask are adopted by resultant + * struct column. + * + * A struct column requires that all specified child columns have the same + * number of rows. A struct column's row count equals that of any/all + * of its child columns. A single struct row at any index is comprised of + * all the individual child column values at the same index, in the order + * specified in the list of child columns. + * + * The specified null mask governs which struct row has a null value. This + * is orthogonal to the null values of individual child columns. + * + * @param num_rows The number of struct values in the struct column. + * @param child_columns The list of child/members that the struct is comprised of. + * @param null_count The number of null values in the struct column. + * @param null_mask The bits specifying the null struct values in the column. + * @param stream Optional stream for use with all memory allocation and device kernels. + * @param mr Optional resource to use for device memory allocation. + * + */ std::unique_ptr make_structs_column( size_type num_rows, - std::vector>&& child_column, + std::vector>&& child_columns, size_type null_count, rmm::device_buffer&& null_mask, cudaStream_t stream = 0, From be7de050b2e73c040cdd1c86903e080ccb7f60d9 Mon Sep 17 00:00:00 2001 From: Mithun RK Date: Thu, 6 Aug 2020 10:03:40 -0700 Subject: [PATCH 11/20] [struct] clang-format --- .../cudf/column/column_device_view.cuh | 2 +- cpp/include/cudf/column/column_factories.hpp | 12 +- cpp/include/cudf/detail/gather.cuh | 10 +- cpp/include/cudf/detail/null_mask.hpp | 20 +- cpp/include/cudf/structs/struct_view.hpp | 3 +- .../cudf/structs/structs_column_view.hpp | 48 +- cpp/include/cudf/utilities/traits.hpp | 3 +- cpp/src/bitmask/null_mask.cu | 9 +- cpp/src/copying/get_element.cu | 1 - cpp/src/copying/scatter.cu | 2 +- cpp/src/dictionary/search.cu | 3 +- cpp/src/reductions/simple.cuh | 4 +- cpp/src/replace/clamp.cu | 15 +- cpp/src/search/search.cu | 7 +- cpp/src/structs/structs_column_factories.cu | 191 ++++--- cpp/src/structs/structs_column_view.cu | 16 +- cpp/tests/structs/structs_column_tests.cu | 467 +++++++----------- cpp/tests/utilities/column_utilities.cu | 50 +- cpp/tests/utilities/column_wrapper.hpp | 82 ++- cpp/tests/utilities/scalar_utilities.cu | 2 +- .../lists_column_wrapper_tests.cpp | 222 +++++---- 21 files changed, 512 insertions(+), 657 deletions(-) diff --git a/cpp/include/cudf/column/column_device_view.cuh b/cpp/include/cudf/column/column_device_view.cuh index 666132ff9bd..fd225aec472 100644 --- a/cpp/include/cudf/column/column_device_view.cuh +++ b/cpp/include/cudf/column/column_device_view.cuh @@ -19,9 +19,9 @@ #include #include #include -#include #include #include +#include #include #include #include diff --git a/cpp/include/cudf/column/column_factories.hpp b/cpp/include/cudf/column/column_factories.hpp index 454c13ef2c2..f41ef2b5c7c 100644 --- a/cpp/include/cudf/column/column_factories.hpp +++ b/cpp/include/cudf/column/column_factories.hpp @@ -504,26 +504,26 @@ std::unique_ptr make_lists_column( /** * @brief Constructs a STRUCT column using specified child columns as members. - * + * * Specified child/member columns and null_mask are adopted by resultant * struct column. - * + * * A struct column requires that all specified child columns have the same * number of rows. A struct column's row count equals that of any/all - * of its child columns. A single struct row at any index is comprised of + * of its child columns. A single struct row at any index is comprised of * all the individual child column values at the same index, in the order * specified in the list of child columns. - * + * * The specified null mask governs which struct row has a null value. This * is orthogonal to the null values of individual child columns. - * + * * @param num_rows The number of struct values in the struct column. * @param child_columns The list of child/members that the struct is comprised of. * @param null_count The number of null values in the struct column. * @param null_mask The bits specifying the null struct values in the column. * @param stream Optional stream for use with all memory allocation and device kernels. * @param mr Optional resource to use for device memory allocation. - * + * */ std::unique_ptr make_structs_column( size_type num_rows, diff --git a/cpp/include/cudf/detail/gather.cuh b/cpp/include/cudf/detail/gather.cuh index 8149a6c30ed..5f17ec31283 100644 --- a/cpp/include/cudf/detail/gather.cuh +++ b/cpp/include/cudf/detail/gather.cuh @@ -360,11 +360,11 @@ struct column_gatherer_impl { template struct column_gatherer_impl { std::unique_ptr operator()(column_view const& column, - MapItRoot gather_map_begin, - MapItRoot gather_map_end, - bool nullify_out_of_bounds, - cudaStream_t stream, - rmm::mr::device_memory_resource* mr) + MapItRoot gather_map_begin, + MapItRoot gather_map_end, + bool nullify_out_of_bounds, + cudaStream_t stream, + rmm::mr::device_memory_resource* mr) { CUDF_FAIL("Gather not yet supported on struct_view."); } diff --git a/cpp/include/cudf/detail/null_mask.hpp b/cpp/include/cudf/detail/null_mask.hpp index c2bfb0500cc..1c3046735e7 100644 --- a/cpp/include/cudf/detail/null_mask.hpp +++ b/cpp/include/cudf/detail/null_mask.hpp @@ -26,8 +26,8 @@ namespace detail { * * @param[in] stream CUDA stream used for device memory operations and kernel launches. */ -std::vector segmented_count_set_bits(bitmask_type const* bitmask, - std::vector const& indices, +std::vector segmented_count_set_bits(bitmask_type const *bitmask, + std::vector const &indices, cudaStream_t stream = 0); /** @@ -35,19 +35,19 @@ std::vector segmented_count_set_bits(bitmask_type const* bitmask, * * @param[in] stream CUDA stream used for device memory operations and kernel launches. */ -std::vector segmented_count_unset_bits(bitmask_type const* bitmask, - std::vector const& indices, +std::vector segmented_count_unset_bits(bitmask_type const *bitmask, + std::vector const &indices, cudaStream_t stream = 0); /** * @brief Returns a bitwise AND of the specified bitmasks - * + * * @param masks The list of data pointers of the bitmasks to be ANDed * @param begin_bits The bit offsets from which each mask is to be ANDed * @param mask_size The number of bits to be ANDed in each mask * @param stream CUDA stream used for device memory operations and kernel launches * @param mr Device memory resource used to allocate the returned device_buffer - * @return rmm::device_buffer Output bitmask + * @return rmm::device_buffer Output bitmask */ rmm::device_buffer bitmask_and(std::vector const &masks, std::vector const &begin_bits, @@ -56,18 +56,18 @@ rmm::device_buffer bitmask_and(std::vector const &masks, rmm::mr::device_memory_resource *mr); /** - * @brief Performs a bitwise AND of the specified bitmasks, + * @brief Performs a bitwise AND of the specified bitmasks, * and writes in place to destination - * + * * @param dest_mask Destination to which the AND result is written * @param masks The list of data pointers of the bitmasks to be ANDed * @param begin_bits The bit offsets from which each mask is to be ANDed * @param mask_size The number of bits to be ANDed in each mask * @param stream CUDA stream used for device memory operations and kernel launches * @param mr Device memory resource used to allocate the returned device_buffer - * @return rmm::device_buffer Output bitmask + * @return rmm::device_buffer Output bitmask */ -void inplace_bitmask_and(bitmask_type* dest_mask, +void inplace_bitmask_and(bitmask_type *dest_mask, std::vector const &masks, std::vector const &begin_bits, size_type mask_size, diff --git a/cpp/include/cudf/structs/struct_view.hpp b/cpp/include/cudf/structs/struct_view.hpp index d9733b29406..f15393789f0 100644 --- a/cpp/include/cudf/structs/struct_view.hpp +++ b/cpp/include/cudf/structs/struct_view.hpp @@ -24,12 +24,11 @@ namespace cudf { /** * @brief A non-owning, immutable view of device data that represents - * a struct with fields of arbitrary types (including primitives, lists, + * a struct with fields of arbitrary types (including primitives, lists, * and other structs) * */ class struct_view { - }; } // namespace cudf \ No newline at end of file diff --git a/cpp/include/cudf/structs/structs_column_view.hpp b/cpp/include/cudf/structs/structs_column_view.hpp index 31063d6aeee..8e10ef0e1ea 100644 --- a/cpp/include/cudf/structs/structs_column_view.hpp +++ b/cpp/include/cudf/structs/structs_column_view.hpp @@ -20,29 +20,25 @@ namespace cudf { - -class structs_column_view : private column_view -{ - - public: - - // Foundation members: - structs_column_view(structs_column_view const&) = default; - structs_column_view(structs_column_view &&) = default; - ~structs_column_view() = default; - structs_column_view& operator=(structs_column_view const&) = default; - structs_column_view& operator=(structs_column_view &&) = default; - - explicit structs_column_view(column_view const& rhs); - - using column_view::has_nulls; - using column_view::null_count; - using column_view::null_mask; - using column_view::offset; - using column_view::size; - using column_view::child_begin; - using column_view::child_end; - -}; // class structs_column_view; - -} // namespace cudf; +class structs_column_view : private column_view { + public: + // Foundation members: + structs_column_view(structs_column_view const&) = default; + structs_column_view(structs_column_view&&) = default; + ~structs_column_view() = default; + structs_column_view& operator=(structs_column_view const&) = default; + structs_column_view& operator=(structs_column_view&&) = default; + + explicit structs_column_view(column_view const& rhs); + + using column_view::child_begin; + using column_view::child_end; + using column_view::has_nulls; + using column_view::null_count; + using column_view::null_mask; + using column_view::offset; + using column_view::size; + +}; // class structs_column_view; + +} // namespace cudf diff --git a/cpp/include/cudf/utilities/traits.hpp b/cpp/include/cudf/utilities/traits.hpp index 580e2c2536a..8ae5e6b2f3f 100644 --- a/cpp/include/cudf/utilities/traits.hpp +++ b/cpp/include/cudf/utilities/traits.hpp @@ -531,8 +531,7 @@ constexpr inline bool is_compound(data_type type) template constexpr inline bool is_nested() { - return std::is_same::value - || std::is_same::value; + return std::is_same::value || std::is_same::value; } struct is_nested_impl { diff --git a/cpp/src/bitmask/null_mask.cu b/cpp/src/bitmask/null_mask.cu index 94e31551cfd..0fada446dea 100644 --- a/cpp/src/bitmask/null_mask.cu +++ b/cpp/src/bitmask/null_mask.cu @@ -372,7 +372,7 @@ struct to_word_index : public thrust::unary_function { namespace detail { // Inplace Bitwise AND of the masks -void inplace_bitmask_and(bitmask_type* dest_mask, +void inplace_bitmask_and(bitmask_type *dest_mask, std::vector const &masks, std::vector const &begin_bits, size_type mask_size, @@ -417,7 +417,8 @@ rmm::device_buffer bitmask_and(std::vector const &masks, auto number_of_mask_words = num_bitmask_words(mask_size); dest_mask = rmm::device_buffer{num_bytes, stream, mr}; - inplace_bitmask_and(static_cast(dest_mask.data()), masks, begin_bits, mask_size, stream, mr); + inplace_bitmask_and( + static_cast(dest_mask.data()), masks, begin_bits, mask_size, stream, mr); return dest_mask; } @@ -677,7 +678,9 @@ rmm::device_buffer bitmask_and(table_view const &view, } } - if (masks.size() > 0) { return cudf::detail::bitmask_and(masks, offsets, view.num_rows(), stream, mr); } + if (masks.size() > 0) { + return cudf::detail::bitmask_and(masks, offsets, view.num_rows(), stream, mr); + } return null_mask; } diff --git a/cpp/src/copying/get_element.cu b/cpp/src/copying/get_element.cu index 4dbf9383d98..c8784765a6c 100644 --- a/cpp/src/copying/get_element.cu +++ b/cpp/src/copying/get_element.cu @@ -144,7 +144,6 @@ struct get_element_functor { { CUDF_FAIL("get_element_functor not supported for struct_view"); } - }; } // namespace diff --git a/cpp/src/copying/scatter.cu b/cpp/src/copying/scatter.cu index 66155530cc4..c1ec86337f5 100644 --- a/cpp/src/copying/scatter.cu +++ b/cpp/src/copying/scatter.cu @@ -25,10 +25,10 @@ #include #include #include -#include #include #include #include +#include #include #include diff --git a/cpp/src/dictionary/search.cu b/cpp/src/dictionary/search.cu index 522b25e8bba..8f1871bd2fc 100644 --- a/cpp/src/dictionary/search.cu +++ b/cpp/src/dictionary/search.cu @@ -80,7 +80,8 @@ struct find_index_fn { CUDF_FAIL("list_view column cannot be the keys column of a dictionary"); } - template ::value>* = nullptr> + template ::value>* = nullptr> std::unique_ptr> operator()(dictionary_column_view const& input, scalar const& key, rmm::mr::device_memory_resource* mr, diff --git a/cpp/src/reductions/simple.cuh b/cpp/src/reductions/simple.cuh index b0654163384..f69c38bb6cb 100644 --- a/cpp/src/reductions/simple.cuh +++ b/cpp/src/reductions/simple.cuh @@ -121,8 +121,8 @@ struct element_type_dispatcher { !(std::is_same::value || std::is_same::value)) // disable for list/struct views - || std::is_same::value - || std::is_same::value); + || std::is_same::value || + std::is_same::value); } public: diff --git a/cpp/src/replace/clamp.cu b/cpp/src/replace/clamp.cu index bb686723061..e7628f966fc 100644 --- a/cpp/src/replace/clamp.cu +++ b/cpp/src/replace/clamp.cu @@ -326,14 +326,13 @@ std::unique_ptr dispatch_clamp::operator()( } template <> -std::unique_ptr dispatch_clamp::operator()( - column_view const& input, - scalar const& lo, - scalar const& lo_replace, - scalar const& hi, - scalar const& hi_replace, - rmm::mr::device_memory_resource* mr, - cudaStream_t stream) +std::unique_ptr dispatch_clamp::operator()(column_view const& input, + scalar const& lo, + scalar const& lo_replace, + scalar const& hi, + scalar const& hi_replace, + rmm::mr::device_memory_resource* mr, + cudaStream_t stream) { CUDF_FAIL("clamp for struct_view not supported"); } diff --git a/cpp/src/search/search.cu b/cpp/src/search/search.cu index 6299d237783..0938a406e7d 100644 --- a/cpp/src/search/search.cu +++ b/cpp/src/search/search.cu @@ -187,12 +187,11 @@ bool contains_scalar_dispatch::operator()(column_view const& co CUDF_FAIL("list_view type not supported yet"); } - template <> bool contains_scalar_dispatch::operator()(column_view const& col, - scalar const& value, - cudaStream_t stream, - rmm::mr::device_memory_resource* mr) + scalar const& value, + cudaStream_t stream, + rmm::mr::device_memory_resource* mr) { CUDF_FAIL("struct_view type not supported yet"); } diff --git a/cpp/src/structs/structs_column_factories.cu b/cpp/src/structs/structs_column_factories.cu index 80b7643e3e2..3424ce3fe0b 100644 --- a/cpp/src/structs/structs_column_factories.cu +++ b/cpp/src/structs/structs_column_factories.cu @@ -14,129 +14,104 @@ * limitations under the License. */ +#include #include #include -#include #include #include "cudf/types.hpp" #include "thrust/iterator/counting_iterator.h" -namespace cudf +namespace cudf { +namespace { +// Helper function to superimpose validity of parent struct +// over the specified member (child) column. +void superimpose_parent_nullmask(bitmask_type const* parent_null_mask, + std::size_t parent_null_mask_size, + size_type parent_null_count, + column& child, + cudaStream_t stream, + rmm::mr::device_memory_resource* mr) { - namespace - { - // Helper function to superimpose validity of parent struct - // over the specified member (child) column. - void superimpose_parent_nullmask( - bitmask_type const* parent_null_mask, - std::size_t parent_null_mask_size, - size_type parent_null_count, - column& child, - cudaStream_t stream, - rmm::mr::device_memory_resource* mr - ) - { - if (!child.nullable()) - { - // Child currently has no null mask. Copy parent's null mask. - child.set_null_mask( - std::move( - rmm::device_buffer{parent_null_mask, parent_null_mask_size, stream, mr} - ) - ); - child.set_null_count(parent_null_count); - } - else { - - // Child should have a null mask. - // `AND` the child's null mask with the parent's. + if (!child.nullable()) { + // Child currently has no null mask. Copy parent's null mask. + child.set_null_mask( + std::move(rmm::device_buffer{parent_null_mask, parent_null_mask_size, stream, mr})); + child.set_null_count(parent_null_count); + } else { + // Child should have a null mask. + // `AND` the child's null mask with the parent's. - auto data_type{child.type()}; - auto num_rows{child.size()}; + auto data_type{child.type()}; + auto num_rows{child.size()}; - auto current_child_mask = child.mutable_view().null_mask(); + auto current_child_mask = child.mutable_view().null_mask(); - cudf::detail::inplace_bitmask_and( - current_child_mask, - { - reinterpret_cast(parent_null_mask), - reinterpret_cast(current_child_mask) - }, - {0, 0}, - child.size(), - stream, - mr - ); - child.set_null_count(UNKNOWN_NULL_COUNT); - } - - // If the child is also a struct, repeat for all grandchildren. - if (child.type().id() == cudf::type_id::STRUCT) - { - const auto current_child_mask = child.mutable_view().null_mask(); - std::for_each( - thrust::make_counting_iterator(0), - thrust::make_counting_iterator(child.num_children()), - [¤t_child_mask, - &child, - parent_null_mask_size, - stream, - mr](auto i) - { - superimpose_parent_nullmask(current_child_mask, - parent_null_mask_size, - UNKNOWN_NULL_COUNT, - child.child(i), - stream, - mr); - } - ); - } - } + cudf::detail::inplace_bitmask_and(current_child_mask, + {reinterpret_cast(parent_null_mask), + reinterpret_cast(current_child_mask)}, + {0, 0}, + child.size(), + stream, + mr); + child.set_null_count(UNKNOWN_NULL_COUNT); } - /// Column factory that adopts child columns. - std::unique_ptr make_structs_column( - size_type num_rows, - std::vector>&& child_columns, - size_type null_count, - rmm::device_buffer&& null_mask, - cudaStream_t stream, - rmm::mr::device_memory_resource* mr) - { - if (null_count > 0) - { - CUDF_EXPECTS(!null_mask.is_empty(), "Column with nulls must be nullable."); - } + // If the child is also a struct, repeat for all grandchildren. + if (child.type().id() == cudf::type_id::STRUCT) { + const auto current_child_mask = child.mutable_view().null_mask(); + std::for_each(thrust::make_counting_iterator(0), + thrust::make_counting_iterator(child.num_children()), + [¤t_child_mask, &child, parent_null_mask_size, stream, mr](auto i) { + superimpose_parent_nullmask(current_child_mask, + parent_null_mask_size, + UNKNOWN_NULL_COUNT, + child.child(i), + stream, + mr); + }); + } +} +} // namespace - CUDF_EXPECTS( - std::all_of(child_columns.begin(), - child_columns.end(), - [&](auto const& child_col){ return num_rows == child_col->size(); }), - "Child columns must have the same number of rows as the Struct column."); +/// Column factory that adopts child columns. +std::unique_ptr make_structs_column( + size_type num_rows, + std::vector>&& child_columns, + size_type null_count, + rmm::device_buffer&& null_mask, + cudaStream_t stream, + rmm::mr::device_memory_resource* mr) +{ + if (null_count > 0) { + CUDF_EXPECTS(!null_mask.is_empty(), "Column with nulls must be nullable."); + } - CUDF_EXPECTS( - null_count == 0 || !null_mask.is_empty(), - "Struct column with nulls must be nullable." - ); + CUDF_EXPECTS(std::all_of(child_columns.begin(), + child_columns.end(), + [&](auto const& child_col) { return num_rows == child_col->size(); }), + "Child columns must have the same number of rows as the Struct column."); - if (!null_mask.is_empty()) - { - for (auto& child : child_columns) { - superimpose_parent_nullmask( - static_cast(null_mask.data()), null_mask.size(), - null_count, *child, stream, mr); - } - } + CUDF_EXPECTS(null_count == 0 || !null_mask.is_empty(), + "Struct column with nulls must be nullable."); - return std::make_unique( - cudf::data_type{type_id::STRUCT}, - num_rows, - rmm::device_buffer{0, stream, mr}, // Empty data buffer. Structs hold no data. - null_mask, - null_count, - std::move(child_columns) - ); + if (!null_mask.is_empty()) { + for (auto& child : child_columns) { + superimpose_parent_nullmask(static_cast(null_mask.data()), + null_mask.size(), + null_count, + *child, + stream, + mr); + } } -} // namespace cudf; + return std::make_unique( + cudf::data_type{type_id::STRUCT}, + num_rows, + rmm::device_buffer{0, stream, mr}, // Empty data buffer. Structs hold no data. + null_mask, + null_count, + std::move(child_columns)); +} + +} // namespace cudf diff --git a/cpp/src/structs/structs_column_view.cu b/cpp/src/structs/structs_column_view.cu index 4b2637b63bd..08370c2fde2 100644 --- a/cpp/src/structs/structs_column_view.cu +++ b/cpp/src/structs/structs_column_view.cu @@ -14,17 +14,15 @@ * limitations under the License. */ -#include #include +#include #include "cudf/utilities/error.hpp" - namespace cudf - { +namespace cudf { - structs_column_view::structs_column_view(column_view const& rhs) - : column_view{rhs} - { - CUDF_EXPECTS(type().id() == type_id::STRUCT, "structs_column_view only supports struct columns"); - } +structs_column_view::structs_column_view(column_view const& rhs) : column_view{rhs} +{ + CUDF_EXPECTS(type().id() == type_id::STRUCT, "structs_column_view only supports struct columns"); +} - } \ No newline at end of file +} // namespace cudf \ No newline at end of file diff --git a/cpp/tests/structs/structs_column_tests.cu b/cpp/tests/structs/structs_column_tests.cu index 54b94121ea2..384da7e8e29 100644 --- a/cpp/tests/structs/structs_column_tests.cu +++ b/cpp/tests/structs/structs_column_tests.cu @@ -14,14 +14,14 @@ * limitations under the License. */ +#include #include #include -#include +#include #include #include #include -#include #include #include #include @@ -43,12 +43,12 @@ using vector_of_columns = std::vector>; using cudf::size_type; -struct StructColumnWrapperTest : public cudf::test::BaseFixture -{}; +struct StructColumnWrapperTest : public cudf::test::BaseFixture { +}; -template -struct TypedStructColumnWrapperTest : public cudf::test::BaseFixture -{}; +template +struct TypedStructColumnWrapperTest : public cudf::test::BaseFixture { +}; using FixedWidthTypesNotBool = cudf::test::Concatsize()}; - int num_rows {names_col->size()}; + auto ages_col = cudf::test::fixed_width_column_wrapper{{48, 27, 25}}.release(); - auto ages_col = - cudf::test::fixed_width_column_wrapper{ - {48, 27, 25} - }.release(); - - auto is_human_col = - cudf::test::fixed_width_column_wrapper{ - {true, true, false} - }.release(); + auto is_human_col = cudf::test::fixed_width_column_wrapper{{true, true, false}}.release(); vector_of_columns cols; cols.push_back(std::move(names_col)); @@ -88,402 +80,303 @@ TYPED_TEST(TypedStructColumnWrapperTest, TestColumnFactoryConstruction) EXPECT_EQ(num_rows, struct_col->size()); - auto struct_col_view {struct_col->view()}; - EXPECT_TRUE( - std::all_of( - struct_col_view.child_begin(), - struct_col_view.child_end(), - [&](auto const& child) { - return child.size() == num_rows; - } - ) - ); + auto struct_col_view{struct_col->view()}; + EXPECT_TRUE(std::all_of(struct_col_view.child_begin(), + struct_col_view.child_end(), + [&](auto const& child) { return child.size() == num_rows; })); // Check child columns for exactly correct values. vector_of_columns expected_children; + expected_children.emplace_back(cudf::test::strings_column_wrapper{ + "Samuel Vimes", "Carrot Ironfoundersson", "Angua von Uberwald"} + .release()); expected_children.emplace_back( - cudf::test::strings_column_wrapper{ - "Samuel Vimes", - "Carrot Ironfoundersson", - "Angua von Uberwald" - }.release() - ); - expected_children.emplace_back(cudf::test::fixed_width_column_wrapper{ - 48, 27, 25 - }.release()); - expected_children.emplace_back(cudf::test::fixed_width_column_wrapper{ - true, true, false - }.release()); - - std::for_each( - thrust::make_counting_iterator(0), - thrust::make_counting_iterator(0)+expected_children.size(), - [&](auto idx) { - cudf::test::expect_columns_equal( - struct_col_view.child(idx), - expected_children[idx]->view() - ); - } - ); + cudf::test::fixed_width_column_wrapper{48, 27, 25}.release()); + expected_children.emplace_back( + cudf::test::fixed_width_column_wrapper{true, true, false}.release()); + + std::for_each(thrust::make_counting_iterator(0), + thrust::make_counting_iterator(0) + expected_children.size(), + [&](auto idx) { + cudf::test::expect_columns_equal(struct_col_view.child(idx), + expected_children[idx]->view()); + }); } - // Test simple struct construction with nullmasks, through column wrappers. // When the struct row is null, the child column value must be null. TYPED_TEST(TypedStructColumnWrapperTest, TestColumnWrapperConstruction) { - std::initializer_list names = { - "Samuel Vimes", - "Carrot Ironfoundersson", - "Angua von Uberwald", - "Cheery Littlebottom", - "Detritus", - "Mr Slant" - }; + std::initializer_list names = {"Samuel Vimes", + "Carrot Ironfoundersson", + "Angua von Uberwald", + "Cheery Littlebottom", + "Detritus", + "Mr Slant"}; - auto num_rows {std::distance(names.begin(), names.end())}; + auto num_rows{std::distance(names.begin(), names.end())}; - auto names_col = cudf::test::strings_column_wrapper{ - names.begin(), - names.end() - }; + auto names_col = cudf::test::strings_column_wrapper{names.begin(), names.end()}; + + auto ages_col = cudf::test::fixed_width_column_wrapper{{48, 27, 25, 31, 351, 351}, + {1, 1, 1, 1, 1, 0}}; - auto ages_col = - cudf::test::fixed_width_column_wrapper{ - {48, 27, 25, 31, 351, 351}, - { 1, 1, 1, 1, 1, 0} - }; - - auto is_human_col = - cudf::test::fixed_width_column_wrapper{ - {true, true, false, false, false, false}, - { 1, 1, 0, 1, 1, 0} - }; - - auto struct_col = - cudf::test::structs_column_wrapper{ - {names_col, ages_col, is_human_col}, - {1, 1, 1, 0, 1, 1} - }.release(); + auto is_human_col = cudf::test::fixed_width_column_wrapper{ + {true, true, false, false, false, false}, {1, 1, 0, 1, 1, 0}}; + + auto struct_col = + cudf::test::structs_column_wrapper{{names_col, ages_col, is_human_col}, {1, 1, 1, 0, 1, 1}} + .release(); EXPECT_EQ(num_rows, struct_col->size()); - auto struct_col_view {struct_col->view()}; - EXPECT_TRUE( - std::all_of( - struct_col_view.child_begin(), - struct_col_view.child_end(), - [&](auto const& child) { - return child.size() == num_rows; - } - ) - ); + auto struct_col_view{struct_col->view()}; + EXPECT_TRUE(std::all_of(struct_col_view.child_begin(), + struct_col_view.child_end(), + [&](auto const& child) { return child.size() == num_rows; })); // Check child columns for exactly correct values. vector_of_columns expected_children; expected_children.emplace_back( - cudf::test::strings_column_wrapper{ - names, - {1, 1, 1, 0, 1, 1} - }.release() - ); + cudf::test::strings_column_wrapper{names, {1, 1, 1, 0, 1, 1}}.release()); expected_children.emplace_back(cudf::test::fixed_width_column_wrapper{ {48, 27, 25, 31, 351, 351}, - { 1, 1, 1, 0, 1, 0} - }.release()); + {1, 1, 1, 0, 1, 0}}.release()); expected_children.emplace_back(cudf::test::fixed_width_column_wrapper{ {true, true, false, false, false, false}, - { 1, 1, 0, 0, 1, 0} - }.release()); - - std::for_each( - thrust::make_counting_iterator(0), - thrust::make_counting_iterator(0)+expected_children.size(), - [&](auto idx) { - cudf::test::expect_columns_equal( - struct_col_view.child(idx), - expected_children[idx]->view() - ); - } - ); + {1, 1, 0, 0, 1, 0}}.release()); + + std::for_each(thrust::make_counting_iterator(0), + thrust::make_counting_iterator(0) + expected_children.size(), + [&](auto idx) { + cudf::test::expect_columns_equal(struct_col_view.child(idx), + expected_children[idx]->view()); + }); auto expected_struct_col = cudf::test::structs_column_wrapper{std::move(expected_children), {1, 1, 1, 0, 1, 1}}.release(); - cudf::test::expect_columns_equal(struct_col_view, expected_struct_col->view()); + cudf::test::expect_columns_equal(struct_col_view, expected_struct_col->view()); } - TYPED_TEST(TypedStructColumnWrapperTest, TestStructsContainingLists) { // Test structs with two members: // 1. Name: String // 2. List: List - std::initializer_list names = { - "Samuel Vimes", - "Carrot Ironfoundersson", - "Angua von Uberwald", - "Cheery Littlebottom", - "Detritus", - "Mr Slant" - }; + std::initializer_list names = {"Samuel Vimes", + "Carrot Ironfoundersson", + "Angua von Uberwald", + "Cheery Littlebottom", + "Detritus", + "Mr Slant"}; - auto num_rows {std::distance(names.begin(), names.end())}; + auto num_rows{std::distance(names.begin(), names.end())}; // `Name` column has all valid values. auto names_col = cudf::test::strings_column_wrapper{names.begin(), names.end()}; // `List` member. - auto lists_col = cudf::test::lists_column_wrapper{ - {1,2,3}, - {4}, - {5,6}, - {}, - {7,8}, - {9} - }; + auto lists_col = + cudf::test::lists_column_wrapper{{1, 2, 3}, {4}, {5, 6}, {}, {7, 8}, {9}}; // Construct a Struct column of 6 rows, with the last two values set to null. - auto struct_col = cudf::test::structs_column_wrapper{ - {names_col, lists_col}, - {1, 1, 1, 1, 0, 0} - }.release(); + auto struct_col = + cudf::test::structs_column_wrapper{{names_col, lists_col}, {1, 1, 1, 1, 0, 0}}.release(); // Check that the last two rows are null for all members. - + // For `Name` member, indices 4 and 5 are null. auto expected_names_col = cudf::test::strings_column_wrapper{ - names.begin(), - names.end(), - cudf::test::make_counting_transform_iterator(0, [](auto i) { return i<4; } ) - }.release(); + names.begin(), names.end(), cudf::test::make_counting_transform_iterator(0, [](auto i) { + return i < 4; + })}.release(); cudf::test::expect_columns_equal(struct_col->view().child(0), expected_names_col->view()); - + // For the `List` member, indices 4, 5 should be null. // FIXME: The way list columns are currently compared is not ideal for testing - // structs' list members. Rather than comparing for equivalence, + // structs' list members. Rather than comparing for equivalence, // column_comparator_impl currently checks that list's data (child) // and offsets match perfectly. // This causes two "equivalent lists" to compare unequal, if the data columns // have different values at an index where the value is null. auto expected_last_two_lists_col = cudf::test::lists_column_wrapper{ { - {1,2,3}, + {1, 2, 3}, {4}, - {5,6}, + {5, 6}, {}, - {7,8}, // Null. - {9} // Null. + {7, 8}, // Null. + {9} // Null. }, - cudf::test::make_counting_transform_iterator(0, [](auto i) { return i==0; }) - }.release(); - + cudf::test::make_counting_transform_iterator(0, [](auto i) { + return i == 0; + })}.release(); + // FIXME: Uncomment after list comparison is fixed. // cudf::test::expect_columns_equal( - // struct_col->view().child(1), + // struct_col->view().child(1), // expected_last_two_lists_col->view()); } - TYPED_TEST(TypedStructColumnWrapperTest, StructOfStructs) { // Struct> - auto names = { - "Samuel Vimes", - "Carrot Ironfoundersson", - "Angua von Uberwald", - "Cheery Littlebottom", - "Detritus", - "Mr Slant" - }; + auto names = {"Samuel Vimes", + "Carrot Ironfoundersson", + "Angua von Uberwald", + "Cheery Littlebottom", + "Detritus", + "Mr Slant"}; - auto num_rows {std::distance(names.begin(), names.end())}; + auto num_rows{std::distance(names.begin(), names.end())}; // `Name` column has all valid values. auto names_col = cudf::test::strings_column_wrapper{names.begin(), names.end()}; - auto ages_col = - cudf::test::fixed_width_column_wrapper{ - {48, 27, 25, 31, 351, 351}, - { 1, 1, 1, 1, 1, 0} - }; + auto ages_col = + cudf::test::fixed_width_column_wrapper{{48, 27, 25, 31, 351, 351}, {1, 1, 1, 1, 1, 0}}; - auto struct_1 = cudf::test::structs_column_wrapper{ - {names_col, ages_col}, - {1, 1, 1, 1, 0, 1} - }; + auto struct_1 = cudf::test::structs_column_wrapper{{names_col, ages_col}, {1, 1, 1, 1, 0, 1}}; - auto is_human_col = - cudf::test::fixed_width_column_wrapper{ - {true, true, false, false, false, false}, - { 1, 1, 0, 1, 1, 0} - }; + auto is_human_col = cudf::test::fixed_width_column_wrapper{ + {true, true, false, false, false, false}, {1, 1, 0, 1, 1, 0}}; - auto struct_2 = cudf::test::structs_column_wrapper{ - {is_human_col, struct_1}, - {0, 1, 1, 1, 1, 1} - }.release(); + auto struct_2 = + cudf::test::structs_column_wrapper{{is_human_col, struct_1}, {0, 1, 1, 1, 1, 1}}.release(); // Verify that the child/grandchild columns are as expected. - auto expected_names_col = cudf::test::strings_column_wrapper( - names.begin(), - names.end(), - cudf::test::make_counting_transform_iterator(0, [](auto i){ return i!=0 && i!=4; })).release(); + auto expected_names_col = + cudf::test::strings_column_wrapper( + names.begin(), + names.end(), + cudf::test::make_counting_transform_iterator(0, [](auto i) { return i != 0 && i != 4; })) + .release(); cudf::test::expect_columns_equal(*expected_names_col, struct_2->child(1).child(0)); auto expected_ages_col = cudf::test::fixed_width_column_wrapper{ - {48, 27, 25, 31, 351, 351}, - { 0, 1, 1, 1, 0, 0} - }.release(); + {48, 27, 25, 31, 351, 351}, + {0, 1, 1, 1, 0, 0}}.release(); cudf::test::expect_columns_equal(*expected_ages_col, struct_2->child(1).child(1)); - auto expected_bool_col = cudf::test::fixed_width_column_wrapper { + auto expected_bool_col = cudf::test::fixed_width_column_wrapper{ {true, true, false, false, false, false}, - { 0, 1, 0, 1, 1, 0} - }.release(); + {0, 1, 0, 1, 1, 0}}.release(); cudf::test::expect_columns_equal(*expected_bool_col, struct_2->child(0)); - // Verify that recursive struct columns may be compared + // Verify that recursive struct columns may be compared // using expect_columns_equal. vector_of_columns expected_cols_1; expected_cols_1.emplace_back(std::move(expected_names_col)); expected_cols_1.emplace_back(std::move(expected_ages_col)); - auto expected_struct_1 = - cudf::test::structs_column_wrapper( - std::move(expected_cols_1), - {1, 1, 1, 1, 0, 1} - ).release(); + auto expected_struct_1 = + cudf::test::structs_column_wrapper(std::move(expected_cols_1), {1, 1, 1, 1, 0, 1}).release(); vector_of_columns expected_cols_2; expected_cols_2.emplace_back(std::move(expected_bool_col)); expected_cols_2.emplace_back(std::move(expected_struct_1)); - auto expected_struct_2 = - cudf::test::structs_column_wrapper( - std::move(expected_cols_2), - {0, 1, 1, 1, 1, 1} - ).release(); + auto expected_struct_2 = + cudf::test::structs_column_wrapper(std::move(expected_cols_2), {0, 1, 1, 1, 1, 1}).release(); cudf::test::expect_columns_equal(*expected_struct_2, *struct_2); } - TYPED_TEST(TypedStructColumnWrapperTest, TestNullMaskPropagationForNonNullStruct) { // Struct> - auto names = { - "Samuel Vimes", - "Carrot Ironfoundersson", - "Angua von Uberwald", - "Cheery Littlebottom", - "Detritus", - "Mr Slant" - }; + auto names = {"Samuel Vimes", + "Carrot Ironfoundersson", + "Angua von Uberwald", + "Cheery Littlebottom", + "Detritus", + "Mr Slant"}; - auto num_rows {std::distance(names.begin(), names.end())}; + auto num_rows{std::distance(names.begin(), names.end())}; // `Name` column has all valid values. auto names_col = cudf::test::strings_column_wrapper{names.begin(), names.end()}; - auto ages_col = - cudf::test::fixed_width_column_wrapper{ - {48, 27, 25, 31, 351, 351}, - { 1, 1, 1, 1, 1, 1} // <-- No nulls in ages_col either. - }; + auto ages_col = cudf::test::fixed_width_column_wrapper{ + {48, 27, 25, 31, 351, 351}, {1, 1, 1, 1, 1, 1} // <-- No nulls in ages_col either. + }; auto struct_1 = cudf::test::structs_column_wrapper{ - {names_col, ages_col}, - {1, 1, 1, 1, 1, 1} // <-- Non-null, bottom level struct. + {names_col, ages_col}, {1, 1, 1, 1, 1, 1} // <-- Non-null, bottom level struct. }; - auto is_human_col = - cudf::test::fixed_width_column_wrapper{ - {true, true, false, false, false, false}, - { 1, 1, 0, 1, 1, 0} - }; + auto is_human_col = cudf::test::fixed_width_column_wrapper{ + {true, true, false, false, false, false}, {1, 1, 0, 1, 1, 0}}; - auto struct_2 = cudf::test::structs_column_wrapper{ - {is_human_col, struct_1}, - {0, 1, 1, 1, 1, 1} // <-- First row is null, for top-level struct. - }.release(); + auto struct_2 = + cudf::test::structs_column_wrapper{ + {is_human_col, struct_1}, {0, 1, 1, 1, 1, 1} // <-- First row is null, for top-level struct. + } + .release(); // Verify that the child/grandchild columns are as expected. // Top-struct has 1 null (at index 0). // Bottom-level struct had no nulls, but must now report nulls - auto expected_names_col = cudf::test::strings_column_wrapper( - names.begin(), - names.end(), - cudf::test::make_counting_transform_iterator(0, [](auto i){ return i!=0; })).release(); + auto expected_names_col = + cudf::test::strings_column_wrapper( + names.begin(), + names.end(), + cudf::test::make_counting_transform_iterator(0, [](auto i) { return i != 0; })) + .release(); cudf::test::expect_columns_equal(*expected_names_col, struct_2->child(1).child(0)); auto expected_ages_col = cudf::test::fixed_width_column_wrapper{ - {48, 27, 25, 31, 351, 351}, - { 0, 1, 1, 1, 1, 1} - }.release(); + {48, 27, 25, 31, 351, 351}, + {0, 1, 1, 1, 1, 1}}.release(); cudf::test::expect_columns_equal(*expected_ages_col, struct_2->child(1).child(1)); - auto expected_bool_col = cudf::test::fixed_width_column_wrapper { + auto expected_bool_col = cudf::test::fixed_width_column_wrapper{ {true, true, false, false, false, false}, - { 0, 1, 0, 1, 1, 0} - }.release(); + {0, 1, 0, 1, 1, 0}}.release(); cudf::test::expect_columns_equal(*expected_bool_col, struct_2->child(0)); - // Verify that recursive struct columns may be compared + // Verify that recursive struct columns may be compared // using expect_columns_equal. vector_of_columns expected_cols_1; expected_cols_1.emplace_back(std::move(expected_names_col)); expected_cols_1.emplace_back(std::move(expected_ages_col)); - auto expected_struct_1 = - cudf::test::structs_column_wrapper( - std::move(expected_cols_1), - {1, 1, 1, 1, 1, 1} - ).release(); + auto expected_struct_1 = + cudf::test::structs_column_wrapper(std::move(expected_cols_1), {1, 1, 1, 1, 1, 1}).release(); vector_of_columns expected_cols_2; expected_cols_2.emplace_back(std::move(expected_bool_col)); expected_cols_2.emplace_back(std::move(expected_struct_1)); - auto expected_struct_2 = - cudf::test::structs_column_wrapper( - std::move(expected_cols_2), - {0, 1, 1, 1, 1, 1} - ).release(); + auto expected_struct_2 = + cudf::test::structs_column_wrapper(std::move(expected_cols_2), {0, 1, 1, 1, 1, 1}).release(); cudf::test::expect_columns_equal(*expected_struct_2, *struct_2); } - TEST_F(StructColumnWrapperTest, StructWithNoMembers) { - auto struct_col {cudf::test::structs_column_wrapper{}.release()}; + auto struct_col{cudf::test::structs_column_wrapper{}.release()}; EXPECT_TRUE(struct_col->num_children() == 0); EXPECT_TRUE(struct_col->null_count() == 0); EXPECT_TRUE(struct_col->size() == 0); } - TYPED_TEST(TypedStructColumnWrapperTest, StructsWithMembersWithDifferentRowCounts) { - auto numeric_col_5 = cudf::test::fixed_width_column_wrapper{{1,2,3,4,5}}; - auto bool_col_4 = cudf::test::fixed_width_column_wrapper{1,0,1,0}; - - EXPECT_THROW( - cudf::test::structs_column_wrapper( - {numeric_col_5, bool_col_4} - ), - cudf::logic_error - ); -} + auto numeric_col_5 = cudf::test::fixed_width_column_wrapper{{1, 2, 3, 4, 5}}; + auto bool_col_4 = cudf::test::fixed_width_column_wrapper{1, 0, 1, 0}; + EXPECT_THROW(cudf::test::structs_column_wrapper({numeric_col_5, bool_col_4}), cudf::logic_error); +} TYPED_TEST(TypedStructColumnWrapperTest, TestListsOfStructs) { @@ -491,44 +384,42 @@ TYPED_TEST(TypedStructColumnWrapperTest, TestListsOfStructs) // 1. Name: String // 2. List: List - std::initializer_list names = { - "Samuel Vimes", - "Carrot Ironfoundersson", - "Angua von Uberwald", - "Cheery Littlebottom", - "Detritus", - "Mr Slant" - }; + std::initializer_list names = {"Samuel Vimes", + "Carrot Ironfoundersson", + "Angua von Uberwald", + "Cheery Littlebottom", + "Detritus", + "Mr Slant"}; - auto num_rows {std::distance(names.begin(), names.end())}; + auto num_rows{std::distance(names.begin(), names.end())}; // `Name` column has all valid values. auto names_col = cudf::test::strings_column_wrapper{names.begin(), names.end()}; // Numeric column has some nulls. - auto ages_col = - cudf::test::fixed_width_column_wrapper{ - {48, 27, 25, 31, 351, 351}, - { 1, 1, 1, 1, 1, 0} - }; + auto ages_col = cudf::test::fixed_width_column_wrapper{{48, 27, 25, 31, 351, 351}, + {1, 1, 1, 1, 1, 0}}; auto struct_col = - cudf::test::structs_column_wrapper( - {names_col, ages_col}, - { 1, 1, 1, 0, 0, 1} - ).release(); + cudf::test::structs_column_wrapper({names_col, ages_col}, {1, 1, 1, 0, 0, 1}).release(); auto expected_unchanged_struct_col = cudf::column(*struct_col); - auto list_offsets_column = cudf::test::fixed_width_column_wrapper{0,2,3,5,6}.release(); - auto num_list_rows = list_offsets_column->size()-1; + auto list_offsets_column = + cudf::test::fixed_width_column_wrapper{0, 2, 3, 5, 6}.release(); + auto num_list_rows = list_offsets_column->size() - 1; - auto list_col = cudf::make_lists_column(num_list_rows, std::move(list_offsets_column), std::move(struct_col), cudf::UNKNOWN_NULL_COUNT, {}); + auto list_col = cudf::make_lists_column(num_list_rows, + std::move(list_offsets_column), + std::move(struct_col), + cudf::UNKNOWN_NULL_COUNT, + {}); // List of structs was constructed successfully. No exceptions. // Verify that child columns is as it was set. - cudf::test::expect_columns_equal(expected_unchanged_struct_col, cudf::lists_column_view(*list_col).child()); + cudf::test::expect_columns_equal(expected_unchanged_struct_col, + cudf::lists_column_view(*list_col).child()); #ifndef NDEBUG std::cout << "Printing list col: \n"; @@ -536,17 +427,15 @@ TYPED_TEST(TypedStructColumnWrapperTest, TestListsOfStructs) #endif } - TEST_F(StructColumnWrapperTest, SimpleTestExpectStructColumnsEqual) { - auto ints_col = cudf::test::fixed_width_column_wrapper{{0,1}, {0,0}}.release(); + auto ints_col = cudf::test::fixed_width_column_wrapper{{0, 1}, {0, 0}}.release(); vector_of_columns cols; cols.emplace_back(std::move(ints_col)); auto structs_col = cudf::test::structs_column_wrapper{std::move(cols)}; - + cudf::test::expect_columns_equal(structs_col, structs_col); } - CUDF_TEST_PROGRAM_MAIN() diff --git a/cpp/tests/utilities/column_utilities.cu b/cpp/tests/utilities/column_utilities.cu index 8a842e97939..bbcd5d8d901 100644 --- a/cpp/tests/utilities/column_utilities.cu +++ b/cpp/tests/utilities/column_utilities.cu @@ -23,9 +23,9 @@ #include #include #include -#include -#include #include +#include +#include #include #include #include @@ -272,21 +272,19 @@ struct column_comparator_impl { bool print_all_differences, int depth) { - std::for_each( - thrust::make_counting_iterator(0), - thrust::make_counting_iterator(0) + lhs.num_children(), - [&](auto i) { - cudf::type_dispatcher( - lhs.child(i).type(), - column_comparator{}, - lhs.child(i), - rhs.child(i), - print_all_differences, depth+1); - } - ); + std::for_each(thrust::make_counting_iterator(0), + thrust::make_counting_iterator(0) + lhs.num_children(), + [&](auto i) { + cudf::type_dispatcher(lhs.child(i).type(), + column_comparator{}, + lhs.child(i), + rhs.child(i), + print_all_differences, + depth + 1); + }); } }; - + template struct column_comparator { template @@ -422,19 +420,17 @@ std::string get_nested_type_str(cudf::column_view const& view) return cudf::jit::get_type_name(view.type()) + "<" + (lcv.size() > 0 ? get_nested_type_str(lcv.child()) : "") + ">"; } - - if (view.type().id() == cudf::type_id::STRUCT) { + if (view.type().id() == cudf::type_id::STRUCT) { std::ostringstream out; out << cudf::jit::get_type_name(view.type()) + "<"; - // std::for_each(view.child_begin(), view.child_end(), [&out](auto col){ out << get_nested_type_str(col);}); - std::transform( - view.child_begin(), - view.child_end(), - std::ostream_iterator(out, ","), - [&out](auto const col) {return get_nested_type_str(col);} - ); + // std::for_each(view.child_begin(), view.child_end(), [&out](auto col){ out << + // get_nested_type_str(col);}); + std::transform(view.child_begin(), + view.child_end(), + std::ostream_iterator(out, ","), + [&out](auto const col) { return get_nested_type_str(col); }); out << ">"; return out.str(); } @@ -616,10 +612,10 @@ struct column_view_printer { } std::transform( - view.child_begin(), view.child_end(), + view.child_begin(), + view.child_end(), std::ostream_iterator(out_stream, "\n"), - [&](auto child_column) {return detail::to_string(child_column, ", ", indent + " ");} - ); + [&](auto child_column) { return detail::to_string(child_column, ", ", indent + " "); }); out.push_back(out_stream.str()); } diff --git a/cpp/tests/utilities/column_wrapper.hpp b/cpp/tests/utilities/column_wrapper.hpp index 084986a9b64..27578474192 100644 --- a/cpp/tests/utilities/column_wrapper.hpp +++ b/cpp/tests/utilities/column_wrapper.hpp @@ -970,50 +970,48 @@ class lists_column_wrapper : public detail::column_wrapper { bool root = false; }; -class structs_column_wrapper : public detail::column_wrapper -{ - public: - - structs_column_wrapper(std::vector>&& child_columns, std::vector const& validity = {}) - { - init(std::move(child_columns), validity); - } - - structs_column_wrapper(std::initializer_list> child_column_wrappers, std::vector const& validity = {}) - { - std::vector> child_columns; - child_columns.reserve(child_column_wrappers.size()); - std::transform( - child_column_wrappers.begin(), - child_column_wrappers.end(), - std::back_inserter(child_columns), - [&](auto column_wrapper){return column_wrapper.get().release();} - ); - init(std::move(child_columns), validity); - } - - private: - - void init(std::vector>&& child_columns, std::vector const& validity) - { - size_type num_rows = child_columns.empty()? 0 : child_columns[0]->size(); - - CUDF_EXPECTS( - std::all_of(child_columns.begin(), child_columns.end(), [&](auto const& p_column) {return p_column->size() == num_rows;}), - "All struct member columns must have the same row count." - ); +class structs_column_wrapper : public detail::column_wrapper { + public: + structs_column_wrapper(std::vector>&& child_columns, + std::vector const& validity = {}) + { + init(std::move(child_columns), validity); + } - CUDF_EXPECTS( - validity.size() <= 0 || static_cast(validity.size()) == num_rows, - "Validity buffer must have as many elements as rows in the struct column." - ); + structs_column_wrapper( + std::initializer_list> child_column_wrappers, + std::vector const& validity = {}) + { + std::vector> child_columns; + child_columns.reserve(child_column_wrappers.size()); + std::transform(child_column_wrappers.begin(), + child_column_wrappers.end(), + std::back_inserter(child_columns), + [&](auto column_wrapper) { return column_wrapper.get().release(); }); + init(std::move(child_columns), validity); + } - wrapped = cudf::make_structs_column( - num_rows, - std::move(child_columns), - validity.size() <= 0? 0 : cudf::UNKNOWN_NULL_COUNT, - validity.size() <= 0? rmm::device_buffer{0} : detail::make_null_mask(validity.begin(), validity.end())); - } + private: + void init(std::vector>&& child_columns, + std::vector const& validity) + { + size_type num_rows = child_columns.empty() ? 0 : child_columns[0]->size(); + + CUDF_EXPECTS(std::all_of(child_columns.begin(), + child_columns.end(), + [&](auto const& p_column) { return p_column->size() == num_rows; }), + "All struct member columns must have the same row count."); + + CUDF_EXPECTS(validity.size() <= 0 || static_cast(validity.size()) == num_rows, + "Validity buffer must have as many elements as rows in the struct column."); + + wrapped = cudf::make_structs_column( + num_rows, + std::move(child_columns), + validity.size() <= 0 ? 0 : cudf::UNKNOWN_NULL_COUNT, + validity.size() <= 0 ? rmm::device_buffer{0} + : detail::make_null_mask(validity.begin(), validity.end())); + } }; } // namespace test diff --git a/cpp/tests/utilities/scalar_utilities.cu b/cpp/tests/utilities/scalar_utilities.cu index c2e273251be..841d5adb839 100644 --- a/cpp/tests/utilities/scalar_utilities.cu +++ b/cpp/tests/utilities/scalar_utilities.cu @@ -72,7 +72,7 @@ void compare_scalar_functor::operator()(cudf::scalar const& lhs template <> void compare_scalar_functor::operator()(cudf::scalar const& lhs, - cudf::scalar const& rhs) + cudf::scalar const& rhs) { CUDF_FAIL("Unsupported scalar compare type: struct_view"); } diff --git a/cpp/tests/utilities_tests/lists_column_wrapper_tests.cpp b/cpp/tests/utilities_tests/lists_column_wrapper_tests.cpp index eff3d67d413..1e7726c518e 100644 --- a/cpp/tests/utilities_tests/lists_column_wrapper_tests.cpp +++ b/cpp/tests/utilities_tests/lists_column_wrapper_tests.cpp @@ -1320,26 +1320,26 @@ TYPED_TEST(ListColumnWrapperTestTyped, ListsOfStructs) using T = TypeParam; auto num_struct_rows = 8; - auto numeric_column = test::fixed_width_column_wrapper {1,2,3,4,5,6,7,8}; - auto bool_column = test::fixed_width_column_wrapper{1,1,1,1,0,0,0,0}; - auto struct_column = test::structs_column_wrapper{{numeric_column, bool_column}}.release(); + auto numeric_column = test::fixed_width_column_wrapper{1, 2, 3, 4, 5, 6, 7, 8}; + auto bool_column = test::fixed_width_column_wrapper{1, 1, 1, 1, 0, 0, 0, 0}; + auto struct_column = test::structs_column_wrapper{{numeric_column, bool_column}}.release(); EXPECT_EQ(struct_column->size(), num_struct_rows); EXPECT_TRUE(!struct_column->nullable()); - auto lists_column_offsets = test::fixed_width_column_wrapper{0,2,4,8}.release(); - auto num_lists = lists_column_offsets->size()-1; - auto lists_column = make_lists_column(num_lists, std::move(lists_column_offsets), std::move(struct_column), UNKNOWN_NULL_COUNT, {}); + auto lists_column_offsets = test::fixed_width_column_wrapper{0, 2, 4, 8}.release(); + auto num_lists = lists_column_offsets->size() - 1; + auto lists_column = make_lists_column( + num_lists, std::move(lists_column_offsets), std::move(struct_column), UNKNOWN_NULL_COUNT, {}); // Check if child column is unchanged. - auto expected_numeric_column = test::fixed_width_column_wrapper {1,2,3,4,5,6,7,8}; - auto expected_bool_column = test::fixed_width_column_wrapper{1,1,1,1,0,0,0,0}; - auto expected_struct_column = test::structs_column_wrapper{{expected_numeric_column, expected_bool_column}}.release(); + auto expected_numeric_column = test::fixed_width_column_wrapper{1, 2, 3, 4, 5, 6, 7, 8}; + auto expected_bool_column = test::fixed_width_column_wrapper{1, 1, 1, 1, 0, 0, 0, 0}; + auto expected_struct_column = + test::structs_column_wrapper{{expected_numeric_column, expected_bool_column}}.release(); - cudf::test::expect_columns_equal( - *expected_struct_column, - lists_column_view(*lists_column).child() - ); + cudf::test::expect_columns_equal(*expected_struct_column, + lists_column_view(*lists_column).child()); } TYPED_TEST(ListColumnWrapperTestTyped, ListsOfStructsWithValidity) @@ -1349,41 +1349,33 @@ TYPED_TEST(ListColumnWrapperTestTyped, ListsOfStructsWithValidity) using T = TypeParam; auto num_struct_rows = 8; - auto numeric_column = test::fixed_width_column_wrapper{ - {1,2,3,4,5,6,7,8}, - {1,1,1,1,0,0,0,0} - }; - auto bool_column = test::fixed_width_column_wrapper{1,1,1,1,0,0,0,0}; + auto numeric_column = + test::fixed_width_column_wrapper{{1, 2, 3, 4, 5, 6, 7, 8}, {1, 1, 1, 1, 0, 0, 0, 0}}; + auto bool_column = test::fixed_width_column_wrapper{1, 1, 1, 1, 0, 0, 0, 0}; auto struct_column = test::structs_column_wrapper{{numeric_column, bool_column}}.release(); EXPECT_EQ(struct_column->size(), num_struct_rows); EXPECT_TRUE(!struct_column->nullable()); - auto lists_column_offsets = test::fixed_width_column_wrapper{0,2,4,8}.release(); - auto list_null_mask = {1, 1, 0}; - auto num_lists = lists_column_offsets->size()-1; - auto lists_column = make_lists_column( - num_lists, - std::move(lists_column_offsets), - std::move(struct_column), - UNKNOWN_NULL_COUNT, - test::detail::make_null_mask(list_null_mask.begin(), list_null_mask.end()) - ); + auto lists_column_offsets = test::fixed_width_column_wrapper{0, 2, 4, 8}.release(); + auto list_null_mask = {1, 1, 0}; + auto num_lists = lists_column_offsets->size() - 1; + auto lists_column = + make_lists_column(num_lists, + std::move(lists_column_offsets), + std::move(struct_column), + UNKNOWN_NULL_COUNT, + test::detail::make_null_mask(list_null_mask.begin(), list_null_mask.end())); // Check if child column is unchanged. - auto expected_numeric_column = test::fixed_width_column_wrapper{ - {1,2,3,4,5,6,7,8}, - {1,1,1,1,0,0,0,0} - }; - auto expected_bool_column = test::fixed_width_column_wrapper{1,1,1,1,0,0,0,0}; - auto expected_struct_column = test::structs_column_wrapper{ - {expected_numeric_column, expected_bool_column} - }.release(); + auto expected_numeric_column = + test::fixed_width_column_wrapper{{1, 2, 3, 4, 5, 6, 7, 8}, {1, 1, 1, 1, 0, 0, 0, 0}}; + auto expected_bool_column = test::fixed_width_column_wrapper{1, 1, 1, 1, 0, 0, 0, 0}; + auto expected_struct_column = + test::structs_column_wrapper{{expected_numeric_column, expected_bool_column}}.release(); - cudf::test::expect_columns_equal( - *expected_struct_column, - lists_column_view(*lists_column).child() - ); + cudf::test::expect_columns_equal(*expected_struct_column, + lists_column_view(*lists_column).child()); } TYPED_TEST(ListColumnWrapperTestTyped, ListsOfListsOfStructs) @@ -1393,31 +1385,37 @@ TYPED_TEST(ListColumnWrapperTestTyped, ListsOfListsOfStructs) using T = TypeParam; auto num_struct_rows = 8; - auto numeric_column = test::fixed_width_column_wrapper {1,2,3,4,5,6,7,8}; - auto bool_column = test::fixed_width_column_wrapper{1,1,1,1,0,0,0,0}; - auto struct_column = test::structs_column_wrapper{{numeric_column, bool_column}}.release(); + auto numeric_column = test::fixed_width_column_wrapper{1, 2, 3, 4, 5, 6, 7, 8}; + auto bool_column = test::fixed_width_column_wrapper{1, 1, 1, 1, 0, 0, 0, 0}; + auto struct_column = test::structs_column_wrapper{{numeric_column, bool_column}}.release(); EXPECT_EQ(struct_column->size(), num_struct_rows); EXPECT_TRUE(!struct_column->nullable()); - auto lists_column_offsets = test::fixed_width_column_wrapper{0,2,4,8}.release(); - auto num_lists = lists_column_offsets->size()-1; - auto lists_column = make_lists_column(num_lists, std::move(lists_column_offsets), std::move(struct_column), UNKNOWN_NULL_COUNT, {}); + auto lists_column_offsets = test::fixed_width_column_wrapper{0, 2, 4, 8}.release(); + auto num_lists = lists_column_offsets->size() - 1; + auto lists_column = make_lists_column( + num_lists, std::move(lists_column_offsets), std::move(struct_column), UNKNOWN_NULL_COUNT, {}); - auto lists_of_lists_column_offsets = test::fixed_width_column_wrapper{0,2,3}.release(); - auto num_lists_of_lists = lists_of_lists_column_offsets->size()-1; + auto lists_of_lists_column_offsets = + test::fixed_width_column_wrapper{0, 2, 3}.release(); + auto num_lists_of_lists = lists_of_lists_column_offsets->size() - 1; auto lists_of_lists_of_structs_column = - make_lists_column(num_lists_of_lists, std::move(lists_of_lists_column_offsets), std::move(lists_column), UNKNOWN_NULL_COUNT, {}); + make_lists_column(num_lists_of_lists, + std::move(lists_of_lists_column_offsets), + std::move(lists_column), + UNKNOWN_NULL_COUNT, + {}); // Check if child column is unchanged. - auto expected_numeric_column = test::fixed_width_column_wrapper {1,2,3,4,5,6,7,8}; - auto expected_bool_column = test::fixed_width_column_wrapper{1,1,1,1,0,0,0,0}; - auto expected_struct_column = test::structs_column_wrapper{{expected_numeric_column, expected_bool_column}}.release(); + auto expected_numeric_column = test::fixed_width_column_wrapper{1, 2, 3, 4, 5, 6, 7, 8}; + auto expected_bool_column = test::fixed_width_column_wrapper{1, 1, 1, 1, 0, 0, 0, 0}; + auto expected_struct_column = + test::structs_column_wrapper{{expected_numeric_column, expected_bool_column}}.release(); cudf::test::expect_columns_equal( *expected_struct_column, - lists_column_view{lists_column_view{*lists_of_lists_of_structs_column}.child()}.child() - ); + lists_column_view{lists_column_view{*lists_of_lists_of_structs_column}.child()}.child()); } TYPED_TEST(ListColumnWrapperTestTyped, ListsOfListsOfStructsWithValidity) @@ -1427,48 +1425,45 @@ TYPED_TEST(ListColumnWrapperTestTyped, ListsOfListsOfStructsWithValidity) using T = TypeParam; auto num_struct_rows = 8; - auto numeric_column = test::fixed_width_column_wrapper{ - {1,2,3,4,5,6,7,8}, - {1,1,1,1,0,0,0,0} - }; - auto bool_column = test::fixed_width_column_wrapper{1,1,1,1,0,0,0,0}; + auto numeric_column = + test::fixed_width_column_wrapper{{1, 2, 3, 4, 5, 6, 7, 8}, {1, 1, 1, 1, 0, 0, 0, 0}}; + auto bool_column = test::fixed_width_column_wrapper{1, 1, 1, 1, 0, 0, 0, 0}; auto struct_column = test::structs_column_wrapper{{numeric_column, bool_column}}.release(); EXPECT_EQ(struct_column->size(), num_struct_rows); EXPECT_TRUE(!struct_column->nullable()); - auto lists_column_offsets = test::fixed_width_column_wrapper{0,2,4,8}.release(); - auto num_lists = lists_column_offsets->size()-1; - auto list_null_mask = {1, 1, 0}; - auto lists_column = make_lists_column( - num_lists, - std::move(lists_column_offsets), - std::move(struct_column), - UNKNOWN_NULL_COUNT, - test::detail::make_null_mask(list_null_mask.begin(), list_null_mask.end())); - - auto lists_of_lists_column_offsets = test::fixed_width_column_wrapper{0,2,3}.release(); - auto num_lists_of_lists = lists_of_lists_column_offsets->size()-1; - auto list_of_lists_null_mask = {1,0}; + auto lists_column_offsets = test::fixed_width_column_wrapper{0, 2, 4, 8}.release(); + auto num_lists = lists_column_offsets->size() - 1; + auto list_null_mask = {1, 1, 0}; + auto lists_column = + make_lists_column(num_lists, + std::move(lists_column_offsets), + std::move(struct_column), + UNKNOWN_NULL_COUNT, + test::detail::make_null_mask(list_null_mask.begin(), list_null_mask.end())); + + auto lists_of_lists_column_offsets = + test::fixed_width_column_wrapper{0, 2, 3}.release(); + auto num_lists_of_lists = lists_of_lists_column_offsets->size() - 1; + auto list_of_lists_null_mask = {1, 0}; auto lists_of_lists_of_structs_column = make_lists_column( - num_lists_of_lists, - std::move(lists_of_lists_column_offsets), - std::move(lists_column), - UNKNOWN_NULL_COUNT, + num_lists_of_lists, + std::move(lists_of_lists_column_offsets), + std::move(lists_column), + UNKNOWN_NULL_COUNT, test::detail::make_null_mask(list_of_lists_null_mask.begin(), list_of_lists_null_mask.end())); // Check if child column is unchanged. - auto expected_numeric_column = test::fixed_width_column_wrapper{ - {1,2,3,4,5,6,7,8}, - {1,1,1,1,0,0,0,0} - }; - auto expected_bool_column = test::fixed_width_column_wrapper{1,1,1,1,0,0,0,0}; - auto expected_struct_column = test::structs_column_wrapper{{expected_numeric_column, expected_bool_column}}.release(); + auto expected_numeric_column = + test::fixed_width_column_wrapper{{1, 2, 3, 4, 5, 6, 7, 8}, {1, 1, 1, 1, 0, 0, 0, 0}}; + auto expected_bool_column = test::fixed_width_column_wrapper{1, 1, 1, 1, 0, 0, 0, 0}; + auto expected_struct_column = + test::structs_column_wrapper{{expected_numeric_column, expected_bool_column}}.release(); cudf::test::expect_columns_equal( *expected_struct_column, - lists_column_view{lists_column_view{*lists_of_lists_of_structs_column}.child()}.child() - ); + lists_column_view{lists_column_view{*lists_of_lists_of_structs_column}.child()}.child()); } TYPED_TEST(ListColumnWrapperTestTyped, LargeListsOfStructsWithValidity) @@ -1483,26 +1478,36 @@ TYPED_TEST(ListColumnWrapperTestTyped, LargeListsOfStructsWithValidity) auto numeric_column = test::fixed_width_column_wrapper{ thrust::make_counting_iterator(0), thrust::make_counting_iterator(num_struct_rows), - cudf::test::make_counting_transform_iterator(0, [](auto i){ return i%2==1; }) - }; + cudf::test::make_counting_transform_iterator(0, [](auto i) { return i % 2 == 1; })}; - auto bool_iterator = thrust::make_transform_iterator(thrust::make_counting_iterator(0), [](auto i) { return i%3==0; }); - auto bool_column = test::fixed_width_column_wrapper(bool_iterator, bool_iterator+num_struct_rows); + auto bool_iterator = thrust::make_transform_iterator(thrust::make_counting_iterator(0), + [](auto i) { return i % 3 == 0; }); + auto bool_column = + test::fixed_width_column_wrapper(bool_iterator, bool_iterator + num_struct_rows); - auto struct_validity_iterator = cudf::test::make_counting_transform_iterator(0, [](auto i){ return i%5 == 0; }); - auto struct_column = test::structs_column_wrapper{ - {numeric_column, bool_column}, - std::vector(struct_validity_iterator, struct_validity_iterator+num_struct_rows) - }.release(); + auto struct_validity_iterator = + cudf::test::make_counting_transform_iterator(0, [](auto i) { return i % 5 == 0; }); + auto struct_column = + test::structs_column_wrapper{ + {numeric_column, bool_column}, + std::vector(struct_validity_iterator, struct_validity_iterator + num_struct_rows)} + .release(); EXPECT_EQ(struct_column->size(), num_struct_rows); // Now, use struct_column to create a list column. // Each list has 50 elements. - auto num_list_rows = num_struct_rows/50; - auto list_offset_iterator = test::make_counting_transform_iterator(0, [](auto i){ return i*50; }); - auto list_offset_column = test::fixed_width_column_wrapper(list_offset_iterator, list_offset_iterator+num_list_rows+1).release(); - auto lists_column = make_lists_column(num_list_rows, std::move(list_offset_column), std::move(struct_column), cudf::UNKNOWN_NULL_COUNT, {}); + auto num_list_rows = num_struct_rows / 50; + auto list_offset_iterator = + test::make_counting_transform_iterator(0, [](auto i) { return i * 50; }); + auto list_offset_column = test::fixed_width_column_wrapper( + list_offset_iterator, list_offset_iterator + num_list_rows + 1) + .release(); + auto lists_column = make_lists_column(num_list_rows, + std::move(list_offset_column), + std::move(struct_column), + cudf::UNKNOWN_NULL_COUNT, + {}); // List construction succeeded. // Verify that the child is unchanged. @@ -1510,18 +1515,17 @@ TYPED_TEST(ListColumnWrapperTestTyped, LargeListsOfStructsWithValidity) auto expected_numeric_column = test::fixed_width_column_wrapper{ thrust::make_counting_iterator(0), thrust::make_counting_iterator(num_struct_rows), - cudf::test::make_counting_transform_iterator(0, [](auto i){ return i%2==1; }) - }; + cudf::test::make_counting_transform_iterator(0, [](auto i) { return i % 2 == 1; })}; - auto expected_bool_column = test::fixed_width_column_wrapper(bool_iterator, bool_iterator+num_struct_rows); + auto expected_bool_column = + test::fixed_width_column_wrapper(bool_iterator, bool_iterator + num_struct_rows); - auto expected_struct_column = test::structs_column_wrapper{ - {expected_numeric_column, expected_bool_column}, - std::vector(struct_validity_iterator, struct_validity_iterator+num_struct_rows) - }.release(); - - cudf::test::expect_columns_equal( - *expected_struct_column, - lists_column_view(*lists_column).child() - ); + auto expected_struct_column = + test::structs_column_wrapper{ + {expected_numeric_column, expected_bool_column}, + std::vector(struct_validity_iterator, struct_validity_iterator + num_struct_rows)} + .release(); + + cudf::test::expect_columns_equal(*expected_struct_column, + lists_column_view(*lists_column).child()); } From b4a879fbafd5fc7cd38c8b7376d9fc391e62f153 Mon Sep 17 00:00:00 2001 From: Mithun RK Date: Thu, 6 Aug 2020 11:57:08 -0700 Subject: [PATCH 12/20] [struct] Added struct headers to meta.yaml --- conda/recipes/libcudf/meta.yaml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/conda/recipes/libcudf/meta.yaml b/conda/recipes/libcudf/meta.yaml index e3a5c40b8fd..6f193827dcb 100644 --- a/conda/recipes/libcudf/meta.yaml +++ b/conda/recipes/libcudf/meta.yaml @@ -151,6 +151,8 @@ test: - test -f $PREFIX/include/cudf/strings/substring.hpp - test -f $PREFIX/include/cudf/strings/translate.hpp - test -f $PREFIX/include/cudf/strings/wrap.hpp + - test -f $PREFIX/include/cudf/structs/structs_column_view.hpp + - test -f $PREFIX/include/cudf/structs/struct_view.hpp - test -f $PREFIX/include/cudf/table/table.hpp - test -f $PREFIX/include/cudf/table/table_view.hpp - test -f $PREFIX/include/cudf/transform.hpp From 5dbfd5e49bd0932990777344984f02496e879bdb Mon Sep 17 00:00:00 2001 From: Mithun RK Date: Thu, 6 Aug 2020 15:25:32 -0700 Subject: [PATCH 13/20] [struct] Minor fixes from review: 1. Added documentation to child_begin(), child_end() 2. Removed stray comments/dead-code. 3. Newlines at the end of source file. --- cpp/include/cudf/column/column_view.hpp | 11 +++++++++-- cpp/include/cudf/structs/struct_view.hpp | 2 +- cpp/tests/utilities/column_utilities.cu | 2 -- 3 files changed, 10 insertions(+), 5 deletions(-) diff --git a/cpp/include/cudf/column/column_view.hpp b/cpp/include/cudf/column/column_view.hpp index 7d217de90f6..d11a072251b 100644 --- a/cpp/include/cudf/column/column_view.hpp +++ b/cpp/include/cudf/column/column_view.hpp @@ -335,8 +335,15 @@ class column_view : public detail::column_view_base { **/ size_type num_children() const noexcept { return _children.size(); } - auto child_begin() const noexcept { return _children.begin(); } - auto child_end() const noexcept { return _children.end(); } + /** + * @brief Returns iterator to the beginning of the ordered sequence of child column-views. + */ + auto child_begin() const noexcept { return _children.cbegin(); } + + /** + * @brief Returns iterator to the end of the ordered sequence of child column-views. + */ + auto child_end() const noexcept { return _children.cend(); } private: std::vector _children{}; ///< Based on element type, children diff --git a/cpp/include/cudf/structs/struct_view.hpp b/cpp/include/cudf/structs/struct_view.hpp index f15393789f0..6240feb009e 100644 --- a/cpp/include/cudf/structs/struct_view.hpp +++ b/cpp/include/cudf/structs/struct_view.hpp @@ -31,4 +31,4 @@ namespace cudf { class struct_view { }; -} // namespace cudf \ No newline at end of file +} // namespace cudf diff --git a/cpp/tests/utilities/column_utilities.cu b/cpp/tests/utilities/column_utilities.cu index bbcd5d8d901..f47c6076d4a 100644 --- a/cpp/tests/utilities/column_utilities.cu +++ b/cpp/tests/utilities/column_utilities.cu @@ -425,8 +425,6 @@ std::string get_nested_type_str(cudf::column_view const& view) std::ostringstream out; out << cudf::jit::get_type_name(view.type()) + "<"; - // std::for_each(view.child_begin(), view.child_end(), [&out](auto col){ out << - // get_nested_type_str(col);}); std::transform(view.child_begin(), view.child_end(), std::ostream_iterator(out, ","), From db852530e87aefeea4754b054149a0366ba00a91 Mon Sep 17 00:00:00 2001 From: Mithun RK Date: Fri, 7 Aug 2020 10:40:56 -0700 Subject: [PATCH 14/20] [struct] Switch tests to check column equivalence --- cpp/tests/structs/structs_column_tests.cu | 34 +++++++++++------------ 1 file changed, 17 insertions(+), 17 deletions(-) diff --git a/cpp/tests/structs/structs_column_tests.cu b/cpp/tests/structs/structs_column_tests.cu index 384da7e8e29..c8189958078 100644 --- a/cpp/tests/structs/structs_column_tests.cu +++ b/cpp/tests/structs/structs_column_tests.cu @@ -98,7 +98,7 @@ TYPED_TEST(TypedStructColumnWrapperTest, TestColumnFactoryConstruction) std::for_each(thrust::make_counting_iterator(0), thrust::make_counting_iterator(0) + expected_children.size(), [&](auto idx) { - cudf::test::expect_columns_equal(struct_col_view.child(idx), + cudf::test::expect_columns_equivalent(struct_col_view.child(idx), expected_children[idx]->view()); }); } @@ -149,14 +149,14 @@ TYPED_TEST(TypedStructColumnWrapperTest, TestColumnWrapperConstruction) std::for_each(thrust::make_counting_iterator(0), thrust::make_counting_iterator(0) + expected_children.size(), [&](auto idx) { - cudf::test::expect_columns_equal(struct_col_view.child(idx), + cudf::test::expect_columns_equivalent(struct_col_view.child(idx), expected_children[idx]->view()); }); auto expected_struct_col = cudf::test::structs_column_wrapper{std::move(expected_children), {1, 1, 1, 0, 1, 1}}.release(); - cudf::test::expect_columns_equal(struct_col_view, expected_struct_col->view()); + cudf::test::expect_columns_equivalent(struct_col_view, expected_struct_col->view()); } TYPED_TEST(TypedStructColumnWrapperTest, TestStructsContainingLists) @@ -193,7 +193,7 @@ TYPED_TEST(TypedStructColumnWrapperTest, TestStructsContainingLists) return i < 4; })}.release(); - cudf::test::expect_columns_equal(struct_col->view().child(0), expected_names_col->view()); + cudf::test::expect_columns_equivalent(struct_col->view().child(0), expected_names_col->view()); // For the `List` member, indices 4, 5 should be null. // FIXME: The way list columns are currently compared is not ideal for testing @@ -216,7 +216,7 @@ TYPED_TEST(TypedStructColumnWrapperTest, TestStructsContainingLists) })}.release(); // FIXME: Uncomment after list comparison is fixed. - // cudf::test::expect_columns_equal( + // cudf::test::expect_columns_equivalent( // struct_col->view().child(1), // expected_last_two_lists_col->view()); } @@ -256,21 +256,21 @@ TYPED_TEST(TypedStructColumnWrapperTest, StructOfStructs) cudf::test::make_counting_transform_iterator(0, [](auto i) { return i != 0 && i != 4; })) .release(); - cudf::test::expect_columns_equal(*expected_names_col, struct_2->child(1).child(0)); + cudf::test::expect_columns_equivalent(*expected_names_col, struct_2->child(1).child(0)); auto expected_ages_col = cudf::test::fixed_width_column_wrapper{ {48, 27, 25, 31, 351, 351}, {0, 1, 1, 1, 0, 0}}.release(); - cudf::test::expect_columns_equal(*expected_ages_col, struct_2->child(1).child(1)); + cudf::test::expect_columns_equivalent(*expected_ages_col, struct_2->child(1).child(1)); auto expected_bool_col = cudf::test::fixed_width_column_wrapper{ {true, true, false, false, false, false}, {0, 1, 0, 1, 1, 0}}.release(); - cudf::test::expect_columns_equal(*expected_bool_col, struct_2->child(0)); + cudf::test::expect_columns_equivalent(*expected_bool_col, struct_2->child(0)); // Verify that recursive struct columns may be compared - // using expect_columns_equal. + // using expect_columns_equivalent. vector_of_columns expected_cols_1; expected_cols_1.emplace_back(std::move(expected_names_col)); @@ -284,7 +284,7 @@ TYPED_TEST(TypedStructColumnWrapperTest, StructOfStructs) auto expected_struct_2 = cudf::test::structs_column_wrapper(std::move(expected_cols_2), {0, 1, 1, 1, 1, 1}).release(); - cudf::test::expect_columns_equal(*expected_struct_2, *struct_2); + cudf::test::expect_columns_equivalent(*expected_struct_2, *struct_2); } TYPED_TEST(TypedStructColumnWrapperTest, TestNullMaskPropagationForNonNullStruct) @@ -331,21 +331,21 @@ TYPED_TEST(TypedStructColumnWrapperTest, TestNullMaskPropagationForNonNullStruct cudf::test::make_counting_transform_iterator(0, [](auto i) { return i != 0; })) .release(); - cudf::test::expect_columns_equal(*expected_names_col, struct_2->child(1).child(0)); + cudf::test::expect_columns_equivalent(*expected_names_col, struct_2->child(1).child(0)); auto expected_ages_col = cudf::test::fixed_width_column_wrapper{ {48, 27, 25, 31, 351, 351}, {0, 1, 1, 1, 1, 1}}.release(); - cudf::test::expect_columns_equal(*expected_ages_col, struct_2->child(1).child(1)); + cudf::test::expect_columns_equivalent(*expected_ages_col, struct_2->child(1).child(1)); auto expected_bool_col = cudf::test::fixed_width_column_wrapper{ {true, true, false, false, false, false}, {0, 1, 0, 1, 1, 0}}.release(); - cudf::test::expect_columns_equal(*expected_bool_col, struct_2->child(0)); + cudf::test::expect_columns_equivalent(*expected_bool_col, struct_2->child(0)); // Verify that recursive struct columns may be compared - // using expect_columns_equal. + // using expect_columns_equivalent. vector_of_columns expected_cols_1; expected_cols_1.emplace_back(std::move(expected_names_col)); @@ -359,7 +359,7 @@ TYPED_TEST(TypedStructColumnWrapperTest, TestNullMaskPropagationForNonNullStruct auto expected_struct_2 = cudf::test::structs_column_wrapper(std::move(expected_cols_2), {0, 1, 1, 1, 1, 1}).release(); - cudf::test::expect_columns_equal(*expected_struct_2, *struct_2); + cudf::test::expect_columns_equivalent(*expected_struct_2, *struct_2); } TEST_F(StructColumnWrapperTest, StructWithNoMembers) @@ -418,7 +418,7 @@ TYPED_TEST(TypedStructColumnWrapperTest, TestListsOfStructs) // List of structs was constructed successfully. No exceptions. // Verify that child columns is as it was set. - cudf::test::expect_columns_equal(expected_unchanged_struct_col, + cudf::test::expect_columns_equivalent(expected_unchanged_struct_col, cudf::lists_column_view(*list_col).child()); #ifndef NDEBUG @@ -435,7 +435,7 @@ TEST_F(StructColumnWrapperTest, SimpleTestExpectStructColumnsEqual) cols.emplace_back(std::move(ints_col)); auto structs_col = cudf::test::structs_column_wrapper{std::move(cols)}; - cudf::test::expect_columns_equal(structs_col, structs_col); + cudf::test::expect_columns_equivalent(structs_col, structs_col); } CUDF_TEST_PROGRAM_MAIN() From a48a5b7c7ebd5952026862f213d6609a2e1b21bd Mon Sep 17 00:00:00 2001 From: Mithun RK Date: Fri, 7 Aug 2020 11:28:21 -0700 Subject: [PATCH 15/20] [struct] clang-format changes in struct tests --- cpp/tests/structs/structs_column_tests.cu | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/cpp/tests/structs/structs_column_tests.cu b/cpp/tests/structs/structs_column_tests.cu index c8189958078..4794878df1f 100644 --- a/cpp/tests/structs/structs_column_tests.cu +++ b/cpp/tests/structs/structs_column_tests.cu @@ -99,7 +99,7 @@ TYPED_TEST(TypedStructColumnWrapperTest, TestColumnFactoryConstruction) thrust::make_counting_iterator(0) + expected_children.size(), [&](auto idx) { cudf::test::expect_columns_equivalent(struct_col_view.child(idx), - expected_children[idx]->view()); + expected_children[idx]->view()); }); } @@ -150,7 +150,7 @@ TYPED_TEST(TypedStructColumnWrapperTest, TestColumnWrapperConstruction) thrust::make_counting_iterator(0) + expected_children.size(), [&](auto idx) { cudf::test::expect_columns_equivalent(struct_col_view.child(idx), - expected_children[idx]->view()); + expected_children[idx]->view()); }); auto expected_struct_col = @@ -419,7 +419,7 @@ TYPED_TEST(TypedStructColumnWrapperTest, TestListsOfStructs) // Verify that child columns is as it was set. cudf::test::expect_columns_equivalent(expected_unchanged_struct_col, - cudf::lists_column_view(*list_col).child()); + cudf::lists_column_view(*list_col).child()); #ifndef NDEBUG std::cout << "Printing list col: \n"; From 671ec1c9fcae923b64a903c8ad9af6369d04b4c8 Mon Sep 17 00:00:00 2001 From: Mithun RK Date: Fri, 7 Aug 2020 13:51:26 -0700 Subject: [PATCH 16/20] [struct] Corrected nullmask size check in struct factory --- cpp/src/structs/structs_column_factories.cu | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/cpp/src/structs/structs_column_factories.cu b/cpp/src/structs/structs_column_factories.cu index 3424ce3fe0b..f642a207ce3 100644 --- a/cpp/src/structs/structs_column_factories.cu +++ b/cpp/src/structs/structs_column_factories.cu @@ -82,18 +82,14 @@ std::unique_ptr make_structs_column( cudaStream_t stream, rmm::mr::device_memory_resource* mr) { - if (null_count > 0) { - CUDF_EXPECTS(!null_mask.is_empty(), "Column with nulls must be nullable."); - } + CUDF_EXPECTS(null_count <= 0 || !null_mask.is_empty(), + "Struct column with nulls must be nullable."); CUDF_EXPECTS(std::all_of(child_columns.begin(), child_columns.end(), [&](auto const& child_col) { return num_rows == child_col->size(); }), "Child columns must have the same number of rows as the Struct column."); - CUDF_EXPECTS(null_count == 0 || !null_mask.is_empty(), - "Struct column with nulls must be nullable."); - if (!null_mask.is_empty()) { for (auto& child : child_columns) { superimpose_parent_nullmask(static_cast(null_mask.data()), From 6b9082e74adfdc6e15b70d401f01dadc5f597be5 Mon Sep 17 00:00:00 2001 From: Mithun RK Date: Mon, 10 Aug 2020 12:25:47 -0700 Subject: [PATCH 17/20] [struct] Incorporated review changes. 1. Added documentation for struct_column_wrapper. 2. Removed unnecessary std::move for child null masks. --- cpp/src/structs/structs_column_factories.cu | 3 +- cpp/tests/utilities/column_wrapper.hpp | 50 +++++++++++++++++++++ 2 files changed, 51 insertions(+), 2 deletions(-) diff --git a/cpp/src/structs/structs_column_factories.cu b/cpp/src/structs/structs_column_factories.cu index f642a207ce3..94af05d1f93 100644 --- a/cpp/src/structs/structs_column_factories.cu +++ b/cpp/src/structs/structs_column_factories.cu @@ -34,8 +34,7 @@ void superimpose_parent_nullmask(bitmask_type const* parent_null_mask, { if (!child.nullable()) { // Child currently has no null mask. Copy parent's null mask. - child.set_null_mask( - std::move(rmm::device_buffer{parent_null_mask, parent_null_mask_size, stream, mr})); + child.set_null_mask(rmm::device_buffer{parent_null_mask, parent_null_mask_size, stream, mr}); child.set_null_count(parent_null_count); } else { // Child should have a null mask. diff --git a/cpp/tests/utilities/column_wrapper.hpp b/cpp/tests/utilities/column_wrapper.hpp index 18f45eb2da1..ff65385e543 100644 --- a/cpp/tests/utilities/column_wrapper.hpp +++ b/cpp/tests/utilities/column_wrapper.hpp @@ -958,14 +958,64 @@ class lists_column_wrapper : public detail::column_wrapper { bool root = false; }; +/** + * @brief `column_wrapper` derived class for wrapping columns of structs. + */ class structs_column_wrapper : public detail::column_wrapper { public: + /** + * @brief Constructs a struct column from the specified list of pre-constructed child columns. + * + * The child columns are "adopted" by the struct column constructed here. + * + * Example usage: + * @code{.cpp} + * // The following constructs a column for struct< int, string >. + * auto child_int_col = fixed_width_column_wrapper{ 1, 2, 3, 4, 5 }.release(); + * auto child_string_col = string_column_wrapper {"All", "the", "leaves", "are", + * "brown"}.release(); + * + * std::vector> child_columns; + * child_columns.push_back(std::move(child_int_col)); + * child_columns.push_back(std::move(child_string_col)); + * + * struct_column_wrapper struct_column_wrapper{ + * child_cols, + * {1,0,1,0,1} // Validity. + * }; + * + * auto struct_col {struct_column_wrapper.release()}; + * @endcode + * + * @param child_columns The vector of pre-constructed child columns + * @param validity The vector of bools representing the column validity values + */ structs_column_wrapper(std::vector>&& child_columns, std::vector const& validity = {}) { init(std::move(child_columns), validity); } + /** + * @brief Constructs a struct column from the list of column wrappers for child columns. + * + * Example usage: + * @code{.cpp} + * // The following constructs a column for struct< int, string >. + * fixed_width_column_wrapper child_int_col_wrapper{ 1, 2, 3, 4, 5 }; + * string_column_wrapper child_string_col_wrapper {"All", "the", "leaves", "are", "brown"}; + * + * struct_column_wrapper struct_column_wrapper{ + * {child_int_col_wrapper, child_string_col_wrapper} + * {1,0,1,0,1} // Validity. + * }; + * + * auto struct_col {struct_column_wrapper.release()}; + * @endcode + * + * @param child_columns_wrappers The list of child column wrappers + * @param validity The vector of bools representing the column validity values + */ structs_column_wrapper( std::initializer_list> child_column_wrappers, std::vector const& validity = {}) From bb93f7ab89b4c3e9b5b09f320e746d30e9a7267e Mon Sep 17 00:00:00 2001 From: Mithun RK Date: Tue, 11 Aug 2020 18:23:03 -0700 Subject: [PATCH 18/20] [struct] More review-related corrections 1. Added tests for deeper nesting. 2. Fixed license headers. 3. Added convenience methods to structs_column_wrapper. --- cpp/src/scalar/scalar_factories.cpp | 2 +- cpp/src/search/search.cu | 2 +- cpp/src/structs/structs_column_factories.cu | 2 +- cpp/src/structs/structs_column_view.cu | 2 +- cpp/tests/structs/structs_column_tests.cu | 110 ++++++++++++++++++++ cpp/tests/utilities/column_wrapper.hpp | 54 ++++++++++ 6 files changed, 168 insertions(+), 4 deletions(-) diff --git a/cpp/src/scalar/scalar_factories.cpp b/cpp/src/scalar/scalar_factories.cpp index 4d89557123d..a0d7082c905 100644 --- a/cpp/src/scalar/scalar_factories.cpp +++ b/cpp/src/scalar/scalar_factories.cpp @@ -104,7 +104,7 @@ std::unique_ptr default_scalar_functor::operator()() template <> std::unique_ptr default_scalar_functor::operator()() { - CUDF_FAIL("list_view type not supported"); + CUDF_FAIL("struct_view type not supported"); } } // namespace diff --git a/cpp/src/search/search.cu b/cpp/src/search/search.cu index 0938a406e7d..e31efdc54a7 100644 --- a/cpp/src/search/search.cu +++ b/cpp/src/search/search.cu @@ -295,7 +295,7 @@ std::unique_ptr multi_contains_dispatch::operator()( rmm::mr::device_memory_resource* mr, cudaStream_t stream) { - CUDF_FAIL("list_view type not supported"); + CUDF_FAIL("struct_view type not supported"); } std::unique_ptr contains(column_view const& haystack, diff --git a/cpp/src/structs/structs_column_factories.cu b/cpp/src/structs/structs_column_factories.cu index 94af05d1f93..2e239fce5f3 100644 --- a/cpp/src/structs/structs_column_factories.cu +++ b/cpp/src/structs/structs_column_factories.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019, NVIDIA CORPORATION. + * Copyright (c) 2020, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/cpp/src/structs/structs_column_view.cu b/cpp/src/structs/structs_column_view.cu index 08370c2fde2..f9cb345de6f 100644 --- a/cpp/src/structs/structs_column_view.cu +++ b/cpp/src/structs/structs_column_view.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019, NVIDIA CORPORATION. + * Copyright (c) 2020, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/cpp/tests/structs/structs_column_tests.cu b/cpp/tests/structs/structs_column_tests.cu index 4794878df1f..0b849a941f8 100644 --- a/cpp/tests/structs/structs_column_tests.cu +++ b/cpp/tests/structs/structs_column_tests.cu @@ -30,6 +30,8 @@ #include #include "cudf/column/column_factories.hpp" #include "cudf/detail/utilities/device_operators.cuh" +#include "cudf/null_mask.hpp" +#include "cudf/structs/structs_column_view.hpp" #include "cudf/table/table_view.hpp" #include "cudf/types.hpp" #include "cudf/utilities/error.hpp" @@ -427,6 +429,114 @@ TYPED_TEST(TypedStructColumnWrapperTest, TestListsOfStructs) #endif } +TYPED_TEST(TypedStructColumnWrapperTest, ListOfStructOfList) +{ + using namespace cudf::test; + + auto list_col = lists_column_wrapper{ + {{0},{1},{},{3},{4},{5,5},{6},{},{8},{9}}, + cudf::test::make_counting_transform_iterator(0, [](auto i){return i%2;}) + }; + + // TODO: Struct cannot be compared with expect_columns_equal(), + // if the struct has null values. After lists support "equivalence" + // comparisons, the structs column needs to be modified to add nulls. + auto struct_of_lists_col = structs_column_wrapper{ {list_col} }.release(); + + auto list_of_struct_of_list_validity = make_counting_transform_iterator(0, [](auto i){ return i%3; }); + auto list_of_struct_of_list = cudf::make_lists_column( + 5, + std::move(fixed_width_column_wrapper{0,2,4,6,8,10}.release()), + std::move(struct_of_lists_col), + cudf::UNKNOWN_NULL_COUNT, + detail::make_null_mask(list_of_struct_of_list_validity, list_of_struct_of_list_validity+5) + ); + + // Compare with expected values. + + auto expected_level0_list = lists_column_wrapper{ + {{}, {1}, {}, {3}, {}, {5,5}, {}, {}, {}, {9}}, + make_counting_transform_iterator(0, [](auto i){return i%2;}) + }; + + auto expected_level2_struct = structs_column_wrapper{{expected_level0_list}}.release(); + + expect_columns_equivalent( + cudf::lists_column_view(*list_of_struct_of_list).child(), + *expected_level2_struct + ); + + auto expected_level3_list = cudf::make_lists_column( + 5, + std::move(fixed_width_column_wrapper{0,2,4,6,8,10}.release()), + std::move(expected_level2_struct), + cudf::UNKNOWN_NULL_COUNT, + detail::make_null_mask(list_of_struct_of_list_validity, list_of_struct_of_list_validity+5) + ); + + expect_columns_equivalent(*list_of_struct_of_list, *expected_level3_list); +} + +TYPED_TEST(TypedStructColumnWrapperTest, StructOfListOfStruct) +{ + using namespace cudf::test; + + auto ints_col = fixed_width_column_wrapper{ + {0,1,2,3,4,5,6,7,8,9}, + make_counting_transform_iterator(0, [](auto i){ return i%2; }) + }; + + auto structs_col = structs_column_wrapper{ + {ints_col}, + make_counting_transform_iterator(0, [](auto i){ return i < 6;}) // Last 4 structs are null. + }.release(); + + auto list_validity = make_counting_transform_iterator(0, [](auto i){ return i%3; }); + auto lists_col = cudf::make_lists_column( + 5, + std::move(fixed_width_column_wrapper{0,2,4,6,8,10}.release()), + std::move(structs_col), + cudf::UNKNOWN_NULL_COUNT, + detail::make_null_mask(list_validity, list_validity+5)); + + std::vector> cols; + cols.push_back(std::move(lists_col)); + auto struct_of_list_of_struct = structs_column_wrapper{std::move(cols)}.release(); + + // Check that the struct is constructed as expected. + + auto expected_ints_col = fixed_width_column_wrapper{ + {0,1,0,3,0,5,0,0,0,0}, + {0,1,0,1,0,1,0,0,0,0} + }; + + auto expected_structs_col = structs_column_wrapper{ + {expected_ints_col}, + {1,1,1,1,1,1,0,0,0,0} + }.release(); + + auto expected_lists_col = cudf::make_lists_column( + 5, + std::move(fixed_width_column_wrapper{0,2,4,6,8,10}.release()), + std::move(expected_structs_col), + cudf::UNKNOWN_NULL_COUNT, + detail::make_null_mask(list_validity, list_validity+5)); + + // Test that the lists child column is as expected. + cudf::test::expect_columns_equivalent( + *expected_lists_col, + struct_of_list_of_struct->child(0) + ); + + // Test that the outer struct column is as expected. + cols.clear(); + cols.push_back(std::move(expected_lists_col)); + cudf::test::expect_columns_equivalent( + *(structs_column_wrapper{std::move(cols)}.release()), + *struct_of_list_of_struct + ); +} + TEST_F(StructColumnWrapperTest, SimpleTestExpectStructColumnsEqual) { auto ints_col = cudf::test::fixed_width_column_wrapper{{0, 1}, {0, 0}}.release(); diff --git a/cpp/tests/utilities/column_wrapper.hpp b/cpp/tests/utilities/column_wrapper.hpp index ff65385e543..5872fb516d2 100644 --- a/cpp/tests/utilities/column_wrapper.hpp +++ b/cpp/tests/utilities/column_wrapper.hpp @@ -16,6 +16,7 @@ #pragma once +#include #include #include #include @@ -1029,7 +1030,42 @@ class structs_column_wrapper : public detail::column_wrapper { init(std::move(child_columns), validity); } + /** + * @brief Constructs a struct column from the list of column wrappers for child columns. + * + * Example usage: + * @code{.cpp} + * // The following constructs a column for struct< int, string >. + * fixed_width_column_wrapper child_int_col_wrapper{ 1, 2, 3, 4, 5 }; + * string_column_wrapper child_string_col_wrapper {"All", "the", "leaves", "are", "brown"}; + * + * struct_column_wrapper struct_column_wrapper{ + * {child_int_col_wrapper, child_string_col_wrapper} + * cudf::test::make_counting_transform_iterator(0, [](auto i){ return i%2; }) // Validity. + * }; + * + * auto struct_col {struct_column_wrapper.release()}; + * @endcode + * + * @param child_columns_wrappers The list of child column wrappers + * @param validity Iterator returning the per-row validity bool + */ + template + structs_column_wrapper( + std::initializer_list> child_column_wrappers, + V validity_iter + ) { + std::vector> child_columns; + child_columns.reserve(child_column_wrappers.size()); + std::transform(child_column_wrappers.begin(), + child_column_wrappers.end(), + std::back_inserter(child_columns), + [&](auto column_wrapper) { return column_wrapper.get().release(); }); + init(std::move(child_columns), validity_iter); + } + private: + void init(std::vector>&& child_columns, std::vector const& validity) { @@ -1050,6 +1086,24 @@ class structs_column_wrapper : public detail::column_wrapper { validity.size() <= 0 ? rmm::device_buffer{0} : detail::make_null_mask(validity.begin(), validity.end())); } + + template + void init(std::vector>&& child_columns, + V validity_iterator) + { + size_type num_rows = child_columns.empty() ? 0 : child_columns[0]->size(); + + CUDF_EXPECTS(std::all_of(child_columns.begin(), + child_columns.end(), + [&](auto const& p_column) { return p_column->size() == num_rows; }), + "All struct member columns must have the same row count."); + + std::vector validity(num_rows); + std::copy(validity_iterator, validity_iterator+num_rows, validity.begin()); + + init(std::move(child_columns), validity); + } + }; } // namespace test From 906e04c862c3d1dbcce3cc8e749cb5270285444e Mon Sep 17 00:00:00 2001 From: Mithun RK Date: Tue, 11 Aug 2020 21:20:31 -0700 Subject: [PATCH 19/20] [struct] clang-format again. --- cpp/tests/structs/structs_column_tests.cu | 97 ++++++++++------------- cpp/tests/utilities/column_wrapper.hpp | 17 ++-- 2 files changed, 49 insertions(+), 65 deletions(-) diff --git a/cpp/tests/structs/structs_column_tests.cu b/cpp/tests/structs/structs_column_tests.cu index 0b849a941f8..865717b3abe 100644 --- a/cpp/tests/structs/structs_column_tests.cu +++ b/cpp/tests/structs/structs_column_tests.cu @@ -434,45 +434,40 @@ TYPED_TEST(TypedStructColumnWrapperTest, ListOfStructOfList) using namespace cudf::test; auto list_col = lists_column_wrapper{ - {{0},{1},{},{3},{4},{5,5},{6},{},{8},{9}}, - cudf::test::make_counting_transform_iterator(0, [](auto i){return i%2;}) - }; + {{0}, {1}, {}, {3}, {4}, {5, 5}, {6}, {}, {8}, {9}}, + cudf::test::make_counting_transform_iterator(0, [](auto i) { return i % 2; })}; // TODO: Struct cannot be compared with expect_columns_equal(), // if the struct has null values. After lists support "equivalence" // comparisons, the structs column needs to be modified to add nulls. - auto struct_of_lists_col = structs_column_wrapper{ {list_col} }.release(); + auto struct_of_lists_col = structs_column_wrapper{{list_col}}.release(); - auto list_of_struct_of_list_validity = make_counting_transform_iterator(0, [](auto i){ return i%3; }); + auto list_of_struct_of_list_validity = + make_counting_transform_iterator(0, [](auto i) { return i % 3; }); auto list_of_struct_of_list = cudf::make_lists_column( - 5, - std::move(fixed_width_column_wrapper{0,2,4,6,8,10}.release()), + 5, + std::move(fixed_width_column_wrapper{0, 2, 4, 6, 8, 10}.release()), std::move(struct_of_lists_col), cudf::UNKNOWN_NULL_COUNT, - detail::make_null_mask(list_of_struct_of_list_validity, list_of_struct_of_list_validity+5) - ); + detail::make_null_mask(list_of_struct_of_list_validity, list_of_struct_of_list_validity + 5)); // Compare with expected values. auto expected_level0_list = lists_column_wrapper{ - {{}, {1}, {}, {3}, {}, {5,5}, {}, {}, {}, {9}}, - make_counting_transform_iterator(0, [](auto i){return i%2;}) - }; + {{}, {1}, {}, {3}, {}, {5, 5}, {}, {}, {}, {9}}, + make_counting_transform_iterator(0, [](auto i) { return i % 2; })}; auto expected_level2_struct = structs_column_wrapper{{expected_level0_list}}.release(); - expect_columns_equivalent( - cudf::lists_column_view(*list_of_struct_of_list).child(), - *expected_level2_struct - ); + expect_columns_equivalent(cudf::lists_column_view(*list_of_struct_of_list).child(), + *expected_level2_struct); auto expected_level3_list = cudf::make_lists_column( - 5, - std::move(fixed_width_column_wrapper{0,2,4,6,8,10}.release()), + 5, + std::move(fixed_width_column_wrapper{0, 2, 4, 6, 8, 10}.release()), std::move(expected_level2_struct), cudf::UNKNOWN_NULL_COUNT, - detail::make_null_mask(list_of_struct_of_list_validity, list_of_struct_of_list_validity+5) - ); + detail::make_null_mask(list_of_struct_of_list_validity, list_of_struct_of_list_validity + 5)); expect_columns_equivalent(*list_of_struct_of_list, *expected_level3_list); } @@ -482,22 +477,23 @@ TYPED_TEST(TypedStructColumnWrapperTest, StructOfListOfStruct) using namespace cudf::test; auto ints_col = fixed_width_column_wrapper{ - {0,1,2,3,4,5,6,7,8,9}, - make_counting_transform_iterator(0, [](auto i){ return i%2; }) - }; + {0, 1, 2, 3, 4, 5, 6, 7, 8, 9}, + make_counting_transform_iterator(0, [](auto i) { return i % 2; })}; - auto structs_col = structs_column_wrapper{ - {ints_col}, - make_counting_transform_iterator(0, [](auto i){ return i < 6;}) // Last 4 structs are null. - }.release(); + auto structs_col = + structs_column_wrapper{ + {ints_col}, + make_counting_transform_iterator(0, [](auto i) { return i < 6; }) // Last 4 structs are null. + } + .release(); - auto list_validity = make_counting_transform_iterator(0, [](auto i){ return i%3; }); - auto lists_col = cudf::make_lists_column( - 5, - std::move(fixed_width_column_wrapper{0,2,4,6,8,10}.release()), - std::move(structs_col), - cudf::UNKNOWN_NULL_COUNT, - detail::make_null_mask(list_validity, list_validity+5)); + auto list_validity = make_counting_transform_iterator(0, [](auto i) { return i % 3; }); + auto lists_col = cudf::make_lists_column( + 5, + std::move(fixed_width_column_wrapper{0, 2, 4, 6, 8, 10}.release()), + std::move(structs_col), + cudf::UNKNOWN_NULL_COUNT, + detail::make_null_mask(list_validity, list_validity + 5)); std::vector> cols; cols.push_back(std::move(lists_col)); @@ -505,36 +501,27 @@ TYPED_TEST(TypedStructColumnWrapperTest, StructOfListOfStruct) // Check that the struct is constructed as expected. - auto expected_ints_col = fixed_width_column_wrapper{ - {0,1,0,3,0,5,0,0,0,0}, - {0,1,0,1,0,1,0,0,0,0} - }; + auto expected_ints_col = fixed_width_column_wrapper{{0, 1, 0, 3, 0, 5, 0, 0, 0, 0}, + {0, 1, 0, 1, 0, 1, 0, 0, 0, 0}}; - auto expected_structs_col = structs_column_wrapper{ - {expected_ints_col}, - {1,1,1,1,1,1,0,0,0,0} - }.release(); + auto expected_structs_col = + structs_column_wrapper{{expected_ints_col}, {1, 1, 1, 1, 1, 1, 0, 0, 0, 0}}.release(); auto expected_lists_col = cudf::make_lists_column( - 5, - std::move(fixed_width_column_wrapper{0,2,4,6,8,10}.release()), - std::move(expected_structs_col), - cudf::UNKNOWN_NULL_COUNT, - detail::make_null_mask(list_validity, list_validity+5)); + 5, + std::move(fixed_width_column_wrapper{0, 2, 4, 6, 8, 10}.release()), + std::move(expected_structs_col), + cudf::UNKNOWN_NULL_COUNT, + detail::make_null_mask(list_validity, list_validity + 5)); // Test that the lists child column is as expected. - cudf::test::expect_columns_equivalent( - *expected_lists_col, - struct_of_list_of_struct->child(0) - ); + cudf::test::expect_columns_equivalent(*expected_lists_col, struct_of_list_of_struct->child(0)); // Test that the outer struct column is as expected. cols.clear(); cols.push_back(std::move(expected_lists_col)); - cudf::test::expect_columns_equivalent( - *(structs_column_wrapper{std::move(cols)}.release()), - *struct_of_list_of_struct - ); + cudf::test::expect_columns_equivalent(*(structs_column_wrapper{std::move(cols)}.release()), + *struct_of_list_of_struct); } TEST_F(StructColumnWrapperTest, SimpleTestExpectStructColumnsEqual) diff --git a/cpp/tests/utilities/column_wrapper.hpp b/cpp/tests/utilities/column_wrapper.hpp index 5872fb516d2..90021c486f8 100644 --- a/cpp/tests/utilities/column_wrapper.hpp +++ b/cpp/tests/utilities/column_wrapper.hpp @@ -1050,22 +1050,21 @@ class structs_column_wrapper : public detail::column_wrapper { * @param child_columns_wrappers The list of child column wrappers * @param validity Iterator returning the per-row validity bool */ - template + template structs_column_wrapper( std::initializer_list> child_column_wrappers, - V validity_iter - ) { + V validity_iter) + { std::vector> child_columns; child_columns.reserve(child_column_wrappers.size()); std::transform(child_column_wrappers.begin(), child_column_wrappers.end(), std::back_inserter(child_columns), [&](auto column_wrapper) { return column_wrapper.get().release(); }); - init(std::move(child_columns), validity_iter); + init(std::move(child_columns), validity_iter); } private: - void init(std::vector>&& child_columns, std::vector const& validity) { @@ -1087,9 +1086,8 @@ class structs_column_wrapper : public detail::column_wrapper { : detail::make_null_mask(validity.begin(), validity.end())); } - template - void init(std::vector>&& child_columns, - V validity_iterator) + template + void init(std::vector>&& child_columns, V validity_iterator) { size_type num_rows = child_columns.empty() ? 0 : child_columns[0]->size(); @@ -1099,11 +1097,10 @@ class structs_column_wrapper : public detail::column_wrapper { "All struct member columns must have the same row count."); std::vector validity(num_rows); - std::copy(validity_iterator, validity_iterator+num_rows, validity.begin()); + std::copy(validity_iterator, validity_iterator + num_rows, validity.begin()); init(std::move(child_columns), validity); } - }; } // namespace test From 33808224b162064526facc7dfaee984a40089686 Mon Sep 17 00:00:00 2001 From: Mithun RK Date: Tue, 11 Aug 2020 22:31:56 -0700 Subject: [PATCH 20/20] [struct] Added tests for empty columns. --- cpp/tests/structs/structs_column_tests.cu | 57 +++++++++++++++++++++++ 1 file changed, 57 insertions(+) diff --git a/cpp/tests/structs/structs_column_tests.cu b/cpp/tests/structs/structs_column_tests.cu index 865717b3abe..a33170340f9 100644 --- a/cpp/tests/structs/structs_column_tests.cu +++ b/cpp/tests/structs/structs_column_tests.cu @@ -524,6 +524,63 @@ TYPED_TEST(TypedStructColumnWrapperTest, StructOfListOfStruct) *struct_of_list_of_struct); } +TYPED_TEST(TypedStructColumnWrapperTest, EmptyColumnsOfStructs) +{ + using namespace cudf::test; + + { + // Empty struct column. + auto empty_struct_column = structs_column_wrapper{}.release(); + EXPECT_TRUE(empty_struct_column->num_children() == 0); + EXPECT_TRUE(empty_struct_column->size() == 0); + EXPECT_TRUE(empty_struct_column->null_count() == 0); + } + + { + // Empty struct column. + auto empty_list_column = lists_column_wrapper{}; + auto struct_column = structs_column_wrapper{{empty_list_column}}.release(); + EXPECT_TRUE(struct_column->num_children() == 1); + EXPECT_TRUE(struct_column->size() == 0); + EXPECT_TRUE(struct_column->null_count() == 0); + + auto empty_list_of_structs = + cudf::make_lists_column(0, + fixed_width_column_wrapper{0}.release(), + std::move(struct_column), + cudf::UNKNOWN_NULL_COUNT, + {}); + + EXPECT_TRUE(empty_list_of_structs->size() == 0); + EXPECT_TRUE(empty_list_of_structs->null_count() == 0); + + auto child_struct_column = cudf::lists_column_view(*empty_list_of_structs).child(); + EXPECT_TRUE(child_struct_column.num_children() == 1); + EXPECT_TRUE(child_struct_column.size() == 0); + EXPECT_TRUE(child_struct_column.null_count() == 0); + } + + // TODO: Uncomment test after adding support to compare empty + // lists whose child columns may not be empty. + // { + // auto non_empty_column_of_numbers = + // fixed_width_column_wrapper{1,2,3,4,5}.release(); + // + // auto list_offsets = + // fixed_width_column_wrapper{0}.release(); + // + // auto empty_list_column = + // cudf::make_lists_column( + // 0, std::move(list_offsets), std::move(non_empty_column_of_numbers), 0, {}); + // + // expect_columns_equivalent(*lists_column_wrapper{}.release(), *empty_list_column); + // auto struct_column = structs_column_wrapper{{empty_list_column}}.release(); + // EXPECT_TRUE(struct_column->num_children() == 1); + // EXPECT_TRUE(struct_column->size() == 0); + // EXPECT_TRUE(struct_column->null_count() == 0); + // } +} + TEST_F(StructColumnWrapperTest, SimpleTestExpectStructColumnsEqual) { auto ints_col = cudf::test::fixed_width_column_wrapper{{0, 1}, {0, 0}}.release();