From 61dec86d02f4dd801afcd8425de32b38ed90cbcd Mon Sep 17 00:00:00 2001 From: Dave Baranec Date: Tue, 11 May 2021 18:42:06 -0500 Subject: [PATCH 1/4] struct_scalar support --- cpp/CMakeLists.txt | 2 +- cpp/include/cudf/scalar/scalar.hpp | 54 ++++++++ cpp/include/cudf/scalar/scalar_factories.hpp | 30 ++++- .../detail/structs_column_factories.hpp | 122 ++++++++++++++++++ cpp/include/cudf/types.hpp | 3 +- ...lumn_factories.cpp => column_factories.cu} | 17 ++- cpp/src/scalar/scalar.cpp | 49 +++++++ cpp/src/scalar/scalar_factories.cpp | 16 ++- cpp/src/structs/structs_column_factories.cu | 67 ++-------- cpp/src/structs/utilities.cu | 50 +++++++ cpp/src/structs/utilities.hpp | 19 +++ cpp/tests/column/factories_test.cpp | 34 ++++- cpp/tests/scalar/factories_test.cpp | 35 ++++- cpp/tests/scalar/scalar_test.cpp | 94 +++++++++++++- 14 files changed, 523 insertions(+), 69 deletions(-) create mode 100644 cpp/include/cudf/structs/detail/structs_column_factories.hpp rename cpp/src/column/{column_factories.cpp => column_factories.cu} (93%) diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index 198690e37ff..75789b10443 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -157,7 +157,7 @@ add_library(cudf src/bitmask/null_mask.cu src/column/column.cu src/column/column_device_view.cu - src/column/column_factories.cpp + src/column/column_factories.cu src/column/column_view.cpp src/comms/ipc/ipc.cpp src/copying/concatenate.cu diff --git a/cpp/include/cudf/scalar/scalar.hpp b/cpp/include/cudf/scalar/scalar.hpp index 3de8762c763..a5b4ee4a2ab 100644 --- a/cpp/include/cudf/scalar/scalar.hpp +++ b/cpp/include/cudf/scalar/scalar.hpp @@ -16,6 +16,7 @@ #pragma once #include +#include #include #include @@ -572,5 +573,58 @@ class list_scalar : public scalar { cudf::column _data; }; +/** + * @brief An owning class to represent a struct value in device memory + */ +class struct_scalar : public scalar { + public: + struct_scalar(); + ~struct_scalar() = default; + struct_scalar(struct_scalar&& other) = default; + struct_scalar(struct_scalar const& other) = default; + struct_scalar& operator=(struct_scalar const& other) = delete; + struct_scalar& operator=(struct_scalar&& other) = delete; + + /** + * @brief Construct a new struct scalar object from table_view + * + * The input table_view is deep-copied. + * + * @param data The table data to copy. + * @param is_valid Whether the value held by the scalar is valid + * @param stream CUDA stream used for device memory operations. + * @param mr Device memory resource to use for device memory allocation + */ + struct_scalar(table_view const& data, + bool is_valid = true, + rmm::cuda_stream_view stream = rmm::cuda_stream_default, + rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + + /** + * @brief Construct a new struct scalar object from a host_span of column_views + * + * The input column_views are deep-copied. + * + * @param data The column_views to copy. + * @param is_valid Whether the value held by the scalar is valid + * @param stream CUDA stream used for device memory operations. + * @param mr Device memory resource to use for device memory allocation + */ + struct_scalar(host_span data, + bool is_valid = true, + rmm::cuda_stream_view stream = rmm::cuda_stream_default, + rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + + /** + * @brief Returns a non-owning, immutable view to underlying device data + */ + table_view view() const; + + private: + table _data; + + void superimpose_nulls(rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr); +}; + /** @} */ // end of group } // namespace cudf diff --git a/cpp/include/cudf/scalar/scalar_factories.hpp b/cpp/include/cudf/scalar/scalar_factories.hpp index a0a0a22091e..f9c13193722 100644 --- a/cpp/include/cudf/scalar/scalar_factories.hpp +++ b/cpp/include/cudf/scalar/scalar_factories.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2020, NVIDIA CORPORATION. + * Copyright (c) 2019-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -156,5 +156,33 @@ std::unique_ptr make_fixed_point_scalar( return std::make_unique>(value, scale, true, stream, mr); } +/** + * @brief Construct a struct scalar using the given table_view. + * + * The columns must have 1 row. + * + * @param data The columnar data to store in the scalar object + * @param stream CUDA stream used for device memory operations. + * @param mr Device memory resource used to allocate the scalar's `data` and `is_valid` bool. + */ +std::unique_ptr make_struct_scalar( + table_view const& data, + rmm::cuda_stream_view stream = rmm::cuda_stream_default, + rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + +/** + * @brief Construct a struct scalar using the given span of column views. + * + * The columns must have 1 row. + * + * @param value The columnar data to store in the scalar object + * @param stream CUDA stream used for device memory operations. + * @param mr Device memory resource used to allocate the scalar's `data` and `is_valid` bool. + */ +std::unique_ptr make_struct_scalar( + host_span data, + rmm::cuda_stream_view stream = rmm::cuda_stream_default, + rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + /** @} */ // end of group } // namespace cudf diff --git a/cpp/include/cudf/structs/detail/structs_column_factories.hpp b/cpp/include/cudf/structs/detail/structs_column_factories.hpp new file mode 100644 index 00000000000..fda97222c73 --- /dev/null +++ b/cpp/include/cudf/structs/detail/structs_column_factories.hpp @@ -0,0 +1,122 @@ +/* + * Copyright (c) 2021, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include +#include + +#include + +#include + +#include +#include +namespace cudf { +namespace de + + // Helper function to superimpose validity of parent struct + // over the specified member (child) column. + void + superimpose_parent_nullmask(bitmask_type const* parent_null_mask, + std::size_t parent_null_mask_size, + size_type parent_null_count, + column& child, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) +{ + if (!child.nullable()) { + // Child currently has no null mask. Copy parent's null mask. + child.set_null_mask(rmm::device_buffer{parent_null_mask, parent_null_mask_size, stream, mr}); + child.set_null_count(parent_null_count); + } else { + // Child should have a null mask. + // `AND` the child's null mask with the parent's. + + auto current_child_mask = child.mutable_view().null_mask(); + + std::vector masks{ + reinterpret_cast(parent_null_mask), + reinterpret_cast(current_child_mask)}; + std::vector begin_bits{0, 0}; + cudf::detail::inplace_bitmask_and( + device_span(current_child_mask, num_bitmask_words(child.size())), + masks, + begin_bits, + child.size(), + stream, + mr); + child.set_null_count(UNKNOWN_NULL_COUNT); + } + + // If the child is also a struct, repeat for all grandchildren. + if (child.type().id() == cudf::type_id::STRUCT) { + const auto current_child_mask = child.mutable_view().null_mask(); + std::for_each(thrust::make_counting_iterator(0), + thrust::make_counting_iterator(child.num_children()), + [¤t_child_mask, &child, parent_null_mask_size, stream, mr](auto i) { + superimpose_parent_nullmask(current_child_mask, + parent_null_mask_size, + UNKNOWN_NULL_COUNT, + child.child(i), + stream, + mr); + }); + } +} // namespace + // devoidsuperimpose_parent_nullmask(bitmask_typeconst*parent_null_mask,std::size_tparent_null_mask_size,size_typeparent_null_count,column&child,rmm::cuda_stream_viewstream,rmm::mr::device_memory_resource*mr) + // devoidsuperimpose_parent_nullmask(bitmask_typeconst*parent_null_mask,std::size_tparent_null_mask_size,size_typeparent_null_count,column&child,rmm::cuda_stream_viewstream,rmm::mr::device_memory_resource*mr) + // devoidsuperimpose_parent_nullmask(bitmask_typeconst*parent_null_mask,std::size_tparent_null_mask_size,size_typeparent_null_count,column&child,rmm::cuda_stream_viewstream,rmm::mr::device_memory_resource*mr) + // devoidsuperimpose_parent_nullmask(bitmask_typeconst*parent_null_mask,std::size_tparent_null_mask_size,size_typeparent_null_count,column&child,rmm::cuda_stream_viewstream,rmm::mr::device_memory_resource*mr) +} // namespace cudf + +/// Column factory that adopts child columns. +std::unique_ptr make_structs_column( + size_type num_rows, + std::vector>&& child_columns, + size_type null_count, + rmm::device_buffer&& null_mask, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) +{ + CUDF_EXPECTS(null_count <= 0 || !null_mask.is_empty(), + "Struct column with nulls must be nullable."); + + CUDF_EXPECTS(std::all_of(child_columns.begin(), + child_columns.end(), + [&](auto const& child_col) { return num_rows == child_col->size(); }), + "Child columns must have the same number of rows as the Struct column."); + + if (!null_mask.is_empty()) { + for (auto& child : child_columns) { + superimpose_parent_nullmask(static_cast(null_mask.data()), + null_mask.size(), + null_count, + *child, + stream, + mr); + } + } + + return std::make_unique( + cudf::data_type{type_id::STRUCT}, + num_rows, + rmm::device_buffer{0, stream, mr}, // Empty data buffer. Structs hold no data. + null_mask, + null_count, + std::move(child_columns)); +} + +} // namespace cudf diff --git a/cpp/include/cudf/types.hpp b/cpp/include/cudf/types.hpp index b08fccc0d66..ddba575cb07 100644 --- a/cpp/include/cudf/types.hpp +++ b/cpp/include/cudf/types.hpp @@ -61,6 +61,7 @@ class scalar; // clang-format off class list_scalar; +class struct_scalar; class string_scalar; template class numeric_scalar; template class fixed_point_scalar; @@ -74,8 +75,6 @@ template class timestamp_scalar_device_view; template class duration_scalar_device_view; // clang-format on -class struct_scalar; - class table; class table_view; class mutable_table_view; diff --git a/cpp/src/column/column_factories.cpp b/cpp/src/column/column_factories.cu similarity index 93% rename from cpp/src/column/column_factories.cpp rename to cpp/src/column/column_factories.cu index 03339c2e0a8..d009db2d5b4 100644 --- a/cpp/src/column/column_factories.cpp +++ b/cpp/src/column/column_factories.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2021, NVIDIA CORPORATION. + * Copyright (c) 2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -16,6 +16,7 @@ #include #include +#include #include #include #include @@ -230,8 +231,18 @@ std::unique_ptr column_from_scalar_dispatch::operator() const&>(value); + auto iter = thrust::make_constant_iterator(0); + + auto children = detail::gather(ss.view(), iter, iter + size, out_of_bounds_policy::NULLIFY, stream, mr); + auto const is_valid = ss.is_valid(); + return make_structs_column(size, + std::move(children->release()), + is_valid ? 0 : size, + is_valid ? rmm::device_buffer{} : detail::create_null_mask(size, mask_state::ALL_NULL, stream, mr), + stream, + mr); } std::unique_ptr make_column_from_scalar(scalar const& s, diff --git a/cpp/src/scalar/scalar.cpp b/cpp/src/scalar/scalar.cpp index 858d2c063b3..9aa9f074dc6 100644 --- a/cpp/src/scalar/scalar.cpp +++ b/cpp/src/scalar/scalar.cpp @@ -14,7 +14,10 @@ * limitations under the License. */ +#include "structs/utilities.hpp" + #include +#include #include #include #include @@ -446,4 +449,50 @@ list_scalar::list_scalar(cudf::column&& data, column_view list_scalar::view() const { return _data.view(); } +struct_scalar::struct_scalar() : scalar(data_type(type_id::STRUCT)) {} + +struct_scalar::struct_scalar(table_view const& data, + bool is_valid, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) + : scalar(data_type(type_id::STRUCT), is_valid, stream, mr), _data(data, stream, mr) +{ + CUDF_EXPECTS( + std::all_of(data.begin(), data.end(), [](column_view const& col) { return col.size() == 1; }), + "Struct scalar inputs must have exactly 1 row"); + + // validity pushdown + if (!is_valid) { superimpose_nulls(stream, mr); } +} + +struct_scalar::struct_scalar(host_span data, + bool is_valid, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) + : scalar(data_type(type_id::STRUCT), is_valid, stream, mr), + _data(table_view{std::vector{data.begin(), data.end()}}, stream, mr) +{ + CUDF_EXPECTS( + std::all_of(data.begin(), data.end(), [](column_view const& col) { return col.size() == 1; }), + "Struct scalar inputs must have exactly 1 row"); + + // validity pushdown + if (!is_valid) { superimpose_nulls(stream, mr); } +} + +table_view struct_scalar::view() const { return _data.view(); } + +void struct_scalar::superimpose_nulls(rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) +{ + // push validity mask down + std::vector host_validity({0}); + auto validity = cudf::detail::make_device_uvector_sync(host_validity, stream, mr); + auto iter = thrust::make_counting_iterator(0); + std::for_each(iter, iter + _data.num_columns(), [&](size_type i) { + cudf::structs::detail::superimpose_parent_nulls( + validity.data(), 1, 1, _data.get_column(i), stream, mr); + }); +} + } // namespace cudf diff --git a/cpp/src/scalar/scalar_factories.cpp b/cpp/src/scalar/scalar_factories.cpp index 5714eaee864..4b9e7889970 100644 --- a/cpp/src/scalar/scalar_factories.cpp +++ b/cpp/src/scalar/scalar_factories.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2020, NVIDIA CORPORATION. + * Copyright (c) 2019-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -97,6 +97,20 @@ std::unique_ptr make_fixed_width_scalar(data_type type, return type_dispatcher(type, scalar_construction_helper{}, stream, mr); } +std::unique_ptr make_struct_scalar(table_view const& data, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) +{ + return std::make_unique(data, true, stream, mr); +} + +std::unique_ptr make_struct_scalar(host_span data, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) +{ + return std::make_unique(data, true, stream, mr); +} + namespace { struct default_scalar_functor { template diff --git a/cpp/src/structs/structs_column_factories.cu b/cpp/src/structs/structs_column_factories.cu index 330cecd1815..e8a529e228c 100644 --- a/cpp/src/structs/structs_column_factories.cu +++ b/cpp/src/structs/structs_column_factories.cu @@ -14,10 +14,10 @@ * limitations under the License. */ +#include "structs/utilities.hpp" + #include -#include #include - #include #include @@ -25,56 +25,6 @@ #include #include namespace cudf { -namespace { -// Helper function to superimpose validity of parent struct -// over the specified member (child) column. -void superimpose_parent_nullmask(bitmask_type const* parent_null_mask, - std::size_t parent_null_mask_size, - size_type parent_null_count, - column& child, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) -{ - if (!child.nullable()) { - // Child currently has no null mask. Copy parent's null mask. - child.set_null_mask(rmm::device_buffer{parent_null_mask, parent_null_mask_size, stream, mr}); - child.set_null_count(parent_null_count); - } else { - // Child should have a null mask. - // `AND` the child's null mask with the parent's. - - auto current_child_mask = child.mutable_view().null_mask(); - - std::vector masks{ - reinterpret_cast(parent_null_mask), - reinterpret_cast(current_child_mask)}; - std::vector begin_bits{0, 0}; - cudf::detail::inplace_bitmask_and( - device_span(current_child_mask, num_bitmask_words(child.size())), - masks, - begin_bits, - child.size(), - stream, - mr); - child.set_null_count(UNKNOWN_NULL_COUNT); - } - - // If the child is also a struct, repeat for all grandchildren. - if (child.type().id() == cudf::type_id::STRUCT) { - const auto current_child_mask = child.mutable_view().null_mask(); - std::for_each(thrust::make_counting_iterator(0), - thrust::make_counting_iterator(child.num_children()), - [¤t_child_mask, &child, parent_null_mask_size, stream, mr](auto i) { - superimpose_parent_nullmask(current_child_mask, - parent_null_mask_size, - UNKNOWN_NULL_COUNT, - child.child(i), - stream, - mr); - }); - } -} -} // namespace /// Column factory that adopts child columns. std::unique_ptr make_structs_column( @@ -95,12 +45,13 @@ std::unique_ptr make_structs_column( if (!null_mask.is_empty()) { for (auto& child : child_columns) { - superimpose_parent_nullmask(static_cast(null_mask.data()), - null_mask.size(), - null_count, - *child, - stream, - mr); + cudf::structs::detail::superimpose_parent_nulls( + static_cast(null_mask.data()), + null_mask.size(), + null_count, + *child, + stream, + mr); } } diff --git a/cpp/src/structs/utilities.cu b/cpp/src/structs/utilities.cu index 0e944cd975c..56b9ca392d0 100644 --- a/cpp/src/structs/utilities.cu +++ b/cpp/src/structs/utilities.cu @@ -16,6 +16,7 @@ #include +#include #include #include #include @@ -167,6 +168,55 @@ flatten_nested_columns(table_view const& input, return flattened_table{input, column_order, null_precedence, nullability}(); } +// Helper function to superimpose validity of parent struct +// over the specified member (child) column. +void superimpose_parent_nulls(bitmask_type const* parent_null_mask, + std::size_t parent_null_mask_size, + size_type parent_null_count, + column& child, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) +{ + if (!child.nullable()) { + // Child currently has no null mask. Copy parent's null mask. + child.set_null_mask(rmm::device_buffer{parent_null_mask, parent_null_mask_size, stream, mr}); + child.set_null_count(parent_null_count); + } else { + // Child should have a null mask. + // `AND` the child's null mask with the parent's. + + auto current_child_mask = child.mutable_view().null_mask(); + + std::vector masks{ + reinterpret_cast(parent_null_mask), + reinterpret_cast(current_child_mask)}; + std::vector begin_bits{0, 0}; + cudf::detail::inplace_bitmask_and( + device_span(current_child_mask, num_bitmask_words(child.size())), + masks, + begin_bits, + child.size(), + stream, + mr); + child.set_null_count(UNKNOWN_NULL_COUNT); + } + + // If the child is also a struct, repeat for all grandchildren. + if (child.type().id() == cudf::type_id::STRUCT) { + const auto current_child_mask = child.mutable_view().null_mask(); + std::for_each(thrust::make_counting_iterator(0), + thrust::make_counting_iterator(child.num_children()), + [¤t_child_mask, &child, parent_null_mask_size, stream, mr](auto i) { + superimpose_parent_nulls(current_child_mask, + parent_null_mask_size, + UNKNOWN_NULL_COUNT, + child.child(i), + stream, + mr); + }); + } +} + } // namespace detail } // namespace structs } // namespace cudf diff --git a/cpp/src/structs/utilities.hpp b/cpp/src/structs/utilities.hpp index ddd28fe70be..ac246e3d0ab 100644 --- a/cpp/src/structs/utilities.hpp +++ b/cpp/src/structs/utilities.hpp @@ -76,6 +76,25 @@ flatten_nested_columns(table_view const& input, std::vector const& null_precedence, column_nullability nullability = column_nullability::MATCH_INCOMING); +/** + * @brief Pushdown nulls from a parent mask into a child column, using AND. + * + * This function will recurse through all struct descendants. + * + * @param parent_null_mask The mask to be applied to descendants + * @param parent_null_mask_size Size (in rows) of the null mask + * @param parent_null_count Null count in the null mask + * @param column Column to apply the null mask to. + * @param stream CUDA stream used for device memory operations and kernel launches. + * @param mr Device memory resource used to allocate new device memory. + */ +void superimpose_parent_nulls(bitmask_type const* parent_null_mask, + std::size_t parent_null_mask_size, + size_type parent_null_count, + column& child, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr); + } // namespace detail } // namespace structs } // namespace cudf diff --git a/cpp/tests/column/factories_test.cpp b/cpp/tests/column/factories_test.cpp index d30929b90c6..683611ef81b 100644 --- a/cpp/tests/column/factories_test.cpp +++ b/cpp/tests/column/factories_test.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2020, NVIDIA CORPORATION. + * Copyright (c) 2019-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -15,6 +15,7 @@ */ #include +#include #include #include @@ -460,3 +461,34 @@ TEST_F(ColumnFactoryTest, DictionaryFromStringScalarError) cudf::string_scalar value("hello", false); EXPECT_THROW(cudf::make_dictionary_from_scalar(value, 1), cudf::logic_error); } + +void struct_from_scalar(bool is_valid) +{ + using LCW = cudf::test::lists_column_wrapper; + + cudf::test::fixed_width_column_wrapper col0{1}; + cudf::test::strings_column_wrapper col1{"abc"}; + cudf::test::lists_column_wrapper col2{{1, 2, 3}}; + cudf::test::lists_column_wrapper col3{LCW{}}; + + std::vector src_children({col0, col1, col2, col3}); + auto value = cudf::struct_scalar(src_children, is_valid); + cudf::test::structs_column_wrapper struct_col({col0, col1, col2, col3}, {is_valid}); + + auto const num_rows = 32; + auto result = cudf::make_column_from_scalar(value, num_rows); + + // generate a column of size num_rows + std::vector cols; + auto iter = thrust::make_counting_iterator(0); + std::transform(iter, iter + num_rows, std::back_inserter(cols), [&](int i) { + return static_cast(struct_col); + }); + auto expected = cudf::concatenate(cols); + + CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*result, *expected); +} + +TEST_F(ColumnFactoryTest, FromStructScalar) { struct_from_scalar(true); } + +TEST_F(ColumnFactoryTest, FromStructScalarNull) { struct_from_scalar(false); } \ No newline at end of file diff --git a/cpp/tests/scalar/factories_test.cpp b/cpp/tests/scalar/factories_test.cpp index fd0a92a0168..a3a218fe82c 100644 --- a/cpp/tests/scalar/factories_test.cpp +++ b/cpp/tests/scalar/factories_test.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2020, NVIDIA CORPORATION. + * Copyright (c) 2019-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -15,6 +15,8 @@ */ #include +#include +#include #include #include @@ -161,4 +163,35 @@ TYPED_TEST(FixedPointScalarFactory, ValueProvided) EXPECT_TRUE(s->is_valid()); } +struct StructScalarFactory : public ScalarFactoryTest { +}; + +TEST_F(StructScalarFactory, Basic) +{ + cudf::test::fixed_width_column_wrapper col0{1}; + cudf::test::strings_column_wrapper col1{"abc"}; + cudf::test::lists_column_wrapper col2{{1, 2, 3}}; + cudf::test::structs_column_wrapper struct_col({col0, col1, col2}); + cudf::column_view cv = static_cast(struct_col); + std::vector children(cv.child_begin(), cv.child_end()); + + // table_view constructor + { + auto sc = cudf::make_struct_scalar(cudf::table_view{children}); + auto s = static_cast*>(sc.get()); + auto sview = s->view(); + EXPECT_TRUE(s->is_valid()); + CUDF_TEST_EXPECT_TABLES_EQUIVALENT(cudf::table_view{children}, s->view()); + } + + // host_span constructor + { + auto sc = cudf::make_struct_scalar(cudf::host_span{children}); + auto s = static_cast*>(sc.get()); + auto sview = s->view(); + EXPECT_TRUE(s->is_valid()); + CUDF_TEST_EXPECT_TABLES_EQUIVALENT(cudf::table_view{children}, s->view()); + } +} + CUDF_TEST_PROGRAM_MAIN() diff --git a/cpp/tests/scalar/scalar_test.cpp b/cpp/tests/scalar/scalar_test.cpp index b869569bea3..871f9c570d1 100644 --- a/cpp/tests/scalar/scalar_test.cpp +++ b/cpp/tests/scalar/scalar_test.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019, NVIDIA CORPORATION. + * Copyright (c) 2019-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -20,6 +20,7 @@ #include #include #include +#include #include #include @@ -231,4 +232,95 @@ TEST_F(ListScalarTest, MoveConstructorNested) EXPECT_EQ(s.view().num_children(), 0); } +struct StructScalarTest : public cudf::test::BaseFixture { +}; + +TEST_F(StructScalarTest, Basic) +{ + cudf::test::fixed_width_column_wrapper col0{1}; + cudf::test::strings_column_wrapper col1{"abc"}; + cudf::test::lists_column_wrapper col2{{1, 2, 3}}; + cudf::test::structs_column_wrapper struct_col({col0, col1, col2}); + cudf::column_view cv = static_cast(struct_col); + std::vector children(cv.child_begin(), cv.child_end()); + + // table_view constructor + { + auto s = cudf::struct_scalar(children, true); + auto sview = s.view(); + EXPECT_TRUE(s.is_valid()); + CUDF_TEST_EXPECT_TABLES_EQUIVALENT(cudf::table_view{children}, s.view()); + } + + // host_span constructor + { + auto s = cudf::struct_scalar(cudf::host_span{children}, true); + auto sview = s.view(); + EXPECT_TRUE(s.is_valid()); + CUDF_TEST_EXPECT_TABLES_EQUIVALENT(cudf::table_view{children}, s.view()); + } +} + +TEST_F(StructScalarTest, BasicNulls) +{ + cudf::test::fixed_width_column_wrapper col0{1}; + cudf::test::strings_column_wrapper col1{"abc"}; + cudf::test::lists_column_wrapper col2{{1, 2, 3}}; + std::vector src_children({col0, col1, col2}); + + std::vector> src_columns; + + // structs_column_wrapper takes ownership of the incoming columns, so make a copy + src_columns.push_back(std::make_unique(src_children[0])); + src_columns.push_back(std::make_unique(src_children[1])); + src_columns.push_back(std::make_unique(src_children[2])); + cudf::test::structs_column_wrapper valid_struct_col(std::move(src_columns), {1}); + cudf::column_view vcv = static_cast(valid_struct_col); + std::vector valid_children(vcv.child_begin(), vcv.child_end()); + + // structs_column_wrapper takes ownership of the incoming columns, so make a copy + src_columns.push_back(std::make_unique(src_children[0])); + src_columns.push_back(std::make_unique(src_children[1])); + src_columns.push_back(std::make_unique(src_children[2])); + cudf::test::structs_column_wrapper invalid_struct_col(std::move(src_columns), {0}); + cudf::column_view icv = static_cast(invalid_struct_col); + std::vector invalid_children(icv.child_begin(), icv.child_end()); + + // table_view constructor + { + auto s = cudf::struct_scalar(cudf::table_view{src_children}, true); + auto sview = s.view(); + EXPECT_TRUE(s.is_valid()); + CUDF_TEST_EXPECT_TABLES_EQUIVALENT(cudf::table_view{valid_children}, s.view()); + } + // host_span constructor + { + auto s = cudf::struct_scalar(cudf::host_span{src_children}, true); + auto sview = s.view(); + EXPECT_TRUE(s.is_valid()); + CUDF_TEST_EXPECT_TABLES_EQUIVALENT(cudf::table_view{valid_children}, s.view()); + } + + // with nulls, we expect the incoming children to get nullified by passing false to + // the scalar constructor itself. so we use the unmodified `children` as the input, but + // we compare against the modified `invalid_children` produced by the source column as + // proof that the scalar did the validity pushdown. + + // table_view constructor + { + auto s = cudf::struct_scalar(cudf::table_view{src_children}, false); + auto sview = s.view(); + EXPECT_TRUE(!s.is_valid()); + CUDF_TEST_EXPECT_TABLES_EQUIVALENT(cudf::table_view{invalid_children}, s.view()); + } + + // host_span constructor + { + auto s = cudf::struct_scalar(cudf::host_span{src_children}, false); + auto sview = s.view(); + EXPECT_TRUE(!s.is_valid()); + CUDF_TEST_EXPECT_TABLES_EQUIVALENT(cudf::table_view{invalid_children}, s.view()); + } +} + CUDF_TEST_PROGRAM_MAIN() From a3ec7f25628d76b6ec6dc10c246ce6e4bb19c8c7 Mon Sep 17 00:00:00 2001 From: Dave Baranec Date: Tue, 11 May 2021 18:59:42 -0500 Subject: [PATCH 2/4] Remove a rogue file that somehow got committed. --- .../detail/structs_column_factories.hpp | 122 ------------------ cpp/tests/scalar/factories_test.cpp | 8 +- 2 files changed, 4 insertions(+), 126 deletions(-) delete mode 100644 cpp/include/cudf/structs/detail/structs_column_factories.hpp diff --git a/cpp/include/cudf/structs/detail/structs_column_factories.hpp b/cpp/include/cudf/structs/detail/structs_column_factories.hpp deleted file mode 100644 index fda97222c73..00000000000 --- a/cpp/include/cudf/structs/detail/structs_column_factories.hpp +++ /dev/null @@ -1,122 +0,0 @@ -/* - * Copyright (c) 2021, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include -#include -#include - -#include - -#include - -#include -#include -namespace cudf { -namespace de - - // Helper function to superimpose validity of parent struct - // over the specified member (child) column. - void - superimpose_parent_nullmask(bitmask_type const* parent_null_mask, - std::size_t parent_null_mask_size, - size_type parent_null_count, - column& child, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) -{ - if (!child.nullable()) { - // Child currently has no null mask. Copy parent's null mask. - child.set_null_mask(rmm::device_buffer{parent_null_mask, parent_null_mask_size, stream, mr}); - child.set_null_count(parent_null_count); - } else { - // Child should have a null mask. - // `AND` the child's null mask with the parent's. - - auto current_child_mask = child.mutable_view().null_mask(); - - std::vector masks{ - reinterpret_cast(parent_null_mask), - reinterpret_cast(current_child_mask)}; - std::vector begin_bits{0, 0}; - cudf::detail::inplace_bitmask_and( - device_span(current_child_mask, num_bitmask_words(child.size())), - masks, - begin_bits, - child.size(), - stream, - mr); - child.set_null_count(UNKNOWN_NULL_COUNT); - } - - // If the child is also a struct, repeat for all grandchildren. - if (child.type().id() == cudf::type_id::STRUCT) { - const auto current_child_mask = child.mutable_view().null_mask(); - std::for_each(thrust::make_counting_iterator(0), - thrust::make_counting_iterator(child.num_children()), - [¤t_child_mask, &child, parent_null_mask_size, stream, mr](auto i) { - superimpose_parent_nullmask(current_child_mask, - parent_null_mask_size, - UNKNOWN_NULL_COUNT, - child.child(i), - stream, - mr); - }); - } -} // namespace - // devoidsuperimpose_parent_nullmask(bitmask_typeconst*parent_null_mask,std::size_tparent_null_mask_size,size_typeparent_null_count,column&child,rmm::cuda_stream_viewstream,rmm::mr::device_memory_resource*mr) - // devoidsuperimpose_parent_nullmask(bitmask_typeconst*parent_null_mask,std::size_tparent_null_mask_size,size_typeparent_null_count,column&child,rmm::cuda_stream_viewstream,rmm::mr::device_memory_resource*mr) - // devoidsuperimpose_parent_nullmask(bitmask_typeconst*parent_null_mask,std::size_tparent_null_mask_size,size_typeparent_null_count,column&child,rmm::cuda_stream_viewstream,rmm::mr::device_memory_resource*mr) - // devoidsuperimpose_parent_nullmask(bitmask_typeconst*parent_null_mask,std::size_tparent_null_mask_size,size_typeparent_null_count,column&child,rmm::cuda_stream_viewstream,rmm::mr::device_memory_resource*mr) -} // namespace cudf - -/// Column factory that adopts child columns. -std::unique_ptr make_structs_column( - size_type num_rows, - std::vector>&& child_columns, - size_type null_count, - rmm::device_buffer&& null_mask, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) -{ - CUDF_EXPECTS(null_count <= 0 || !null_mask.is_empty(), - "Struct column with nulls must be nullable."); - - CUDF_EXPECTS(std::all_of(child_columns.begin(), - child_columns.end(), - [&](auto const& child_col) { return num_rows == child_col->size(); }), - "Child columns must have the same number of rows as the Struct column."); - - if (!null_mask.is_empty()) { - for (auto& child : child_columns) { - superimpose_parent_nullmask(static_cast(null_mask.data()), - null_mask.size(), - null_count, - *child, - stream, - mr); - } - } - - return std::make_unique( - cudf::data_type{type_id::STRUCT}, - num_rows, - rmm::device_buffer{0, stream, mr}, // Empty data buffer. Structs hold no data. - null_mask, - null_count, - std::move(child_columns)); -} - -} // namespace cudf diff --git a/cpp/tests/scalar/factories_test.cpp b/cpp/tests/scalar/factories_test.cpp index a3a218fe82c..ade90947568 100644 --- a/cpp/tests/scalar/factories_test.cpp +++ b/cpp/tests/scalar/factories_test.cpp @@ -177,8 +177,8 @@ TEST_F(StructScalarFactory, Basic) // table_view constructor { - auto sc = cudf::make_struct_scalar(cudf::table_view{children}); - auto s = static_cast*>(sc.get()); + auto sc = cudf::make_struct_scalar(cudf::table_view{children}); + auto s = static_cast*>(sc.get()); auto sview = s->view(); EXPECT_TRUE(s->is_valid()); CUDF_TEST_EXPECT_TABLES_EQUIVALENT(cudf::table_view{children}, s->view()); @@ -186,8 +186,8 @@ TEST_F(StructScalarFactory, Basic) // host_span constructor { - auto sc = cudf::make_struct_scalar(cudf::host_span{children}); - auto s = static_cast*>(sc.get()); + auto sc = cudf::make_struct_scalar(cudf::host_span{children}); + auto s = static_cast*>(sc.get()); auto sview = s->view(); EXPECT_TRUE(s->is_valid()); CUDF_TEST_EXPECT_TABLES_EQUIVALENT(cudf::table_view{children}, s->view()); From cc26de7bd10da349c5a2c899eafc29d3b422a52f Mon Sep 17 00:00:00 2001 From: Dave Baranec Date: Fri, 14 May 2021 11:46:04 -0500 Subject: [PATCH 3/4] PR review changes. --- cpp/CMakeLists.txt | 3 +- cpp/include/cudf/scalar/scalar.hpp | 3 +- cpp/include/cudf/scalar/scalar_factories.hpp | 2 +- cpp/src/column/column_factories.cpp | 175 ++++++++++++++++++ cpp/src/column/column_factories.cu | 175 ++---------------- cpp/src/scalar/scalar.cpp | 25 +-- cpp/src/scalar/scalar_factories.cpp | 2 +- cpp/src/structs/structs_column_factories.cu | 7 +- .../structs/{utilities.cu => utilities.cpp} | 14 +- cpp/src/structs/utilities.hpp | 5 +- cpp/tests/column/factories_test.cpp | 2 +- cpp/tests/scalar/scalar_test.cpp | 18 +- 12 files changed, 224 insertions(+), 207 deletions(-) create mode 100644 cpp/src/column/column_factories.cpp rename cpp/src/structs/{utilities.cu => utilities.cpp} (92%) diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index 95294310ffd..b663f92360c 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -156,6 +156,7 @@ add_library(cudf src/bitmask/is_element_valid.cpp src/column/column.cu src/column/column_device_view.cu + src/column/column_factories.cpp src/column/column_factories.cu src/column/column_view.cpp src/comms/ipc/ipc.cpp @@ -373,7 +374,7 @@ add_library(cudf src/structs/copying/concatenate.cu src/structs/structs_column_factories.cu src/structs/structs_column_view.cpp - src/structs/utilities.cu + src/structs/utilities.cpp src/table/table.cpp src/table/table_device_view.cu src/table/table_view.cpp diff --git a/cpp/include/cudf/scalar/scalar.hpp b/cpp/include/cudf/scalar/scalar.hpp index a5b4ee4a2ab..3025c01d747 100644 --- a/cpp/include/cudf/scalar/scalar.hpp +++ b/cpp/include/cudf/scalar/scalar.hpp @@ -610,7 +610,7 @@ class struct_scalar : public scalar { * @param stream CUDA stream used for device memory operations. * @param mr Device memory resource to use for device memory allocation */ - struct_scalar(host_span data, + struct_scalar(host_span data, bool is_valid = true, rmm::cuda_stream_view stream = rmm::cuda_stream_default, rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); @@ -623,6 +623,7 @@ class struct_scalar : public scalar { private: table _data; + void init(bool is_valid, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr); void superimpose_nulls(rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr); }; diff --git a/cpp/include/cudf/scalar/scalar_factories.hpp b/cpp/include/cudf/scalar/scalar_factories.hpp index f980bed05e7..b96a8c65a04 100644 --- a/cpp/include/cudf/scalar/scalar_factories.hpp +++ b/cpp/include/cudf/scalar/scalar_factories.hpp @@ -192,7 +192,7 @@ std::unique_ptr make_struct_scalar( * @param mr Device memory resource used to allocate the scalar's `data` and `is_valid` bool. */ std::unique_ptr make_struct_scalar( - host_span data, + host_span data, rmm::cuda_stream_view stream = rmm::cuda_stream_default, rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); diff --git a/cpp/src/column/column_factories.cpp b/cpp/src/column/column_factories.cpp new file mode 100644 index 00000000000..2587271193d --- /dev/null +++ b/cpp/src/column/column_factories.cpp @@ -0,0 +1,175 @@ +/* + * Copyright (c) 2019-2021, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +namespace cudf { +namespace { +struct size_of_helper { + cudf::data_type type; + template ()>* = nullptr> + constexpr int operator()() const + { + CUDF_FAIL("Invalid, non fixed-width element type."); + return 0; + } + + template () && not is_fixed_point()>* = nullptr> + constexpr int operator()() const noexcept + { + return sizeof(T); + } + + template ()>* = nullptr> + constexpr int operator()() const noexcept + { + // Only want the sizeof fixed_point::Rep as fixed_point::scale is stored in data_type + return sizeof(typename T::rep); + } +}; +} // namespace + +std::size_t size_of(data_type element_type) +{ + CUDF_EXPECTS(is_fixed_width(element_type), "Invalid element type."); + return cudf::type_dispatcher(element_type, size_of_helper{element_type}); +} + +// Empty column of specified type +std::unique_ptr make_empty_column(data_type type) +{ + CUDF_EXPECTS(type.id() == type_id::EMPTY || !cudf::is_nested(type), + "make_empty_column is invalid to call on nested types"); + return std::make_unique(type, 0, rmm::device_buffer{}); +} + +// Allocate storage for a specified number of numeric elements +std::unique_ptr make_numeric_column(data_type type, + size_type size, + mask_state state, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) +{ + CUDF_FUNC_RANGE(); + CUDF_EXPECTS(is_numeric(type), "Invalid, non-numeric type."); + + return std::make_unique(type, + size, + rmm::device_buffer{size * cudf::size_of(type), stream, mr}, + detail::create_null_mask(size, state, stream, mr), + state_null_count(state, size), + std::vector>{}); +} + +// Allocate storage for a specified number of numeric elements +std::unique_ptr make_fixed_point_column(data_type type, + size_type size, + mask_state state, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) +{ + CUDF_FUNC_RANGE(); + CUDF_EXPECTS(is_fixed_point(type), "Invalid, non-fixed_point type."); + + return std::make_unique(type, + size, + rmm::device_buffer{size * cudf::size_of(type), stream, mr}, + detail::create_null_mask(size, state, stream, mr), + state_null_count(state, size), + std::vector>{}); +} + +// Allocate storage for a specified number of timestamp elements +std::unique_ptr make_timestamp_column(data_type type, + size_type size, + mask_state state, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) +{ + CUDF_FUNC_RANGE(); + CUDF_EXPECTS(is_timestamp(type), "Invalid, non-timestamp type."); + + return std::make_unique(type, + size, + rmm::device_buffer{size * cudf::size_of(type), stream, mr}, + detail::create_null_mask(size, state, stream, mr), + state_null_count(state, size), + std::vector>{}); +} + +// Allocate storage for a specified number of duration elements +std::unique_ptr make_duration_column(data_type type, + size_type size, + mask_state state, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) +{ + CUDF_FUNC_RANGE(); + CUDF_EXPECTS(is_duration(type), "Invalid, non-duration type."); + + return std::make_unique(type, + size, + rmm::device_buffer{size * cudf::size_of(type), stream, mr}, + detail::create_null_mask(size, state, stream, mr), + state_null_count(state, size), + std::vector>{}); +} + +// Allocate storage for a specified number of fixed width elements +std::unique_ptr make_fixed_width_column(data_type type, + size_type size, + mask_state state, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) +{ + CUDF_FUNC_RANGE(); + CUDF_EXPECTS(is_fixed_width(type), "Invalid, non-fixed-width type."); + + // clang-format off + if (is_timestamp (type)) return make_timestamp_column (type, size, state, stream, mr); + else if (is_duration (type)) return make_duration_column (type, size, state, stream, mr); + else if (is_fixed_point(type)) return make_fixed_point_column(type, size, state, stream, mr); + else return make_numeric_column (type, size, state, stream, mr); + /// clang-format on +} + +std::unique_ptr make_dictionary_from_scalar(scalar const& s, + size_type size, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) +{ + if (size == 0) return make_empty_column(data_type{type_id::DICTIONARY32}); + CUDF_EXPECTS(s.is_valid(), "cannot create a dictionary with a null key"); + return make_dictionary_column( + make_column_from_scalar(s, 1, stream, mr), + make_column_from_scalar(numeric_scalar(0), size, stream, mr), + rmm::device_buffer{0, stream, mr}, + 0); +} + +} // namespace cudf \ No newline at end of file diff --git a/cpp/src/column/column_factories.cu b/cpp/src/column/column_factories.cu index d009db2d5b4..60e642ea3d5 100644 --- a/cpp/src/column/column_factories.cu +++ b/cpp/src/column/column_factories.cu @@ -17,148 +17,13 @@ #include #include #include -#include -#include #include -#include #include -#include #include -#include -#include - -#include namespace cudf { -namespace { -struct size_of_helper { - cudf::data_type type; - template ()>* = nullptr> - constexpr int operator()() const - { - CUDF_FAIL("Invalid, non fixed-width element type."); - return 0; - } - - template () && not is_fixed_point()>* = nullptr> - constexpr int operator()() const noexcept - { - return sizeof(T); - } - - template ()>* = nullptr> - constexpr int operator()() const noexcept - { - // Only want the sizeof fixed_point::Rep as fixed_point::scale is stored in data_type - return sizeof(typename T::rep); - } -}; -} // namespace - -std::size_t size_of(data_type element_type) -{ - CUDF_EXPECTS(is_fixed_width(element_type), "Invalid element type."); - return cudf::type_dispatcher(element_type, size_of_helper{element_type}); -} - -// Empty column of specified type -std::unique_ptr make_empty_column(data_type type) -{ - CUDF_EXPECTS(type.id() == type_id::EMPTY || !cudf::is_nested(type), - "make_empty_column is invalid to call on nested types"); - return std::make_unique(type, 0, rmm::device_buffer{}); -} - -// Allocate storage for a specified number of numeric elements -std::unique_ptr make_numeric_column(data_type type, - size_type size, - mask_state state, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) -{ - CUDF_FUNC_RANGE(); - CUDF_EXPECTS(is_numeric(type), "Invalid, non-numeric type."); - - return std::make_unique(type, - size, - rmm::device_buffer{size * cudf::size_of(type), stream, mr}, - detail::create_null_mask(size, state, stream, mr), - state_null_count(state, size), - std::vector>{}); -} - -// Allocate storage for a specified number of numeric elements -std::unique_ptr make_fixed_point_column(data_type type, - size_type size, - mask_state state, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) -{ - CUDF_FUNC_RANGE(); - CUDF_EXPECTS(is_fixed_point(type), "Invalid, non-fixed_point type."); - - return std::make_unique(type, - size, - rmm::device_buffer{size * cudf::size_of(type), stream, mr}, - detail::create_null_mask(size, state, stream, mr), - state_null_count(state, size), - std::vector>{}); -} - -// Allocate storage for a specified number of timestamp elements -std::unique_ptr make_timestamp_column(data_type type, - size_type size, - mask_state state, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) -{ - CUDF_FUNC_RANGE(); - CUDF_EXPECTS(is_timestamp(type), "Invalid, non-timestamp type."); - - return std::make_unique(type, - size, - rmm::device_buffer{size * cudf::size_of(type), stream, mr}, - detail::create_null_mask(size, state, stream, mr), - state_null_count(state, size), - std::vector>{}); -} - -// Allocate storage for a specified number of duration elements -std::unique_ptr make_duration_column(data_type type, - size_type size, - mask_state state, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) -{ - CUDF_FUNC_RANGE(); - CUDF_EXPECTS(is_duration(type), "Invalid, non-duration type."); - - return std::make_unique(type, - size, - rmm::device_buffer{size * cudf::size_of(type), stream, mr}, - detail::create_null_mask(size, state, stream, mr), - state_null_count(state, size), - std::vector>{}); -} -// Allocate storage for a specified number of fixed width elements -std::unique_ptr make_fixed_width_column(data_type type, - size_type size, - mask_state state, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) -{ - CUDF_FUNC_RANGE(); - CUDF_EXPECTS(is_fixed_width(type), "Invalid, non-fixed-width type."); - - // clang-format off - if (is_timestamp (type)) return make_timestamp_column (type, size, state, stream, mr); - else if (is_duration (type)) return make_duration_column (type, size, state, stream, mr); - else if (is_fixed_point(type)) return make_fixed_point_column(type, size, state, stream, mr); - else return make_numeric_column (type, size, state, stream, mr); - /// clang-format on -} +namespace { struct column_from_scalar_dispatch { template @@ -187,11 +52,8 @@ std::unique_ptr column_from_scalar_dispatch::operator()(value.type(), - size, - rmm::device_buffer{0, stream, mr}, - null_mask, - size); + return std::make_unique( + value.type(), size, rmm::device_buffer{0, stream, mr}, null_mask, size); // Create a strings column_view with all nulls and no children. // Since we are setting every row to the scalar, the fill() never needs to access @@ -231,20 +93,25 @@ std::unique_ptr column_from_scalar_dispatch::operator() const&>(value); +{ + auto ss = static_cast const&>(value); auto iter = thrust::make_constant_iterator(0); - auto children = detail::gather(ss.view(), iter, iter + size, out_of_bounds_policy::NULLIFY, stream, mr); + auto children = + detail::gather(ss.view(), iter, iter + size, out_of_bounds_policy::NULLIFY, stream, mr); auto const is_valid = ss.is_valid(); - return make_structs_column(size, - std::move(children->release()), + return make_structs_column(size, + std::move(children->release()), is_valid ? 0 : size, - is_valid ? rmm::device_buffer{} : detail::create_null_mask(size, mask_state::ALL_NULL, stream, mr), + is_valid + ? rmm::device_buffer{} + : detail::create_null_mask(size, mask_state::ALL_NULL, stream, mr), stream, mr); } +} // anonymous namespace + std::unique_ptr make_column_from_scalar(scalar const& s, size_type size, rmm::cuda_stream_view stream, @@ -254,18 +121,4 @@ std::unique_ptr make_column_from_scalar(scalar const& s, return type_dispatcher(s.type(), column_from_scalar_dispatch{}, s, size, stream, mr); } -std::unique_ptr make_dictionary_from_scalar(scalar const& s, - size_type size, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) -{ - if (size == 0) return make_empty_column(data_type{type_id::DICTIONARY32}); - CUDF_EXPECTS(s.is_valid(), "cannot create a dictionary with a null key"); - return make_dictionary_column( - make_column_from_scalar(s, 1, stream, mr), - make_column_from_scalar(numeric_scalar(0), size, stream, mr), - rmm::device_buffer{0, stream, mr}, - 0); -} - } // namespace cudf diff --git a/cpp/src/scalar/scalar.cpp b/cpp/src/scalar/scalar.cpp index 9aa9f074dc6..252d1af3948 100644 --- a/cpp/src/scalar/scalar.cpp +++ b/cpp/src/scalar/scalar.cpp @@ -457,31 +457,34 @@ struct_scalar::struct_scalar(table_view const& data, rmm::mr::device_memory_resource* mr) : scalar(data_type(type_id::STRUCT), is_valid, stream, mr), _data(data, stream, mr) { - CUDF_EXPECTS( - std::all_of(data.begin(), data.end(), [](column_view const& col) { return col.size() == 1; }), - "Struct scalar inputs must have exactly 1 row"); - - // validity pushdown - if (!is_valid) { superimpose_nulls(stream, mr); } + init(is_valid, stream, mr); } -struct_scalar::struct_scalar(host_span data, +struct_scalar::struct_scalar(host_span data, bool is_valid, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) : scalar(data_type(type_id::STRUCT), is_valid, stream, mr), _data(table_view{std::vector{data.begin(), data.end()}}, stream, mr) { + init(is_valid, stream, mr); +} + +table_view struct_scalar::view() const { return _data.view(); } + +void struct_scalar::init(bool is_valid, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) +{ + table_view tv = static_cast(_data); CUDF_EXPECTS( - std::all_of(data.begin(), data.end(), [](column_view const& col) { return col.size() == 1; }), + std::all_of(tv.begin(), tv.end(), [](column_view const& col) { return col.size() == 1; }), "Struct scalar inputs must have exactly 1 row"); // validity pushdown if (!is_valid) { superimpose_nulls(stream, mr); } } -table_view struct_scalar::view() const { return _data.view(); } - void struct_scalar::superimpose_nulls(rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { @@ -491,7 +494,7 @@ void struct_scalar::superimpose_nulls(rmm::cuda_stream_view stream, auto iter = thrust::make_counting_iterator(0); std::for_each(iter, iter + _data.num_columns(), [&](size_type i) { cudf::structs::detail::superimpose_parent_nulls( - validity.data(), 1, 1, _data.get_column(i), stream, mr); + validity.data(), 1, _data.get_column(i), stream, mr); }); } diff --git a/cpp/src/scalar/scalar_factories.cpp b/cpp/src/scalar/scalar_factories.cpp index 053baa004fb..e1d71b279d6 100644 --- a/cpp/src/scalar/scalar_factories.cpp +++ b/cpp/src/scalar/scalar_factories.cpp @@ -111,7 +111,7 @@ std::unique_ptr make_struct_scalar(table_view const& data, return std::make_unique(data, true, stream, mr); } -std::unique_ptr make_struct_scalar(host_span data, +std::unique_ptr make_struct_scalar(host_span data, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { diff --git a/cpp/src/structs/structs_column_factories.cu b/cpp/src/structs/structs_column_factories.cu index e8a529e228c..de58cbb0d68 100644 --- a/cpp/src/structs/structs_column_factories.cu +++ b/cpp/src/structs/structs_column_factories.cu @@ -46,12 +46,7 @@ std::unique_ptr make_structs_column( if (!null_mask.is_empty()) { for (auto& child : child_columns) { cudf::structs::detail::superimpose_parent_nulls( - static_cast(null_mask.data()), - null_mask.size(), - null_count, - *child, - stream, - mr); + static_cast(null_mask.data()), null_count, *child, stream, mr); } } diff --git a/cpp/src/structs/utilities.cu b/cpp/src/structs/utilities.cpp similarity index 92% rename from cpp/src/structs/utilities.cu rename to cpp/src/structs/utilities.cpp index 56b9ca392d0..6cc537d2042 100644 --- a/cpp/src/structs/utilities.cu +++ b/cpp/src/structs/utilities.cpp @@ -171,7 +171,6 @@ flatten_nested_columns(table_view const& input, // Helper function to superimpose validity of parent struct // over the specified member (child) column. void superimpose_parent_nulls(bitmask_type const* parent_null_mask, - std::size_t parent_null_mask_size, size_type parent_null_count, column& child, rmm::cuda_stream_view stream, @@ -179,7 +178,8 @@ void superimpose_parent_nulls(bitmask_type const* parent_null_mask, { if (!child.nullable()) { // Child currently has no null mask. Copy parent's null mask. - child.set_null_mask(rmm::device_buffer{parent_null_mask, parent_null_mask_size, stream, mr}); + child.set_null_mask(rmm::device_buffer{ + parent_null_mask, cudf::bitmask_allocation_size_bytes(child.size()), stream, mr}); child.set_null_count(parent_null_count); } else { // Child should have a null mask. @@ -206,13 +206,9 @@ void superimpose_parent_nulls(bitmask_type const* parent_null_mask, const auto current_child_mask = child.mutable_view().null_mask(); std::for_each(thrust::make_counting_iterator(0), thrust::make_counting_iterator(child.num_children()), - [¤t_child_mask, &child, parent_null_mask_size, stream, mr](auto i) { - superimpose_parent_nulls(current_child_mask, - parent_null_mask_size, - UNKNOWN_NULL_COUNT, - child.child(i), - stream, - mr); + [¤t_child_mask, &child, stream, mr](auto i) { + superimpose_parent_nulls( + current_child_mask, UNKNOWN_NULL_COUNT, child.child(i), stream, mr); }); } } diff --git a/cpp/src/structs/utilities.hpp b/cpp/src/structs/utilities.hpp index ac246e3d0ab..eee9ca63146 100644 --- a/cpp/src/structs/utilities.hpp +++ b/cpp/src/structs/utilities.hpp @@ -79,17 +79,16 @@ flatten_nested_columns(table_view const& input, /** * @brief Pushdown nulls from a parent mask into a child column, using AND. * - * This function will recurse through all struct descendants. + * This function will recurse through all struct descendants. It is expected that + * the size of `parent_null_mask` in bits is the same as `child.size()` * * @param parent_null_mask The mask to be applied to descendants - * @param parent_null_mask_size Size (in rows) of the null mask * @param parent_null_count Null count in the null mask * @param column Column to apply the null mask to. * @param stream CUDA stream used for device memory operations and kernel launches. * @param mr Device memory resource used to allocate new device memory. */ void superimpose_parent_nulls(bitmask_type const* parent_null_mask, - std::size_t parent_null_mask_size, size_type parent_null_count, column& child, rmm::cuda_stream_view stream, diff --git a/cpp/tests/column/factories_test.cpp b/cpp/tests/column/factories_test.cpp index 683611ef81b..71f65eedd91 100644 --- a/cpp/tests/column/factories_test.cpp +++ b/cpp/tests/column/factories_test.cpp @@ -491,4 +491,4 @@ void struct_from_scalar(bool is_valid) TEST_F(ColumnFactoryTest, FromStructScalar) { struct_from_scalar(true); } -TEST_F(ColumnFactoryTest, FromStructScalarNull) { struct_from_scalar(false); } \ No newline at end of file +TEST_F(ColumnFactoryTest, FromStructScalarNull) { struct_from_scalar(false); } diff --git a/cpp/tests/scalar/scalar_test.cpp b/cpp/tests/scalar/scalar_test.cpp index 871f9c570d1..7a12c2fd27d 100644 --- a/cpp/tests/scalar/scalar_test.cpp +++ b/cpp/tests/scalar/scalar_test.cpp @@ -246,16 +246,14 @@ TEST_F(StructScalarTest, Basic) // table_view constructor { - auto s = cudf::struct_scalar(children, true); - auto sview = s.view(); + auto s = cudf::struct_scalar(children, true); EXPECT_TRUE(s.is_valid()); CUDF_TEST_EXPECT_TABLES_EQUIVALENT(cudf::table_view{children}, s.view()); } // host_span constructor { - auto s = cudf::struct_scalar(cudf::host_span{children}, true); - auto sview = s.view(); + auto s = cudf::struct_scalar(cudf::host_span{children}, true); EXPECT_TRUE(s.is_valid()); CUDF_TEST_EXPECT_TABLES_EQUIVALENT(cudf::table_view{children}, s.view()); } @@ -288,15 +286,13 @@ TEST_F(StructScalarTest, BasicNulls) // table_view constructor { - auto s = cudf::struct_scalar(cudf::table_view{src_children}, true); - auto sview = s.view(); + auto s = cudf::struct_scalar(cudf::table_view{src_children}, true); EXPECT_TRUE(s.is_valid()); CUDF_TEST_EXPECT_TABLES_EQUIVALENT(cudf::table_view{valid_children}, s.view()); } // host_span constructor { - auto s = cudf::struct_scalar(cudf::host_span{src_children}, true); - auto sview = s.view(); + auto s = cudf::struct_scalar(cudf::host_span{src_children}, true); EXPECT_TRUE(s.is_valid()); CUDF_TEST_EXPECT_TABLES_EQUIVALENT(cudf::table_view{valid_children}, s.view()); } @@ -308,16 +304,14 @@ TEST_F(StructScalarTest, BasicNulls) // table_view constructor { - auto s = cudf::struct_scalar(cudf::table_view{src_children}, false); - auto sview = s.view(); + auto s = cudf::struct_scalar(cudf::table_view{src_children}, false); EXPECT_TRUE(!s.is_valid()); CUDF_TEST_EXPECT_TABLES_EQUIVALENT(cudf::table_view{invalid_children}, s.view()); } // host_span constructor { - auto s = cudf::struct_scalar(cudf::host_span{src_children}, false); - auto sview = s.view(); + auto s = cudf::struct_scalar(cudf::host_span{src_children}, false); EXPECT_TRUE(!s.is_valid()); CUDF_TEST_EXPECT_TABLES_EQUIVALENT(cudf::table_view{invalid_children}, s.view()); } From d58938d88e3d453640264c065e0f5e6abc77242f Mon Sep 17 00:00:00 2001 From: Dave Baranec Date: Mon, 17 May 2021 10:30:48 -0500 Subject: [PATCH 4/4] Removed unused variables. Misc cleanup. --- cpp/src/column/column_factories.cpp | 2 +- cpp/src/scalar/scalar.cpp | 2 +- cpp/src/structs/structs_column_factories.cu | 2 +- cpp/tests/scalar/factories_test.cpp | 10 ++++------ 4 files changed, 7 insertions(+), 9 deletions(-) diff --git a/cpp/src/column/column_factories.cpp b/cpp/src/column/column_factories.cpp index 2587271193d..86059a72e8f 100644 --- a/cpp/src/column/column_factories.cpp +++ b/cpp/src/column/column_factories.cpp @@ -172,4 +172,4 @@ std::unique_ptr make_dictionary_from_scalar(scalar const& s, 0); } -} // namespace cudf \ No newline at end of file +} // namespace cudf diff --git a/cpp/src/scalar/scalar.cpp b/cpp/src/scalar/scalar.cpp index 252d1af3948..f21b1c7ca20 100644 --- a/cpp/src/scalar/scalar.cpp +++ b/cpp/src/scalar/scalar.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "structs/utilities.hpp" +#include #include #include diff --git a/cpp/src/structs/structs_column_factories.cu b/cpp/src/structs/structs_column_factories.cu index de58cbb0d68..d8b94d1c448 100644 --- a/cpp/src/structs/structs_column_factories.cu +++ b/cpp/src/structs/structs_column_factories.cu @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "structs/utilities.hpp" +#include #include #include diff --git a/cpp/tests/scalar/factories_test.cpp b/cpp/tests/scalar/factories_test.cpp index ade90947568..e2f2c26a16e 100644 --- a/cpp/tests/scalar/factories_test.cpp +++ b/cpp/tests/scalar/factories_test.cpp @@ -177,18 +177,16 @@ TEST_F(StructScalarFactory, Basic) // table_view constructor { - auto sc = cudf::make_struct_scalar(cudf::table_view{children}); - auto s = static_cast*>(sc.get()); - auto sview = s->view(); + auto sc = cudf::make_struct_scalar(cudf::table_view{children}); + auto s = static_cast*>(sc.get()); EXPECT_TRUE(s->is_valid()); CUDF_TEST_EXPECT_TABLES_EQUIVALENT(cudf::table_view{children}, s->view()); } // host_span constructor { - auto sc = cudf::make_struct_scalar(cudf::host_span{children}); - auto s = static_cast*>(sc.get()); - auto sview = s->view(); + auto sc = cudf::make_struct_scalar(cudf::host_span{children}); + auto s = static_cast*>(sc.get()); EXPECT_TRUE(s->is_valid()); CUDF_TEST_EXPECT_TABLES_EQUIVALENT(cudf::table_view{children}, s->view()); }