From fa0048d34ca24efb3d541c9499d82a10f82d03ea Mon Sep 17 00:00:00 2001 From: Dave Baranec Date: Tue, 25 May 2021 15:35:44 -0500 Subject: [PATCH 1/2] Add empty_like(scalar). --- cpp/include/cudf/copying.hpp | 8 +++ cpp/src/copying/copy.cpp | 83 +++++++++++++++++++++++++++++ cpp/src/copying/copy.cu | 2 +- cpp/tests/copying/utility_tests.cpp | 78 +++++++++++++++++++++++++++ 4 files changed, 170 insertions(+), 1 deletion(-) diff --git a/cpp/include/cudf/copying.hpp b/cpp/include/cudf/copying.hpp index bb44e33f786..c9a4eab2154 100644 --- a/cpp/include/cudf/copying.hpp +++ b/cpp/include/cudf/copying.hpp @@ -182,6 +182,14 @@ enum class mask_allocation_policy { */ std::unique_ptr empty_like(column_view const& input); +/** + * @brief Initializes and returns an empty column of the same type as the `input`. + * + * @param[in] input Scalar to emulate + * @return std::unique_ptr An empty column of same type as `input` + */ +std::unique_ptr empty_like(scalar const& input); + /** * @brief Creates an uninitialized new column of the same size and type as the `input`. * Supports only fixed-width types. diff --git a/cpp/src/copying/copy.cpp b/cpp/src/copying/copy.cpp index 50bf168037d..670c147aa7e 100644 --- a/cpp/src/copying/copy.cpp +++ b/cpp/src/copying/copy.cpp @@ -21,6 +21,7 @@ #include #include #include +#include #include #include @@ -44,6 +45,79 @@ inline mask_state should_allocate_mask(mask_allocation_policy mask_alloc, bool m } } +/** + * @brief Functor to produce an empty column of the same type as the + * input scalar. + * + * In the case of nested types, full column hierarchy is preserved. + */ +template +struct scalar_empty_like_functor_impl { + std::unique_ptr operator()(scalar const& input) + { + return cudf::make_empty_column(input.type()); + } +}; + +template <> +struct scalar_empty_like_functor_impl { + std::unique_ptr operator()(scalar const& input) + { + return cudf::strings::detail::make_empty_strings_column(rmm::cuda_stream_default, + rmm::mr::get_current_device_resource()); + } +}; + +template <> +struct scalar_empty_like_functor_impl { + std::unique_ptr operator()(scalar const& input) + { + auto ls = static_cast(&input); + + // TODO: add a manual constructor for lists_column_view. + column_view offsets{cudf::data_type{cudf::type_id::INT32}, 0, nullptr}; + std::vector children; + children.push_back(offsets); + children.push_back(ls->view()); + column_view lcv{cudf::data_type{cudf::type_id::LIST}, 0, nullptr, nullptr, 0, 0, children}; + + return empty_like(lcv); + } +}; + +template <> +struct scalar_empty_like_functor_impl { + std::unique_ptr operator()(scalar const& input) + { + auto ss = static_cast(&input); + + // TODO: add a manual constructor for structs_column_view + // TODO: add cudf::get_element() support for structs + cudf::table_view tbl = ss->view(); + std::vector children(tbl.begin(), tbl.end()); + column_view scv{cudf::data_type{cudf::type_id::STRUCT}, 0, nullptr, nullptr, 0, 0, children}; + + return empty_like(scv); + } +}; + +template <> +struct scalar_empty_like_functor_impl { + std::unique_ptr operator()(scalar const& input) + { + CUDF_FAIL("Dictionary scalars not supported"); + } +}; + +struct scalar_empty_like_functor { + template + std::unique_ptr operator()(scalar const& input) + { + scalar_empty_like_functor_impl func; + return func(input); + } +}; + } // namespace /* @@ -91,6 +165,15 @@ std::unique_ptr empty_like(column_view const& input) input.type(), 0, rmm::device_buffer{}, rmm::device_buffer{}, 0, std::move(children)); } +/* + * Initializes and returns an empty column of the same type as the `input`. + */ +std::unique_ptr empty_like(scalar const& input) +{ + CUDF_FUNC_RANGE(); + return type_dispatcher(input.type(), detail::scalar_empty_like_functor{}, input); +}; + /* * Creates a table of empty columns with the same types as the `input_table` */ diff --git a/cpp/src/copying/copy.cu b/cpp/src/copying/copy.cu index f12c9dcf006..9f8e6f7bdcb 100644 --- a/cpp/src/copying/copy.cu +++ b/cpp/src/copying/copy.cu @@ -289,7 +289,7 @@ std::unique_ptr copy_if_else(Left const& lhs, CUDF_EXPECTS(boolean_mask.type() == data_type(type_id::BOOL8), "Boolean mask column must be of type type_id::BOOL8"); - if (boolean_mask.is_empty()) { return cudf::make_empty_column(lhs.type()); } + if (boolean_mask.is_empty()) { return cudf::empty_like(lhs); } auto bool_mask_device_p = column_device_view::create(boolean_mask); column_device_view bool_mask_device = *bool_mask_device_p; diff --git a/cpp/tests/copying/utility_tests.cpp b/cpp/tests/copying/utility_tests.cpp index 8c54252afbd..c7bbe4199f0 100644 --- a/cpp/tests/copying/utility_tests.cpp +++ b/cpp/tests/copying/utility_tests.cpp @@ -72,6 +72,84 @@ TEST_F(EmptyLikeStringTest, ColumnStringTest) check_empty_string_columns(got->view(), strings); } +template +struct EmptyLikeScalarTest : public cudf::test::BaseFixture { +}; + +TYPED_TEST_CASE(EmptyLikeScalarTest, cudf::test::FixedWidthTypes); + +TYPED_TEST(EmptyLikeScalarTest, FixedWidth) +{ + // make a column + auto input = make_fixed_width_column( + cudf::data_type{cudf::type_to_id()}, 1, rmm::device_buffer{}); + // get a scalar out of it + std::unique_ptr sc = cudf::get_element(*input, 0); + + // empty_like(column) -> column + auto expected = cudf::empty_like(*input); + // empty_like(scalar) -> column + auto result = cudf::empty_like(*sc); + CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*expected, *result); +} + +struct EmptyLikeScalarStringTest : public EmptyLikeScalarTest { +}; + +TEST_F(EmptyLikeScalarStringTest, String) +{ + // make a column + cudf::test::strings_column_wrapper input{"abc"}; + + // get a scalar out of it + std::unique_ptr sc = cudf::get_element(input, 0); + + // empty_like(column) -> column + auto expected = cudf::empty_like(input); + // empty_like(scalar) -> column + auto result = cudf::empty_like(*sc); + CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*expected, *result); +} + +struct EmptyLikeScalarListTest : public EmptyLikeScalarTest { +}; + +TEST_F(EmptyLikeScalarListTest, List) +{ + // make a column + cudf::test::lists_column_wrapper input{{{"abc", "def"}, {"h", "ijk"}}, + {{"123", "456"}, {"78"}}}; + // get a scalar out of it + std::unique_ptr sc = cudf::get_element(input, 0); + + // empty_like(column) -> column + auto expected = cudf::empty_like(input); + // empty_like(scalar) -> column + auto result = cudf::empty_like(*sc); + CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*expected, *result); +} + +struct EmptyLikeScalarStructTest : public EmptyLikeScalarTest { +}; + +TEST_F(EmptyLikeScalarStructTest, Struct) +{ + cudf::test::lists_column_wrapper col0{{{"abc", "def"}, {"h", "ijk"}}}; + cudf::test::strings_column_wrapper col1{"abc"}; + cudf::test::fixed_width_column_wrapper col2{1.0f}; + // scalar. TODO: make cudf::get_element() work for struct scalars + cudf::table_view tbl({col0, col1, col2}); + cudf::struct_scalar sc(tbl); + // column + cudf::test::structs_column_wrapper input({col0, col1, col2}); + + // empty_like(column) -> column + auto expected = cudf::empty_like(input); + // empty_like(scalar) -> column + auto result = cudf::empty_like(sc); + CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*expected, *result); +} + std::unique_ptr create_table(cudf::size_type size, cudf::mask_state state) { auto num_column_1 = make_numeric_column(cudf::data_type{cudf::type_id::INT64}, size, state); From 1c0ba8467bbe507fd2433c31de0c41b6a4e0c6f0 Mon Sep 17 00:00:00 2001 From: Dave Baranec Date: Tue, 25 May 2021 16:35:48 -0500 Subject: [PATCH 2/2] Add tests for empty nested types (columns and scalars) for copy_if_else. --- cpp/tests/copying/copy_tests.cu | 79 +++++++++++++++++++++++++++++++++ 1 file changed, 79 insertions(+) diff --git a/cpp/tests/copying/copy_tests.cu b/cpp/tests/copying/copy_tests.cu index e9249a6bd0e..03869c37adf 100644 --- a/cpp/tests/copying/copy_tests.cu +++ b/cpp/tests/copying/copy_tests.cu @@ -285,6 +285,85 @@ TYPED_TEST(CopyTest, CopyIfElseBadInputLength) } } +struct CopyEmptyNested : public cudf::test::BaseFixture { +}; + +TEST_F(CopyEmptyNested, CopyIfElseTestEmptyNestedColumns) +{ + // lists + { + cudf::test::lists_column_wrapper col{{{"abc", "def"}, {"xyz"}}}; + auto lhs = cudf::empty_like(col); + auto rhs = cudf::empty_like(col); + cudf::test::fixed_width_column_wrapper mask{}; + + auto expected = empty_like(col); + + auto out = cudf::copy_if_else(*lhs, *rhs, mask); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(out->view(), *expected); + } + + // structs + { + cudf::test::lists_column_wrapper _col0{{{"abc", "def"}, {"xyz"}}}; + auto col0 = cudf::empty_like(_col0); + cudf::test::fixed_width_column_wrapper col1; + + std::vector> cols; + cols.push_back(std::move(col0)); + cols.push_back(col1.release()); + cudf::test::structs_column_wrapper struct_col(std::move(cols)); + auto lhs = cudf::empty_like(struct_col); + auto rhs = cudf::empty_like(struct_col); + + cudf::test::fixed_width_column_wrapper mask{}; + + auto expected = cudf::empty_like(struct_col); + + auto out = cudf::copy_if_else(*lhs, *rhs, mask); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(out->view(), *expected); + } +} + +TEST_F(CopyEmptyNested, CopyIfElseTestEmptyNestedScalars) +{ + // lists + { + cudf::test::lists_column_wrapper _col{{{"abc", "def"}, {"xyz"}}}; + std::unique_ptr lhs = cudf::get_element(_col, 0); + std::unique_ptr rhs = cudf::get_element(_col, 0); + + cudf::test::fixed_width_column_wrapper mask{}; + + auto expected = empty_like(_col); + + auto out = cudf::copy_if_else(*lhs, *rhs, mask); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(out->view(), *expected); + } + + // structs + { + cudf::test::lists_column_wrapper col0{{{"abc", "def"}, {"xyz"}}}; + cudf::test::fixed_width_column_wrapper col1{1}; + + cudf::table_view tbl({col0, col1}); + cudf::struct_scalar lhs(tbl); + cudf::struct_scalar rhs(tbl); + + std::vector> cols; + cols.push_back(col0.release()); + cols.push_back(col1.release()); + cudf::test::structs_column_wrapper struct_col(std::move(cols)); + + cudf::test::fixed_width_column_wrapper mask{}; + + auto expected = cudf::empty_like(struct_col); + + auto out = cudf::copy_if_else(lhs, rhs, mask); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(out->view(), *expected); + } +} + template struct CopyTestNumeric : public cudf::test::BaseFixture { };