diff --git a/conda/recipes/libcudf/meta.yaml b/conda/recipes/libcudf/meta.yaml index 3b0267251d3..1c7d332dd27 100644 --- a/conda/recipes/libcudf/meta.yaml +++ b/conda/recipes/libcudf/meta.yaml @@ -78,6 +78,7 @@ test: - test -f $PREFIX/include/cudf/detail/groupby/sort_helper.hpp - test -f $PREFIX/include/cudf/detail/hashing.hpp - test -f $PREFIX/include/cudf/detail/interop.hpp + - test -f $PREFIX/include/cudf/detail/is_element_valid.hpp - test -f $PREFIX/include/cudf/detail/null_mask.hpp - test -f $PREFIX/include/cudf/detail/nvtx/nvtx3.hpp - test -f $PREFIX/include/cudf/detail/nvtx/ranges.hpp diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index 198690e37ff..ba06be334a5 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -155,6 +155,7 @@ add_library(cudf src/binaryop/compiled/binary_ops.cu src/labeling/label_bins.cu src/bitmask/null_mask.cu + src/bitmask/is_element_valid.cpp src/column/column.cu src/column/column_device_view.cu src/column/column_factories.cpp diff --git a/cpp/docs/DEVELOPER_GUIDE.md b/cpp/docs/DEVELOPER_GUIDE.md index 3169a0884c6..3abc35f9bd2 100644 --- a/cpp/docs/DEVELOPER_GUIDE.md +++ b/cpp/docs/DEVELOPER_GUIDE.md @@ -255,6 +255,11 @@ currently supported by cudf. Each type of value is represented by a separate typ which are all derived from `cudf::scalar`. e.g. A `numeric_scalar` holds a single numerical value, a `string_scalar` holds a single string. The data for the stored value resides in device memory. +A `list_scalar` holds the underlying data of a single list. This means the underlying data can be any type +that cudf supports. For example, a `list_scalar` representing a list of integers stores a `cudf::column` +of type `INT32`. A `list_scalar` representing a list of lists of integers stores a `cudf::column` of +type `LIST`, which in turn stores a column of type `INT32`. 
+ |Value type|Scalar class|Notes| |-|-|-| |fixed-width|`fixed_width_scalar`| `T` can be any fixed-width type| @@ -263,6 +268,7 @@ a `string_scalar` holds a single string. The data for the stored value resides i |timestamp|`timestamp_scalar` | `T` can be `timestamp_D`, `timestamp_s`, etc.| |duration|`duration_scalar` | `T` can be `duration_D`, `duration_s`, etc.| |string|`string_scalar`| This class object is immutable| +|list|`list_scalar`| Underlying data can be any type supported by cudf | ### Construction `scalar`s can be created using either their respective constructors or using factory functions like @@ -285,11 +291,16 @@ auto s1 = static_cast(s.get()); ``` ### Passing to device -Each scalar type has a corresponding non-owning device view class which allows access to the value -and its validity from the device. This can be obtained using the function +Each scalar type, except `list_scalar`, has a corresponding non-owning device view class which allows +access to the value and its validity from the device. This can be obtained using the function `get_scalar_device_view(ScalarType s)`. Note that a device view is not provided for a base scalar object, only for the derived typed scalar class objects. +The underlying data for `list_scalar` can be accessed via the `view()` method. For non-nested data, +the device view can be obtained via the function `column_device_view::create(column_view)`. For nested +data, a specialized device view for list columns can be constructed via +`lists_column_device_view(column_device_view)`. + # libcudf++ API and Implementation ## Streams diff --git a/cpp/include/cudf/detail/is_element_valid.hpp b/cpp/include/cudf/detail/is_element_valid.hpp new file mode 100644 index 00000000000..fff67f107d9 --- /dev/null +++ b/cpp/include/cudf/detail/is_element_valid.hpp @@ -0,0 +1,46 @@ +/* + * Copyright (c) 2021, NVIDIA CORPORATION. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include + +#include + +namespace cudf { +namespace detail { + +/** + * @brief Return validity of a row + * + * Retrieves the validity (NULL or non-NULL) of the specified row from device memory. + * + * @note Synchronizes `stream`. + * + * @throw cudf::logic_error if `element_index < 0 or >= col_view.size()` + * + * @param col_view The column to retrieve the validity from. + * @param element_index The index of the row to retrieve. + * @param stream The stream to use for copying the validity to the host. + * @return Host boolean that indicates the validity of the row. + */ + +bool is_element_valid_sync(column_view const& col_view, + size_type element_index, + rmm::cuda_stream_view stream = rmm::cuda_stream_default); + +} // namespace detail +} // namespace cudf diff --git a/cpp/include/cudf/scalar/scalar_factories.hpp b/cpp/include/cudf/scalar/scalar_factories.hpp index a0a0a22091e..8e10e122571 100644 --- a/cpp/include/cudf/scalar/scalar_factories.hpp +++ b/cpp/include/cudf/scalar/scalar_factories.hpp @@ -156,5 +156,17 @@ std::unique_ptr make_fixed_point_scalar( return std::make_unique>(value, scale, true, stream, mr); } +/** + * @brief Construct scalar using the given column of elements + * + * @param elements Elements of the list + * @param stream CUDA stream used for device memory operations. 
+ * @param mr Device memory resource used to allocate the scalar's `data` and `is_valid` bool. + */ +std::unique_ptr make_list_scalar( + column_view elements, + rmm::cuda_stream_view stream = rmm::cuda_stream_default, + rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + /** @} */ // end of group } // namespace cudf diff --git a/cpp/src/bitmask/is_element_valid.cpp b/cpp/src/bitmask/is_element_valid.cpp new file mode 100644 index 00000000000..47870e01567 --- /dev/null +++ b/cpp/src/bitmask/is_element_valid.cpp @@ -0,0 +1,47 @@ + +/* + * Copyright (c) 2021, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include +#include +#include + +#include + +namespace cudf { +namespace detail { + +bool is_element_valid_sync(column_view const& col_view, + size_type element_index, + rmm::cuda_stream_view stream) +{ + CUDF_EXPECTS(element_index >= 0 and element_index < col_view.size(), "invalid index."); + if (!col_view.nullable()) { return true; } + + bitmask_type word; + // null_mask() returns device ptr to bitmask without offset + size_type index = element_index + col_view.offset(); + CUDA_TRY(cudaMemcpyAsync(&word, + col_view.null_mask() + word_index(index), + sizeof(bitmask_type), + cudaMemcpyDeviceToHost, + stream.value())); + stream.synchronize(); + return static_cast(word & (bitmask_type{1} << intra_word_index(index))); +} + +} // namespace detail +} // namespace cudf diff --git a/cpp/src/copying/get_element.cu b/cpp/src/copying/get_element.cu index 446f9b0dda9..fa5902efc0e 100644 --- a/cpp/src/copying/get_element.cu +++ b/cpp/src/copying/get_element.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, NVIDIA CORPORATION. + * Copyright (c) 2020-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -17,8 +17,11 @@ #include #include #include +#include #include #include +#include +#include #include #include @@ -122,7 +125,19 @@ struct get_element_functor { rmm::cuda_stream_view stream, rmm::mr::device_memory_resource *mr = rmm::mr::get_current_device_resource()) { - CUDF_FAIL("get_element_functor not supported for list_view"); + bool valid = is_element_valid_sync(input, index, stream); + + if (valid) { + lists_column_view lcv(input); + // Make a copy of the row + auto row_slice_contents = + lists::detail::copy_slice(lcv, index, index + 1, stream, mr)->release(); + // Construct scalar with row data + return std::make_unique( + std::move(*row_slice_contents.children[1]), valid, stream, mr); + } else { + return make_default_constructed_scalar(data_type(type_id::LIST)); + } } template ()> *p = nullptr> diff --git a/cpp/src/scalar/scalar_factories.cpp b/cpp/src/scalar/scalar_factories.cpp index 5714eaee864..7054e2b5c8f 100644 --- a/cpp/src/scalar/scalar_factories.cpp +++ b/cpp/src/scalar/scalar_factories.cpp @@ -97,6 +97,13 @@ std::unique_ptr make_fixed_width_scalar(data_type type, return type_dispatcher(type, scalar_construction_helper{}, stream, mr); } +std::unique_ptr make_list_scalar(column_view elements, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) +{ + return std::make_unique(elements, true, stream, mr); +} + namespace { struct default_scalar_functor { template @@ -125,7 +132,7 @@ template <> std::unique_ptr default_scalar_functor::operator()( rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { - CUDF_FAIL("list_view type not supported"); + return std::make_unique(column(), false, stream, mr); } template <> diff --git a/cpp/tests/CMakeLists.txt b/cpp/tests/CMakeLists.txt index 29dd4319bfc..8fe7a3c9d66 100644 --- a/cpp/tests/CMakeLists.txt +++ b/cpp/tests/CMakeLists.txt @@ -368,7 +368,8 @@ ConfigureTest(TEXT_TEST ConfigureTest(BITMASK_TEST bitmask/valid_if_tests.cu bitmask/set_nullmask_tests.cu - 
bitmask/bitmask_tests.cu) + bitmask/bitmask_tests.cu + bitmask/is_element_valid_tests.cpp) ################################################################################################### diff --git a/cpp/tests/bitmask/is_element_valid_tests.cpp b/cpp/tests/bitmask/is_element_valid_tests.cpp new file mode 100644 index 00000000000..383448c0dd8 --- /dev/null +++ b/cpp/tests/bitmask/is_element_valid_tests.cpp @@ -0,0 +1,92 @@ +/* + * Copyright (c) 2021, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include +#include + +#include +#include +#include + +#include + +namespace cudf { +namespace test { + +struct IsElementValidTest : public BaseFixture { +}; + +TEST_F(IsElementValidTest, IsElementValidBasic) +{ + fixed_width_column_wrapper col({1, 1, 1, 1, 1}, {1, 0, 0, 0, 1}); + EXPECT_TRUE(cudf::detail::is_element_valid_sync(col, 0)); + EXPECT_FALSE(cudf::detail::is_element_valid_sync(col, 1)); + EXPECT_FALSE(cudf::detail::is_element_valid_sync(col, 2)); + EXPECT_FALSE(cudf::detail::is_element_valid_sync(col, 3)); + EXPECT_TRUE(cudf::detail::is_element_valid_sync(col, 4)); +} + +TEST_F(IsElementValidTest, IsElementValidLarge) +{ + auto filter = [](auto i) { return static_cast(i % 3); }; + auto val = thrust::make_counting_iterator(0); + auto valid = cudf::detail::make_counting_transform_iterator(0, filter); + size_type num_rows = 1000; + + fixed_width_column_wrapper col(val, val + num_rows, valid); + + for (int i = 0; i < num_rows; i++) { + EXPECT_EQ(cudf::detail::is_element_valid_sync(col, i), filter(i)); + } +} + +TEST_F(IsElementValidTest, IsElementValidOffset) +{ + fixed_width_column_wrapper col({1, 1, 1, 1, 1}, {1, 0, 0, 0, 1}); + { + auto offset_col = slice(col, {1, 5}).front(); + EXPECT_FALSE(cudf::detail::is_element_valid_sync(offset_col, 0)); + EXPECT_FALSE(cudf::detail::is_element_valid_sync(offset_col, 1)); + EXPECT_FALSE(cudf::detail::is_element_valid_sync(offset_col, 2)); + EXPECT_TRUE(cudf::detail::is_element_valid_sync(offset_col, 3)); + } + { + auto offset_col = slice(col, {2, 5}).front(); + EXPECT_FALSE(cudf::detail::is_element_valid_sync(offset_col, 0)); + EXPECT_FALSE(cudf::detail::is_element_valid_sync(offset_col, 1)); + EXPECT_TRUE(cudf::detail::is_element_valid_sync(offset_col, 2)); + } +} + +TEST_F(IsElementValidTest, IsElementValidOffsetLarge) +{ + auto filter = [](auto i) { return static_cast(i % 3); }; + size_type offset = 37; + auto val = thrust::make_counting_iterator(0); + auto valid = 
cudf::detail::make_counting_transform_iterator(0, filter); + size_type num_rows = 1000; + + fixed_width_column_wrapper col(val, val + num_rows, valid); + auto offset_col = slice(col, {offset, num_rows}).front(); + + for (int i = 0; i < offset_col.size(); i++) { + EXPECT_EQ(cudf::detail::is_element_valid_sync(offset_col, i), filter(i + offset)); + } +} + +} // namespace test + +} // namespace cudf diff --git a/cpp/tests/copying/get_value_tests.cpp b/cpp/tests/copying/get_value_tests.cpp index 5a7a4f95066..7d2bc458462 100644 --- a/cpp/tests/copying/get_value_tests.cpp +++ b/cpp/tests/copying/get_value_tests.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, NVIDIA CORPORATION. + * Copyright (c) 2020-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -14,7 +14,10 @@ * limitations under the License. */ +#include +#include #include +#include #include #include #include @@ -41,7 +44,7 @@ TYPED_TEST(FixedWidthGetValueTest, BasicGet) auto s = get_element(col, 0); using ScalarType = scalar_type_t; - auto typed_s = static_cast(s.get()); + auto typed_s = static_cast(s.get()); EXPECT_TRUE(s->is_valid()); EXPECT_EQ(cudf::test::make_type_param_scalar(9), typed_s->value()); @@ -53,7 +56,7 @@ TYPED_TEST(FixedWidthGetValueTest, GetFromNullable) auto s = get_element(col, 1); using ScalarType = scalar_type_t; - auto typed_s = static_cast(s.get()); + auto typed_s = static_cast(s.get()); EXPECT_TRUE(s->is_valid()); EXPECT_EQ(cudf::test::make_type_param_scalar(8), typed_s->value()); @@ -83,7 +86,7 @@ TEST_F(StringGetValueTest, BasicGet) strings_column_wrapper col{"this", "is", "a", "test"}; auto s = get_element(col, 3); - auto typed_s = static_cast(s.get()); + auto typed_s = static_cast(s.get()); EXPECT_TRUE(s->is_valid()); EXPECT_EQ("test", typed_s->to_string()); @@ -94,7 +97,7 @@ TEST_F(StringGetValueTest, GetEmpty) strings_column_wrapper col{"this", "is", "", "test"}; auto s = 
get_element(col, 2); - auto typed_s = static_cast(s.get()); + auto typed_s = static_cast(s.get()); EXPECT_TRUE(s->is_valid()); EXPECT_EQ("", typed_s->to_string()); @@ -105,7 +108,7 @@ TEST_F(StringGetValueTest, GetFromNullable) strings_column_wrapper col({"this", "is", "a", "test"}, {0, 1, 0, 1}); auto s = get_element(col, 1); - auto typed_s = static_cast(s.get()); + auto typed_s = static_cast(s.get()); EXPECT_TRUE(s->is_valid()); EXPECT_EQ("is", typed_s->to_string()); @@ -134,7 +137,7 @@ TYPED_TEST(DictionaryGetValueTest, BasicGet) auto s = get_element(*col, 2); using ScalarType = scalar_type_t; - auto typed_s = static_cast(s.get()); + auto typed_s = static_cast(s.get()); EXPECT_TRUE(s->is_valid()); EXPECT_EQ(cudf::test::make_type_param_scalar(7), typed_s->value()); @@ -149,7 +152,7 @@ TYPED_TEST(DictionaryGetValueTest, GetFromNullable) auto s = get_element(*col, 3); using ScalarType = scalar_type_t; - auto typed_s = static_cast(s.get()); + auto typed_s = static_cast(s.get()); EXPECT_TRUE(s->is_valid()); EXPECT_EQ(cudf::test::make_type_param_scalar(8), typed_s->value()); @@ -166,5 +169,588 @@ TYPED_TEST(DictionaryGetValueTest, GetNull) EXPECT_FALSE(s->is_valid()); } +/* + * Lists test grid: + * Dim1 nestedness: {Nested, Non-nested} + * Dim2 validity, emptiness: {Null element, Non-null non-empty list, Non-null empty list} + * Dim3 leaf data type: {Fixed-width, string, struct} + */ + +template +struct ListGetFixedWidthValueTest : public BaseFixture { + auto odds_valid() + { + return cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i % 2; }); + } + auto nth_valid(size_type x) + { + return cudf::detail::make_counting_transform_iterator(0, [=](auto i) { return x == i; }); + } +}; + +TYPED_TEST_CASE(ListGetFixedWidthValueTest, FixedWidthTypes); + +TYPED_TEST(ListGetFixedWidthValueTest, NonNestedGetNonNullNonEmpty) +{ + using LCW = cudf::test::lists_column_wrapper; + + LCW col{LCW({1, 2, 34}, this->odds_valid()), LCW{}, LCW{1}, LCW{}}; + 
fixed_width_column_wrapper expected_data({1, 2, 34}, this->odds_valid()); + size_type index = 0; + + auto s = get_element(col, index); + auto typed_s = static_cast(s.get()); + + EXPECT_TRUE(s->is_valid()); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected_data, typed_s->view()); +} + +TYPED_TEST(ListGetFixedWidthValueTest, NonNestedGetNonNullEmpty) +{ + using LCW = cudf::test::lists_column_wrapper; + + LCW col{LCW{1, 2, 34}, LCW{}, LCW{1}, LCW{}}; + fixed_width_column_wrapper expected_data{}; + size_type index = 1; + + auto s = get_element(col, index); + auto typed_s = static_cast(s.get()); + + EXPECT_TRUE(s->is_valid()); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected_data, typed_s->view()); +} + +TYPED_TEST(ListGetFixedWidthValueTest, NonNestedGetNull) +{ + using LCW = cudf::test::lists_column_wrapper; + LCW col({LCW{1, 2, 34}, LCW{}, LCW{1}, LCW{}}, this->odds_valid()); + size_type index = 2; + + auto s = get_element(col, index); + + EXPECT_FALSE(s->is_valid()); +} + +TYPED_TEST(ListGetFixedWidthValueTest, NestedGetNonNullNonEmpty) +{ + using LCW = cudf::test::lists_column_wrapper; + + // clang-format off + LCW col{ + LCW{LCW{1, 2}, LCW{34}}, + LCW{}, + LCW{LCW{1}}, + LCW{LCW{42}, LCW{10}} + }; + // clang-format on + LCW expected_data{LCW{42}, LCW{10}}; + + size_type index = 3; + + auto s = get_element(col, index); + auto typed_s = static_cast(s.get()); + + EXPECT_TRUE(s->is_valid()); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected_data, typed_s->view()); +} + +TYPED_TEST(ListGetFixedWidthValueTest, NestedGetNonNullNonEmptyPreserveNull) +{ + using LCW = cudf::test::lists_column_wrapper; + + std::vector valid{0, 1, 1}; + // clang-format off + LCW col{ + LCW{LCW{1, 2}, LCW{34}}, + LCW{}, + LCW{LCW{1}}, + LCW({LCW{42}, LCW{10}, LCW({1, 3, 2}, this->nth_valid(1))}, valid.begin()) + }; + // clang-format on + LCW expected_data({LCW{42}, LCW{10}, LCW({1, 3, 2}, this->nth_valid(1))}, valid.begin()); + size_type index = 3; + + auto s = get_element(col, index); + auto typed_s = 
static_cast(s.get()); + + EXPECT_TRUE(s->is_valid()); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected_data, typed_s->view()); +} + +TYPED_TEST(ListGetFixedWidthValueTest, NestedGetNonNullEmpty) +{ + using LCW = cudf::test::lists_column_wrapper; + + // clang-format off + LCW col{ + LCW{LCW{1, 2}, LCW{34}}, + LCW{}, + LCW{LCW{1}}, + LCW{LCW{42}, LCW{10}} + }; + // clang-format on + LCW expected_data{}; + size_type index = 1; + + auto s = get_element(col, index); + auto typed_s = static_cast(s.get()); + + EXPECT_TRUE(s->is_valid()); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected_data, typed_s->view()); +} + +TYPED_TEST(ListGetFixedWidthValueTest, NestedGetNull) +{ + using LCW = cudf::test::lists_column_wrapper; + + std::vector valid{1, 0, 1, 0}; + // clang-format off + LCW col( + { + LCW{LCW{1, 2}, LCW{34}}, + LCW{}, + LCW{LCW{1}}, + LCW{LCW{42}, LCW{10}} + }, valid.begin()); + // clang-format on + size_type index = 1; + + auto s = get_element(col, index); + + EXPECT_FALSE(s->is_valid()); +} + +struct ListGetStringValueTest : public BaseFixture { + auto odds_valid() + { + return cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i % 2; }); + } + auto nth_valid(size_type x) + { + return cudf::detail::make_counting_transform_iterator(0, [=](auto i) { return x == i; }); + } +}; + +TEST_F(ListGetStringValueTest, NonNestedGetNonNullNonEmpty) +{ + using LCW = cudf::test::lists_column_wrapper; + + LCW col{LCW({"aaa", "Héllo"}, this->odds_valid()), LCW{}, LCW{""}, LCW{"42"}}; + strings_column_wrapper expected_data({"aaa", "Héllo"}, this->odds_valid()); + size_type index = 0; + + auto s = get_element(col, index); + auto typed_s = static_cast(s.get()); + + EXPECT_TRUE(s->is_valid()); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected_data, typed_s->view()); +} + +TEST_F(ListGetStringValueTest, NonNestedGetNonNullEmpty) +{ + using LCW = cudf::test::lists_column_wrapper; + + LCW col{LCW{"aaa", "Héllo"}, LCW{}, LCW{""}, LCW{"42"}}; + strings_column_wrapper expected_data{}; + 
size_type index = 1; + + auto s = get_element(col, index); + auto typed_s = static_cast(s.get()); + + EXPECT_TRUE(s->is_valid()); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected_data, typed_s->view()); +} + +TEST_F(ListGetStringValueTest, NonNestedGetNull) +{ + using LCW = cudf::test::lists_column_wrapper; + + std::vector valid{1, 0, 0, 1}; + LCW col({LCW{"aaa", "Héllo"}, LCW{}, LCW{""}, LCW{"42"}}, valid.begin()); + size_type index = 2; + + auto s = get_element(col, index); + + EXPECT_FALSE(s->is_valid()); +} + +TEST_F(ListGetStringValueTest, NestedGetNonNullNonEmpty) +{ + using LCW = cudf::test::lists_column_wrapper; + + // clang-format off + LCW col{ + LCW{LCW{"aaa", "Héllo"}}, + LCW{}, + LCW{LCW{""}, LCW({"string", "str2", "xyz"}, this->nth_valid(0))}, + LCW{LCW{"42"}, LCW{"21"}} + }; + // clang-format on + LCW expected_data{LCW{""}, LCW({"string", "str2", "xyz"}, this->nth_valid(0))}; + size_type index = 2; + + auto s = get_element(col, index); + auto typed_s = static_cast(s.get()); + + EXPECT_TRUE(s->is_valid()); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected_data, typed_s->view()); +} + +TEST_F(ListGetStringValueTest, NestedGetNonNullNonEmptyPreserveNull) +{ + using LCW = cudf::test::lists_column_wrapper; + + std::vector valid{0, 1, 1}; + // clang-format off + LCW col{ + LCW{LCW{"aaa", "Héllo"}}, + LCW{}, + LCW({LCW{""}, LCW{"cc"}, LCW({"string", "str2", "xyz"}, this->nth_valid(0))}, valid.begin()), + LCW{LCW{"42"}, LCW{"21"}} + }; + // clang-format on + LCW expected_data({LCW{""}, LCW{"cc"}, LCW({"string", "str2", "xyz"}, this->nth_valid(0))}, + valid.begin()); + size_type index = 2; + + auto s = get_element(col, index); + auto typed_s = static_cast(s.get()); + + EXPECT_TRUE(s->is_valid()); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected_data, typed_s->view()); +} + +TEST_F(ListGetStringValueTest, NestedGetNonNullEmpty) +{ + using LCW = cudf::test::lists_column_wrapper; + + // clang-format off + LCW col{ + LCW{LCW{"aaa", "Héllo"}}, + LCW{LCW{""}}, + LCW{LCW{"42"}, 
LCW{"21"}}, + LCW{} + }; + // clang-format on + LCW expected_data{}; + size_type index = 3; + + auto s = get_element(col, index); + auto typed_s = static_cast(s.get()); + + EXPECT_TRUE(s->is_valid()); + // Relax to equivalent. `expected_data` leaf string column does not + // allocate offset and byte array, but `typed_s` does. + CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected_data, typed_s->view()); +} + +TEST_F(ListGetStringValueTest, NestedGetNull) +{ + using LCW = cudf::test::lists_column_wrapper; + + std::vector valid{0, 0, 1, 1}; + // clang-format off + LCW col( + { + LCW{LCW{"aaa", "Héllo"}}, + LCW{LCW{""}}, + LCW{LCW{"42"}, LCW{"21"}}, + LCW{} + }, valid.begin()); + // clang-format on + LCW expected_data{}; + size_type index = 0; + + auto s = get_element(col, index); + EXPECT_FALSE(s->is_valid()); +} + +/** + * @brief Some shared helper functions used by lists of structs test. + */ +template +struct ListGetStructValueTest : public BaseFixture { + using SCW = structs_column_wrapper; + using LCWinner_t = cudf::test::lists_column_wrapper; + + /** + * @brief Create a lists column + * + * @note Different from `cudf::make_lists_column`, this allows setting the `null_mask` + * in `initializer_list`. However this is an expensive function because it repeatedly + * calls `cudf::set_null_mask` for each row. + */ + std::unique_ptr make_test_lists_column(size_type num_lists, + fixed_width_column_wrapper offsets, + std::unique_ptr child, + std::initializer_list null_mask) + { + size_type null_count = num_lists - std::accumulate(null_mask.begin(), null_mask.end(), 0); + auto d_null_mask = cudf::create_null_mask( + num_lists, null_count == 0 ? 
cudf::mask_state::UNALLOCATED : cudf::mask_state::ALL_NULL); + if (null_count > 0) { + std::for_each( + thrust::make_counting_iterator(0), thrust::make_counting_iterator(num_lists), [&](auto i) { + if (*(null_mask.begin() + i)) { + set_null_mask(static_cast(d_null_mask.data()), i, i + 1, true); + } + }); + } + return cudf::make_lists_column( + num_lists, offsets.release(), std::move(child), null_count, std::move(d_null_mask)); + } + + /** + * @brief Create a structs column that contains 3 fields: int, string, List + */ + template + SCW make_test_structs_column(fixed_width_column_wrapper field1, + strings_column_wrapper field2, + lists_column_wrapper field3, + MaskIterator mask) + { + return SCW{{field1, field2, field3}, mask}; + } + + /** + * @brief Create a 0-length structs column + */ + SCW zero_length_struct() { return SCW{}; } + + /** + * @brief Concatenate structs columns, allow specifying inputs in `initializer_list` + */ + std::unique_ptr concat(std::initializer_list rows) + { + std::vector views; + std::transform( + rows.begin(), rows.end(), std::back_inserter(views), [](auto &r) { return column_view(r); }); + return cudf::concatenate(views); + } + + /** + * @brief Test data setup: row 0 of structs column + */ + SCW row0() + { + // {int: 1, string: NULL, list: NULL} + return this->make_test_structs_column({{1}, {1}}, + strings_column_wrapper({"aa"}, {false}), + LCWinner_t({{}}, all_invalid()), + all_valid()); + } + + /** + * @brief Test data setup: row 1 of structs column + */ + SCW row1() + { + // NULL + return this->make_test_structs_column({-1}, {""}, LCWinner_t{-1}, all_invalid()); + } + + /** + * @brief Test data setup: row 2 of structs column + */ + SCW row2() + { + // {int: 3, string: "xyz", list: [3, 8, 4]} + return this->make_test_structs_column({{3}, {1}}, + strings_column_wrapper({"xyz"}, {true}), + LCWinner_t({{3, 8, 4}}, all_valid()), + all_valid()); + } + + /** + * @brief Test data setup: a 3-row structs column + */ + std::unique_ptr 
leaf_data() + { + // 3 rows: + // {int: 1, string: NULL, list: NULL} + // NULL + // {int: 3, string: "xyz", list: [3, 8, 4]} + return this->concat({row0(), row1(), row2()}); + } + + auto all_valid() { return thrust::make_constant_iterator(true); } + auto all_invalid() { return thrust::make_constant_iterator(false); } +}; + +TYPED_TEST_CASE(ListGetStructValueTest, FixedWidthTypes); + +TYPED_TEST(ListGetStructValueTest, NonNestedGetNonNullNonEmpty) +{ + // 2-rows + // [{1, NULL, NULL}, NULL] + // [{3, "xyz", [3, 8, 4]}] <- get_element(1) + + auto list_column = this->make_test_lists_column(2, {0, 2, 3}, this->leaf_data(), {1, 1}); + size_type index = 1; + auto expected_data = this->row2(); + + auto s = get_element(list_column->view(), index); + auto typed_s = static_cast(s.get()); + + EXPECT_TRUE(s->is_valid()); + // Relax to equivalent. The nested list column in struct allocates `null_mask`. + CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected_data, typed_s->view()); +} + +TYPED_TEST(ListGetStructValueTest, NonNestedGetNonNullNonEmpty2) +{ + // 2-rows + // [{1, NULL, NULL}, NULL] <- get_element(0) + // [{3, "xyz", [3, 8, 4]}] + + auto list_column = this->make_test_lists_column(2, {0, 2, 3}, this->leaf_data(), {1, 1}); + size_type index = 0; + auto expected_data = this->concat({this->row0(), this->row1()}); + + auto s = get_element(list_column->view(), index); + auto typed_s = static_cast(s.get()); + + EXPECT_TRUE(s->is_valid()); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(*expected_data, typed_s->view()); +} + +TYPED_TEST(ListGetStructValueTest, NonNestedGetNonNullEmpty) +{ + // 3-rows + // [{1, NULL, NULL}, NULL] + // [{3, "xyz", [3, 8, 4]}] + // [] <- get_element(2) + + auto list_column = this->make_test_lists_column(3, {0, 2, 3, 3}, this->leaf_data(), {1, 1, 1}); + size_type index = 2; + // For well-formed list column, an empty list still holds the complete structure of + // a 0-length structs column + auto expected_data = this->zero_length_struct(); + + auto s = 
get_element(list_column->view(), index); + auto typed_s = static_cast(s.get()); + + EXPECT_TRUE(s->is_valid()); + // Relax to equivalent. The nested list column in struct allocates `null_mask`. + CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected_data, typed_s->view()); +} + +TYPED_TEST(ListGetStructValueTest, NonNestedGetNull) +{ + // 2-rows + // NULL <- get_element(0) + // [{3, "xyz", [3, 8, 4]}] + + auto list_column = this->make_test_lists_column(2, {0, 2, 3}, this->leaf_data(), {0, 1}); + size_type index = 0; + + auto s = get_element(list_column->view(), index); + + EXPECT_FALSE(s->is_valid()); +} + +TYPED_TEST(ListGetStructValueTest, NestedGetNonNullNonEmpty) +{ + // 2-rows + // [[{1, NULL, NULL}, NULL], [{3, "xyz", [3, 8, 4]}]] <- get_element(0) + // [] + + auto list_column = this->make_test_lists_column(2, {0, 2, 3}, this->leaf_data(), {1, 1}); + auto expected_data = std::make_unique(*list_column); + + auto list_column_nested = + this->make_test_lists_column(2, {0, 2, 2}, std::move(list_column), {1, 1}); + + size_type index = 0; + auto s = get_element(list_column_nested->view(), index); + auto typed_s = static_cast(s.get()); + + EXPECT_TRUE(s->is_valid()); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(*expected_data, typed_s->view()); +} + +TYPED_TEST(ListGetStructValueTest, NestedGetNonNullNonEmpty2) +{ + // 2-rows + // [[{1, NULL, NULL}, NULL]] <- get_element(0) + // [[{3, "xyz", [3, 8, 4]}]] + + auto list_column = this->make_test_lists_column(2, {0, 2, 3}, this->leaf_data(), {1, 1}); + auto list_column_nested = + this->make_test_lists_column(2, {0, 1, 2}, std::move(list_column), {1, 1}); + + auto expected_data = + this->make_test_lists_column(1, {0, 2}, this->concat({this->row0(), this->row1()}), {1}); + + size_type index = 0; + auto s = get_element(list_column_nested->view(), index); + auto typed_s = static_cast(s.get()); + + EXPECT_TRUE(s->is_valid()); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(*expected_data, typed_s->view()); +} + +TYPED_TEST(ListGetStructValueTest, 
NestedGetNonNullNonEmpty3) +{ + // 2-rows + // [[{1, NULL, NULL}, NULL]] + // [[{3, "xyz", [3, 8, 4]}]] <- get_element(1) + + auto list_column = this->make_test_lists_column(2, {0, 2, 3}, this->leaf_data(), {1, 1}); + auto list_column_nested = + this->make_test_lists_column(2, {0, 1, 2}, std::move(list_column), {1, 1}); + + auto expected_data = this->make_test_lists_column(1, {0, 1}, this->row2().release(), {1}); + + size_type index = 1; + auto s = get_element(list_column_nested->view(), index); + auto typed_s = static_cast(s.get()); + + EXPECT_TRUE(s->is_valid()); + // Relax to equivalent. For `get_element`, the nested list column in struct + // allocates `null_mask`. + CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*expected_data, typed_s->view()); +} + +TYPED_TEST(ListGetStructValueTest, NestedGetNonNullEmpty) +{ + // 3-rows + // [[{1, NULL, NULL}, NULL]] + // [] <- get_element(1) + // [[{3, "xyz", [3, 8, 4]}]] + + auto list_column = this->make_test_lists_column(2, {0, 2, 3}, this->leaf_data(), {1, 1}); + auto list_column_nested = + this->make_test_lists_column(3, {0, 1, 1, 2}, std::move(list_column), {1, 1, 1}); + + auto expected_data = + this->make_test_lists_column(0, {0}, this->zero_length_struct().release(), {1}); + + size_type index = 1; + auto s = get_element(list_column_nested->view(), index); + auto typed_s = static_cast(s.get()); + + EXPECT_TRUE(s->is_valid()); + // Relax to equivalent. The sliced version still has the array for fields + // allocated. 
+ CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*expected_data, typed_s->view()); +} + +TYPED_TEST(ListGetStructValueTest, NestedGetNull) +{ + // 3-rows + // [[{1, NULL, NULL}, NULL]] + // [] + // NULL <- get_element(2) + auto list_column = this->make_test_lists_column(2, {0, 2, 3}, this->leaf_data(), {1, 1}); + auto list_column_nested = + this->make_test_lists_column(3, {0, 1, 1, 2}, std::move(list_column), {1, 1, 0}); + + size_type index = 2; + auto s = get_element(list_column_nested->view(), index); + + EXPECT_FALSE(s->is_valid()); +} + } // namespace test } // namespace cudf