From 60725de26e8b2a954fa78769bc4dd4daf0619785 Mon Sep 17 00:00:00 2001 From: Michael Wang Date: Mon, 26 Apr 2021 16:30:27 -0700 Subject: [PATCH 01/22] Initial --- cpp/include/cudf/scalar/scalar.hpp | 20 +- cpp/src/copying/get_element.cu | 21 +- cpp/src/lists/copying/copying.cu | 2 +- cpp/src/scalar/scalar_factories.cpp | 2 +- cpp/tests/copying/get_value_tests.cpp | 280 ++++++++++++++++++++++++++ 5 files changed, 320 insertions(+), 5 deletions(-) diff --git a/cpp/include/cudf/scalar/scalar.hpp b/cpp/include/cudf/scalar/scalar.hpp index 8b5c976512f..483a9fc4919 100644 --- a/cpp/include/cudf/scalar/scalar.hpp +++ b/cpp/include/cudf/scalar/scalar.hpp @@ -607,7 +607,7 @@ class list_scalar : public scalar { list_scalar& operator=(list_scalar&& other) = delete; /** - * @brief Construct a new list scalar object from existing device data + * @brief Construct a new list scalar object copying from existing device data * * @param elements The elements of the list * @param is_valid Whether the value held by the scalar is valid @@ -618,7 +618,23 @@ class list_scalar : public scalar { bool is_valid = true, rmm::cuda_stream_view stream = rmm::cuda_stream_default, rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()) - : scalar(data_type(type_id::LIST), is_valid), _data(elements, stream, mr) + : scalar(data_type(type_id::LIST), is_valid, stream, mr), _data(elements, stream, mr) + { + } + + /** + * @brief Move existing device data into a new list scalar object + * + * @param elements The elements of the list + * @param is_valid Whether the value held by the scalar is valid + * @param stream CUDA stream used for device memory operations. + * @param mr Device memory resource to use for device memory allocation + */ + list_scalar(cudf::column const&& elements, + bool is_valid = true, + rmm::cuda_stream_view stream = rmm::cuda_stream_default, + rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()) + : scalar(data_type(type_id::LIST), is_valid, stream, mr), _data(elements) { } diff --git a/cpp/src/copying/get_element.cu b/cpp/src/copying/get_element.cu index 446f9b0dda9..93f9ee8d911 100644 --- a/cpp/src/copying/get_element.cu +++ b/cpp/src/copying/get_element.cu @@ -19,6 +19,8 @@ #include #include #include +#include +#include #include #include @@ -122,7 +124,24 @@ struct get_element_functor { rmm::cuda_stream_view stream, rmm::mr::device_memory_resource *mr = rmm::mr::get_current_device_resource()) { - CUDF_FAIL("get_element_functor not supported for list_view"); + rmm::device_scalar d_valid; + auto d_input = column_device_view::create(input); + device_single_thread([d_input = *d_input, index, valid = d_valid.data()] __device__() { + *valid = d_input.is_valid(index); + }); + bool valid = d_valid.value(stream); + + if (valid) { + lists_column_view lcv(input); + // Make a copy of the row + auto row_slice_contents = + lists::detail::copy_slice(lcv, index, index + 1, stream, mr)->release(); + // Construct scalar with row data + return std::make_unique( + std::move(*row_slice_contents.children[1]), valid, stream, mr); + } else { + return make_default_constructed_scalar(data_type(type_id::LIST)); + } } template ()> *p = nullptr> diff --git a/cpp/src/lists/copying/copying.cu b/cpp/src/lists/copying/copying.cu index 3f4f02cf910..41df152f9b9 100644 --- a/cpp/src/lists/copying/copying.cu +++ b/cpp/src/lists/copying/copying.cu @@ -35,7 +35,7 @@ std::unique_ptr copy_slice(lists_column_view const& lists, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { - if (lists.is_empty()) { return cudf::empty_like(lists.parent()); } + if (lists.is_empty() or start == end) { return cudf::empty_like(lists.parent()); } if (end < 0 || end > lists.size()) end = lists.size(); CUDF_EXPECTS(((start >= 0) && (start < end)), "Invalid slice range."); auto lists_count = end - start; diff --git a/cpp/src/scalar/scalar_factories.cpp b/cpp/src/scalar/scalar_factories.cpp index 5714eaee864..0b94fc0065c 100644 --- a/cpp/src/scalar/scalar_factories.cpp +++ b/cpp/src/scalar/scalar_factories.cpp @@ -125,7 +125,7 @@ template <> std::unique_ptr default_scalar_functor::operator()( rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { - CUDF_FAIL("list_view type not supported"); + return std::make_unique(); } template <> diff --git a/cpp/tests/copying/get_value_tests.cpp b/cpp/tests/copying/get_value_tests.cpp index 5a7a4f95066..d22f391232d 100644 --- a/cpp/tests/copying/get_value_tests.cpp +++ b/cpp/tests/copying/get_value_tests.cpp @@ -25,6 +25,8 @@ #include #include #include +#include "build/cuda-11.2/fea-shift/release/_deps/gtest-src/googletest/include/gtest/gtest.h" +#include "cudf/detail/iterator.cuh" namespace cudf { namespace test { @@ -166,5 +168,283 @@ TYPED_TEST(DictionaryGetValueTest, GetNull) EXPECT_FALSE(s->is_valid()); } +/* + * Lists test grid: + * Dim1 nestedness: {Nested, Non-nested} + * Dim2 validity, emptiness: {Null element, Non-null non-empty list, Non-null empty list} + * Dim3 leaf data type: {Fixed-width, string} + */ + +template +struct ListGetFixedWidthValueTest : public BaseFixture { +}; + +TYPED_TEST_CASE(ListGetFixedWidthValueTest, FixedWidthTypes); + +TYPED_TEST(ListGetFixedWidthValueTest, NonNestedGetNonNullNonEmpty) +{ + using LCW = cudf::test::lists_column_wrapper; + + LCW col{LCW{1, 2, 34}, LCW{}, LCW{1}, LCW{}}; + fixed_width_column_wrapper expected_data{1, 2, 34}; + size_type index = 0; + + auto s = get_element(col, index); + auto typed_s = static_cast(s.get()); + + EXPECT_TRUE(s->is_valid()); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected_data, typed_s->view()); +} + +TYPED_TEST(ListGetFixedWidthValueTest, NonNestedGetNonNullEmpty) +{ + using LCW = cudf::test::lists_column_wrapper; + + LCW col{LCW{1, 2, 34}, LCW{}, LCW{1}, LCW{}}; + fixed_width_column_wrapper expected_data{}; + size_type index = 1; + + auto s = get_element(col, index); + auto typed_s = static_cast(s.get()); + + EXPECT_TRUE(s->is_valid()); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected_data, typed_s->view()); +} + +TYPED_TEST(ListGetFixedWidthValueTest, NonNestedGetNull) +{ + using LCW = cudf::test::lists_column_wrapper; + std::vector valid{0, 1, 0, 1}; + LCW col({LCW{1, 2, 34}, LCW{}, LCW{1}, LCW{}}, valid.begin()); + size_type index = 2; + + auto s = get_element(col, index); + + EXPECT_FALSE(s->is_valid()); +} + +TYPED_TEST(ListGetFixedWidthValueTest, NestedGetNonNullNonEmpty) +{ + using LCW = cudf::test::lists_column_wrapper; + + // clang-format off + LCW col{ + LCW{LCW{1, 2}, LCW{34}}, + LCW{}, + LCW{LCW{1}}, + LCW{LCW{42}, LCW{10}} + }; + // clang-format on + LCW expected_data{LCW{42}, LCW{10}}; + + size_type index = 3; + + auto s = get_element(col, index); + auto typed_s = static_cast(s.get()); + + EXPECT_TRUE(s->is_valid()); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected_data, typed_s->view()); +} + +TYPED_TEST(ListGetFixedWidthValueTest, NestedGetNonNullNonEmptyPreserveNull) +{ + using LCW = cudf::test::lists_column_wrapper; + + std::vector valid{0, 1}; + // clang-format off + LCW col{ + LCW{LCW{1, 2}, LCW{34}}, + LCW{}, + LCW{LCW{1}}, + LCW({LCW{42}, LCW{10}}, valid.begin()) + }; + // clang-format on + LCW expected_data({LCW{42}, LCW{10}}, valid.begin()); + size_type index = 3; + + auto s = get_element(col, index); + auto typed_s = static_cast(s.get()); + + EXPECT_TRUE(s->is_valid()); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected_data, typed_s->view()); +} + +TYPED_TEST(ListGetFixedWidthValueTest, NestedGetNonNullEmpty) +{ + using LCW = cudf::test::lists_column_wrapper; + + // clang-format off + LCW col{ + LCW{LCW{1, 2}, LCW{34}}, + LCW{}, + LCW{LCW{1}}, + LCW{LCW{42}, LCW{10}} + }; + // clang-format on + LCW expected_data{}; + size_type index = 1; + + auto s = get_element(col, index); + auto typed_s = static_cast(s.get()); + + EXPECT_TRUE(s->is_valid()); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected_data, typed_s->view()); +} + +TYPED_TEST(ListGetFixedWidthValueTest, NestedGetNull) +{ + using LCW = cudf::test::lists_column_wrapper; + + std::vector valid{1, 0, 1, 0}; + // clang-format off + LCW col( + { + LCW{LCW{1, 2}, LCW{34}}, + LCW{}, + LCW{LCW{1}}, + LCW{LCW{42}, LCW{10}} + }, valid.begin()); + // clang-format on + size_type index = 1; + + auto s = get_element(col, index); + + EXPECT_FALSE(s->is_valid()); +} + +struct ListGetStringValueTest : public BaseFixture { +}; + +TEST_F(ListGetStringValueTest, NonNestedGetNonNullNonEmpty) +{ + using LCW = cudf::test::lists_column_wrapper; + + LCW col{LCW{"aaa", "Héllo"}, LCW{}, LCW{""}, LCW{"42"}}; + strings_column_wrapper expected_data{"aaa", "Héllo"}; + size_type index = 0; + + auto s = get_element(col, index); + auto typed_s = static_cast(s.get()); + + EXPECT_TRUE(s->is_valid()); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected_data, typed_s->view()); +} + +TEST_F(ListGetStringValueTest, NonNestedGetNonNullEmpty) +{ + using LCW = cudf::test::lists_column_wrapper; + + LCW col{LCW{"aaa", "Héllo"}, LCW{}, LCW{""}, LCW{"42"}}; + strings_column_wrapper expected_data{}; + size_type index = 1; + + auto s = get_element(col, index); + auto typed_s = static_cast(s.get()); + + EXPECT_TRUE(s->is_valid()); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected_data, typed_s->view()); +} + +TEST_F(ListGetStringValueTest, NonNestedGetNull) +{ + using LCW = cudf::test::lists_column_wrapper; + + std::vector valid{1, 0, 0, 1}; + LCW col({LCW{"aaa", "Héllo"}, LCW{}, LCW{""}, LCW{"42"}}, valid.begin()); + size_type index = 2; + + auto s = get_element(col, index); + + EXPECT_FALSE(s->is_valid()); +} + +TEST_F(ListGetStringValueTest, NestedGetNonNullNonEmpty) +{ + using LCW = cudf::test::lists_column_wrapper; + + // clang-format off + LCW col{ + LCW{LCW{"aaa", "Héllo"}}, + {LCW{}}, + LCW{LCW{""}}, + LCW{LCW{"42"}, LCW{"21"}} + }; + // clang-format on + LCW expected_data{LCW{""}}; + size_type index = 2; + + auto s = get_element(col, index); + auto typed_s = static_cast(s.get()); + + EXPECT_TRUE(s->is_valid()); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected_data, typed_s->view()); +} + +TEST_F(ListGetStringValueTest, NestedGetNonNullNonEmptyPreserveNull) +{ + using LCW = cudf::test::lists_column_wrapper; + + std::vector valid{0, 1}; + // clang-format off + LCW col{ + LCW{LCW{"aaa", "Héllo"}}, + {LCW{}}, + LCW({LCW{""}, LCW{"cc"}}, valid.begin()), + LCW{LCW{"42"}, LCW{"21"}} + }; + // clang-format on + LCW expected_data({LCW{""}, LCW{"cc"}}, valid.begin()); + size_type index = 2; + + auto s = get_element(col, index); + auto typed_s = static_cast(s.get()); + + EXPECT_TRUE(s->is_valid()); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected_data, typed_s->view()); +} + +TEST_F(ListGetStringValueTest, NestedGetNonNullEmpty) +{ + using LCW = cudf::test::lists_column_wrapper; + + // clang-format off + LCW col{ + LCW{LCW{"aaa", "Héllo"}}, + LCW{LCW{""}}, + LCW{LCW{"42"}, LCW{"21"}}, + {LCW{}} + }; + // clang-format on + LCW expected_data{LCW{}}; // a list column with 1 row of an empty string list + size_type index = 3; + + auto s = get_element(col, index); + auto typed_s = static_cast(s.get()); + + EXPECT_TRUE(s->is_valid()); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected_data, typed_s->view()); +} + +TEST_F(ListGetStringValueTest, NestedGetNull) +{ + using LCW = cudf::test::lists_column_wrapper; + + std::vector valid{0, 0, 1, 1}; + // clang-format off + LCW col( + { + LCW{LCW{"aaa", "Héllo"}}, + LCW{LCW{""}}, + LCW{LCW{"42"}, LCW{"21"}}, + {LCW{}} + }, valid.begin()); + // clang-format on + LCW expected_data{}; + size_type index = 0; + + auto s = get_element(col, index); + EXPECT_FALSE(s->is_valid()); +} + } // namespace test } // namespace cudf From 67c4d048fde4d0c566497c920af7ef9688eff69d Mon Sep 17 00:00:00 2001 From: Michael Wang Date: Mon, 26 Apr 2021 16:48:38 -0700 Subject: [PATCH 02/22] Remove clangd imports --- cpp/tests/copying/get_value_tests.cpp | 2 -- 1 file changed, 2 deletions(-) diff --git a/cpp/tests/copying/get_value_tests.cpp b/cpp/tests/copying/get_value_tests.cpp index d22f391232d..119fa4d2cc1 100644 --- a/cpp/tests/copying/get_value_tests.cpp +++ b/cpp/tests/copying/get_value_tests.cpp @@ -25,8 +25,6 @@ #include #include #include -#include "build/cuda-11.2/fea-shift/release/_deps/gtest-src/googletest/include/gtest/gtest.h" -#include "cudf/detail/iterator.cuh" namespace cudf { namespace test { From daf5c5b399dad4f39c4d61378adc23f97504bbf5 Mon Sep 17 00:00:00 2001 From: Michael Wang Date: Wed, 28 Apr 2021 12:42:52 -0700 Subject: [PATCH 03/22] Copyright years and fix test nuances --- cpp/src/copying/get_element.cu | 2 +- cpp/tests/copying/get_value_tests.cpp | 18 ++++++++++-------- 2 files changed, 11 insertions(+), 9 deletions(-) diff --git a/cpp/src/copying/get_element.cu b/cpp/src/copying/get_element.cu index 93f9ee8d911..6560f2a9497 100644 --- a/cpp/src/copying/get_element.cu +++ b/cpp/src/copying/get_element.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, NVIDIA CORPORATION. + * Copyright (c) 2020-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/cpp/tests/copying/get_value_tests.cpp b/cpp/tests/copying/get_value_tests.cpp index 119fa4d2cc1..a5038bdc79a 100644 --- a/cpp/tests/copying/get_value_tests.cpp +++ b/cpp/tests/copying/get_value_tests.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, NVIDIA CORPORATION. + * Copyright (c) 2020-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -363,7 +363,7 @@ TEST_F(ListGetStringValueTest, NestedGetNonNullNonEmpty) // clang-format off LCW col{ LCW{LCW{"aaa", "Héllo"}}, - {LCW{}}, + LCW{}, LCW{LCW{""}}, LCW{LCW{"42"}, LCW{"21"}} }; @@ -386,7 +386,7 @@ TEST_F(ListGetStringValueTest, NestedGetNonNullNonEmptyPreserveNull) // clang-format off LCW col{ LCW{LCW{"aaa", "Héllo"}}, - {LCW{}}, + LCW{}, LCW({LCW{""}, LCW{"cc"}}, valid.begin()), LCW{LCW{"42"}, LCW{"21"}} }; @@ -410,17 +410,19 @@ TEST_F(ListGetStringValueTest, NestedGetNonNullEmpty) LCW{LCW{"aaa", "Héllo"}}, LCW{LCW{""}}, LCW{LCW{"42"}, LCW{"21"}}, - {LCW{}} + LCW{} }; // clang-format on - LCW expected_data{LCW{}}; // a list column with 1 row of an empty string list + LCW expected_data{}; size_type index = 3; auto s = get_element(col, index); auto typed_s = static_cast(s.get()); EXPECT_TRUE(s->is_valid()); - CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected_data, typed_s->view()); + // Relax to equivalent. `expected_data` leaf string column does not + // allocate offset and byte array, but `typed_s` does. + CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected_data, typed_s->view()); } TEST_F(ListGetStringValueTest, NestedGetNull) @@ -433,8 +435,8 @@ TEST_F(ListGetStringValueTest, NestedGetNull) { LCW{LCW{"aaa", "Héllo"}}, LCW{LCW{""}}, - LCW{LCW{"42"}, LCW{"21"}}, - {LCW{}} + LCW{LCW{"42"}, LCW{"21"}}, + LCW{} }, valid.begin()); // clang-format on LCW expected_data{}; From 8d2d7eff5e90221c784ea751afae3c00f86b1d5b Mon Sep 17 00:00:00 2001 From: Michael Wang Date: Wed, 28 Apr 2021 16:37:33 -0700 Subject: [PATCH 04/22] Add get_valid API --- cpp/CMakeLists.txt | 1 + cpp/include/cudf/detail/get_valid.hpp | 44 +++++++++++++ cpp/src/bitmask/get_valid.cpp | 44 +++++++++++++ cpp/src/copying/get_element.cu | 8 +-- cpp/tests/CMakeLists.txt | 3 +- cpp/tests/bitmask/get_valid_tests.cpp | 90 +++++++++++++++++++++++++++ 6 files changed, 183 insertions(+), 7 deletions(-) create mode 100644 cpp/include/cudf/detail/get_valid.hpp create mode 100644 cpp/src/bitmask/get_valid.cpp create mode 100644 cpp/tests/bitmask/get_valid_tests.cpp diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index 89f0ebeb239..326708b72ba 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -155,6 +155,7 @@ add_library(cudf src/binaryop/compiled/binary_ops.cu src/labeling/label_bins.cu src/bitmask/null_mask.cu + src/bitmask/get_valid.cpp src/column/column.cu src/column/column_device_view.cu src/column/column_factories.cpp diff --git a/cpp/include/cudf/detail/get_valid.hpp b/cpp/include/cudf/detail/get_valid.hpp new file mode 100644 index 00000000000..7211452aec8 --- /dev/null +++ b/cpp/include/cudf/detail/get_valid.hpp @@ -0,0 +1,44 @@ +/* + * Copyright (c) 2021, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include + +#include + +namespace cudf { +namespace detail { + +/** + * @brief Return validity of a row + * + * Retrieves the specified row validity from device memory. + * + * @throw cudf::logic_error if `element_index < 0 or >= col_view.size()` + * + * @param col_view The column to retrieve the validity from. + * @param element_index The index of the row to retrieve. + * @param stream The stream to use for copying the validity to the host. + * @return Host boolean that indicates the validity of the row. + */ + +bool get_valid(column_view const& col_view, + size_type element_index, + rmm::cuda_stream_view stream = rmm::cuda_stream_default); + +} // namespace detail +} // namespace cudf diff --git a/cpp/src/bitmask/get_valid.cpp b/cpp/src/bitmask/get_valid.cpp new file mode 100644 index 00000000000..b52198b85ed --- /dev/null +++ b/cpp/src/bitmask/get_valid.cpp @@ -0,0 +1,44 @@ + +/* + * Copyright (c) 2021, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include +#include + +#include + +namespace cudf { +namespace detail { + +bool get_valid(column_view const& col_view, size_type element_index, rmm::cuda_stream_view stream) +{ + CUDF_EXPECTS(element_index >= 0 and element_index < col_view.size(), "invalid index."); + if (!col_view.nullable()) { return true; } + + bitmask_type word; + // null_mask() returns device ptr to bitmask without offset + size_type index = element_index + col_view.offset(); + CUDA_TRY(cudaMemcpyAsync(&word, + col_view.null_mask() + word_index(index), + sizeof(bitmask_type), + cudaMemcpyDeviceToHost, + stream.value())); + return static_cast(word & (bitmask_type{1} << intra_word_index(index))); +} + +} // namespace detail +} // namespace cudf diff --git a/cpp/src/copying/get_element.cu b/cpp/src/copying/get_element.cu index 6560f2a9497..20d88cdf5b5 100644 --- a/cpp/src/copying/get_element.cu +++ b/cpp/src/copying/get_element.cu @@ -16,6 +16,7 @@ #include #include +#include #include #include #include @@ -124,12 +125,7 @@ struct get_element_functor { rmm::cuda_stream_view stream, rmm::mr::device_memory_resource *mr = rmm::mr::get_current_device_resource()) { - rmm::device_scalar d_valid; - auto d_input = column_device_view::create(input); - device_single_thread([d_input = *d_input, index, valid = d_valid.data()] __device__() { - *valid = d_input.is_valid(index); - }); - bool valid = d_valid.value(stream); + bool valid = get_valid(input, index, stream); if (valid) { lists_column_view lcv(input); diff --git a/cpp/tests/CMakeLists.txt b/cpp/tests/CMakeLists.txt index 9dbd4a881a6..733ecf56373 100644 --- a/cpp/tests/CMakeLists.txt +++ b/cpp/tests/CMakeLists.txt @@ -364,7 +364,8 @@ ConfigureTest(TEXT_TEST ConfigureTest(BITMASK_TEST bitmask/valid_if_tests.cu bitmask/set_nullmask_tests.cu - bitmask/bitmask_tests.cu) + bitmask/bitmask_tests.cu + bitmask/get_valid_tests.cpp) ################################################################################################### diff --git a/cpp/tests/bitmask/get_valid_tests.cpp b/cpp/tests/bitmask/get_valid_tests.cpp new file mode 100644 index 00000000000..1f06d6d2813 --- /dev/null +++ b/cpp/tests/bitmask/get_valid_tests.cpp @@ -0,0 +1,90 @@ +/* + * Copyright (c) 2021, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include + +#include +#include +#include + +#include + +namespace cudf { +namespace test { + +struct GetValidTest : public BaseFixture { +}; + +TEST_F(GetValidTest, GetValidBasic) +{ + fixed_width_column_wrapper col({1, 1, 1, 1, 1}, {1, 0, 0, 0, 1}); + EXPECT_TRUE(cudf::detail::get_valid(col, 0)); + EXPECT_FALSE(cudf::detail::get_valid(col, 1)); + EXPECT_FALSE(cudf::detail::get_valid(col, 2)); + EXPECT_FALSE(cudf::detail::get_valid(col, 3)); + EXPECT_TRUE(cudf::detail::get_valid(col, 4)); +} + +TEST_F(GetValidTest, GetValidLarge) +{ + auto filter = [](auto i) { return static_cast(i % 3); }; + auto val = thrust::make_counting_iterator(0); + auto valid = cudf::detail::make_counting_transform_iterator(0, filter); + size_type num_rows = 1000; + + fixed_width_column_wrapper col(val, val + num_rows, valid); + + for (int i = 0; i < num_rows; i++) { EXPECT_EQ(cudf::detail::get_valid(col, i), filter(i)); } +} + +TEST_F(GetValidTest, GetValidOffset) +{ + fixed_width_column_wrapper col({1, 1, 1, 1, 1}, {1, 0, 0, 0, 1}); + { + auto offset_col = slice(col, {1, 5}).front(); + EXPECT_FALSE(cudf::detail::get_valid(offset_col, 0)); + EXPECT_FALSE(cudf::detail::get_valid(offset_col, 1)); + EXPECT_FALSE(cudf::detail::get_valid(offset_col, 2)); + EXPECT_TRUE(cudf::detail::get_valid(offset_col, 3)); + } + { + auto offset_col = slice(col, {2, 5}).front(); + EXPECT_FALSE(cudf::detail::get_valid(offset_col, 0)); + EXPECT_FALSE(cudf::detail::get_valid(offset_col, 1)); + EXPECT_TRUE(cudf::detail::get_valid(offset_col, 2)); + } +} + +TEST_F(GetValidTest, GetValidOffsetLarge) +{ + auto filter = [](auto i) { return static_cast(i % 3); }; + size_type offset = 37; + auto val = thrust::make_counting_iterator(0); + auto valid = cudf::detail::make_counting_transform_iterator(0, filter); + size_type num_rows = 1000; + + fixed_width_column_wrapper col(val, val + num_rows, valid); + auto offset_col = slice(col, {offset, num_rows}).front(); + + for (int i = 0; i < offset_col.size(); i++) { + EXPECT_EQ(cudf::detail::get_valid(offset_col, i), filter(i + offset)); + } +} + +} // namespace test + +} // namespace cudf From 2db0e8dbff62adb66ea3b129597ebaf8102dde68 Mon Sep 17 00:00:00 2001 From: Michael Wang Date: Wed, 28 Apr 2021 16:41:28 -0700 Subject: [PATCH 05/22] fixing default scalar factory method for list_scalar --- cpp/src/scalar/scalar_factories.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/src/scalar/scalar_factories.cpp b/cpp/src/scalar/scalar_factories.cpp index 0b94fc0065c..e8eadf31c9e 100644 --- a/cpp/src/scalar/scalar_factories.cpp +++ b/cpp/src/scalar/scalar_factories.cpp @@ -125,7 +125,7 @@ template <> std::unique_ptr default_scalar_functor::operator()( rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { - return std::make_unique(); + return std::make_unique(column(), false, stream, mr); } template <> From 72718e60c7e47ac039d9411b15d9f7208e4d1f63 Mon Sep 17 00:00:00 2001 From: Michael Wang Date: Wed, 28 Apr 2021 16:53:05 -0700 Subject: [PATCH 06/22] doc, func naming review apply Co-authored-by: Mark Harris --- cpp/include/cudf/detail/get_valid.hpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/cpp/include/cudf/detail/get_valid.hpp b/cpp/include/cudf/detail/get_valid.hpp index 7211452aec8..0be118b41b8 100644 --- a/cpp/include/cudf/detail/get_valid.hpp +++ b/cpp/include/cudf/detail/get_valid.hpp @@ -28,6 +28,8 @@ namespace detail { * * Retrieves the specified row validity from device memory. * + * @note Synchronizes `stream`. + * * @throw cudf::logic_error if `element_index < 0 or >= col_view.size()` * * @param col_view The column to retrieve the validity from. @@ -36,7 +38,7 @@ namespace detail { * @return Host boolean that indicates the validity of the row. */ -bool get_valid(column_view const& col_view, +bool is_element_valid_sync(column_view const& col_view, size_type element_index, rmm::cuda_stream_view stream = rmm::cuda_stream_default); From 5a5f1d6360d65c90e62e41ea867e4b1be620dc74 Mon Sep 17 00:00:00 2001 From: Michael Wang Date: Wed, 28 Apr 2021 17:18:26 -0700 Subject: [PATCH 07/22] Refactor get_valid into is_element_valid_sync --- cpp/CMakeLists.txt | 2 +- .../{get_valid.hpp => is_element_valid.hpp} | 4 +- .../{get_valid.cpp => is_element_valid.cpp} | 5 ++- cpp/src/copying/get_element.cu | 4 +- cpp/tests/CMakeLists.txt | 2 +- ...d_tests.cpp => is_element_valid_tests.cpp} | 42 ++++++++++--------- 6 files changed, 32 insertions(+), 27 deletions(-) rename cpp/include/cudf/detail/{get_valid.hpp => is_element_valid.hpp} (90%) rename cpp/src/bitmask/{get_valid.cpp => is_element_valid.cpp} (88%) rename cpp/tests/bitmask/{get_valid_tests.cpp => is_element_valid_tests.cpp} (60%) diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index 817b41a8d45..2222e315bd3 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -155,7 +155,7 @@ add_library(cudf src/binaryop/compiled/binary_ops.cu src/labeling/label_bins.cu src/bitmask/null_mask.cu - src/bitmask/get_valid.cpp + src/bitmask/is_element_valid.cpp src/column/column.cu src/column/column_device_view.cu src/column/column_factories.cpp diff --git a/cpp/include/cudf/detail/get_valid.hpp b/cpp/include/cudf/detail/is_element_valid.hpp similarity index 90% rename from cpp/include/cudf/detail/get_valid.hpp rename to cpp/include/cudf/detail/is_element_valid.hpp index 0be118b41b8..ffcba804a7c 100644 --- a/cpp/include/cudf/detail/get_valid.hpp +++ b/cpp/include/cudf/detail/is_element_valid.hpp @@ -39,8 +39,8 @@ namespace detail { */ bool is_element_valid_sync(column_view const& col_view, - size_type element_index, - rmm::cuda_stream_view stream = rmm::cuda_stream_default); + size_type element_index, + rmm::cuda_stream_view stream = rmm::cuda_stream_default); } // namespace detail } // namespace cudf diff --git a/cpp/src/bitmask/get_valid.cpp b/cpp/src/bitmask/is_element_valid.cpp similarity index 88% rename from cpp/src/bitmask/get_valid.cpp rename to cpp/src/bitmask/is_element_valid.cpp index b52198b85ed..47870e01567 100644 --- a/cpp/src/bitmask/get_valid.cpp +++ b/cpp/src/bitmask/is_element_valid.cpp @@ -24,7 +24,9 @@ namespace cudf { namespace detail { -bool get_valid(column_view const& col_view, size_type element_index, rmm::cuda_stream_view stream) +bool is_element_valid_sync(column_view const& col_view, + size_type element_index, + rmm::cuda_stream_view stream) { CUDF_EXPECTS(element_index >= 0 and element_index < col_view.size(), "invalid index."); if (!col_view.nullable()) { return true; } @@ -37,6 +39,7 @@ bool get_valid(column_view const& col_view, size_type element_index, rmm::cuda_s sizeof(bitmask_type), cudaMemcpyDeviceToHost, stream.value())); + stream.synchronize(); return static_cast(word & (bitmask_type{1} << intra_word_index(index))); } diff --git a/cpp/src/copying/get_element.cu b/cpp/src/copying/get_element.cu index 20d88cdf5b5..fa5902efc0e 100644 --- a/cpp/src/copying/get_element.cu +++ b/cpp/src/copying/get_element.cu @@ -16,8 +16,8 @@ #include #include -#include #include +#include #include #include #include @@ -125,7 +125,7 @@ struct get_element_functor { rmm::cuda_stream_view stream, rmm::mr::device_memory_resource *mr = rmm::mr::get_current_device_resource()) { - bool valid = get_valid(input, index, stream); + bool valid = is_element_valid_sync(input, index, stream); if (valid) { lists_column_view lcv(input); diff --git a/cpp/tests/CMakeLists.txt b/cpp/tests/CMakeLists.txt index 14e989c617f..0be7c47ecbe 100644 --- a/cpp/tests/CMakeLists.txt +++ b/cpp/tests/CMakeLists.txt @@ -368,7 +368,7 @@ ConfigureTest(BITMASK_TEST bitmask/valid_if_tests.cu bitmask/set_nullmask_tests.cu bitmask/bitmask_tests.cu - bitmask/get_valid_tests.cpp) + bitmask/is_element_valid_tests.cpp) ################################################################################################### diff --git a/cpp/tests/bitmask/get_valid_tests.cpp b/cpp/tests/bitmask/is_element_valid_tests.cpp similarity index 60% rename from cpp/tests/bitmask/get_valid_tests.cpp rename to cpp/tests/bitmask/is_element_valid_tests.cpp index 1f06d6d2813..383448c0dd8 100644 --- a/cpp/tests/bitmask/get_valid_tests.cpp +++ b/cpp/tests/bitmask/is_element_valid_tests.cpp @@ -18,7 +18,7 @@ #include #include -#include +#include #include #include @@ -26,20 +26,20 @@ namespace cudf { namespace test { -struct GetValidTest : public BaseFixture { +struct IsElementValidTest : public BaseFixture { }; -TEST_F(GetValidTest, GetValidBasic) +TEST_F(IsElementValidTest, IsElementValidBasic) { fixed_width_column_wrapper col({1, 1, 1, 1, 1}, {1, 0, 0, 0, 1}); - EXPECT_TRUE(cudf::detail::get_valid(col, 0)); - EXPECT_FALSE(cudf::detail::get_valid(col, 1)); - EXPECT_FALSE(cudf::detail::get_valid(col, 2)); - EXPECT_FALSE(cudf::detail::get_valid(col, 3)); - EXPECT_TRUE(cudf::detail::get_valid(col, 4)); + EXPECT_TRUE(cudf::detail::is_element_valid_sync(col, 0)); + EXPECT_FALSE(cudf::detail::is_element_valid_sync(col, 1)); + EXPECT_FALSE(cudf::detail::is_element_valid_sync(col, 2)); + EXPECT_FALSE(cudf::detail::is_element_valid_sync(col, 3)); + EXPECT_TRUE(cudf::detail::is_element_valid_sync(col, 4)); } -TEST_F(GetValidTest, GetValidLarge) +TEST_F(IsElementValidTest, IsElementValidLarge) { auto filter = [](auto i) { return static_cast(i % 3); }; auto val = thrust::make_counting_iterator(0); @@ -48,28 +48,30 @@ TEST_F(GetValidTest, GetValidLarge) fixed_width_column_wrapper col(val, val + num_rows, valid); - for (int i = 0; i < num_rows; i++) { EXPECT_EQ(cudf::detail::get_valid(col, i), filter(i)); } + for (int i = 0; i < num_rows; i++) { + EXPECT_EQ(cudf::detail::is_element_valid_sync(col, i), filter(i)); + } } -TEST_F(GetValidTest, GetValidOffset) +TEST_F(IsElementValidTest, IsElementValidOffset) { fixed_width_column_wrapper col({1, 1, 1, 1, 1}, {1, 0, 0, 0, 1}); { auto offset_col = slice(col, {1, 5}).front(); - EXPECT_FALSE(cudf::detail::get_valid(offset_col, 0)); - EXPECT_FALSE(cudf::detail::get_valid(offset_col, 1)); - EXPECT_FALSE(cudf::detail::get_valid(offset_col, 2)); - EXPECT_TRUE(cudf::detail::get_valid(offset_col, 3)); + EXPECT_FALSE(cudf::detail::is_element_valid_sync(offset_col, 0)); + EXPECT_FALSE(cudf::detail::is_element_valid_sync(offset_col, 1)); + EXPECT_FALSE(cudf::detail::is_element_valid_sync(offset_col, 2)); + EXPECT_TRUE(cudf::detail::is_element_valid_sync(offset_col, 3)); } { auto offset_col = slice(col, {2, 5}).front(); - EXPECT_FALSE(cudf::detail::get_valid(offset_col, 0)); - EXPECT_FALSE(cudf::detail::get_valid(offset_col, 1)); - EXPECT_TRUE(cudf::detail::get_valid(offset_col, 2)); + EXPECT_FALSE(cudf::detail::is_element_valid_sync(offset_col, 0)); + EXPECT_FALSE(cudf::detail::is_element_valid_sync(offset_col, 1)); + EXPECT_TRUE(cudf::detail::is_element_valid_sync(offset_col, 2)); } } -TEST_F(GetValidTest, GetValidOffsetLarge) +TEST_F(IsElementValidTest, IsElementValidOffsetLarge) { auto filter = [](auto i) { return static_cast(i % 3); }; size_type offset = 37; @@ -81,7 +83,7 @@ TEST_F(GetValidTest, GetValidOffsetLarge) auto offset_col = slice(col, {offset, num_rows}).front(); for (int i = 0; i < offset_col.size(); i++) { - EXPECT_EQ(cudf::detail::get_valid(offset_col, i), filter(i + offset)); + EXPECT_EQ(cudf::detail::is_element_valid_sync(offset_col, i), filter(i + offset)); } } From 4ae833b139486a94c571b33f4a7a456c8574fd59 Mon Sep 17 00:00:00 2001 From: Michael Wang Date: Wed, 28 Apr 2021 17:41:59 -0700 Subject: [PATCH 08/22] Update dev guide --- cpp/docs/DEVELOPER_GUIDE.md | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/cpp/docs/DEVELOPER_GUIDE.md b/cpp/docs/DEVELOPER_GUIDE.md index fa59162c345..e1078c7dd7f 100644 --- a/cpp/docs/DEVELOPER_GUIDE.md +++ b/cpp/docs/DEVELOPER_GUIDE.md @@ -255,6 +255,11 @@ currently supported by cudf. Each type of value is represented by a separate typ which are all derived from `cudf::scalar`. e.g. A `numeric_scalar` holds a single numerical value, a `string_scalar` holds a single string. The data for the stored value resides in device memory. +A `list_scalar` holds the underlying data of a list. This means the underlying data can be any type +that cudf supports. e.g. A `list_scalar` representing a list of integers stores an `cudf::column` +of type `INT32`, a `list_scalar` represents a list of list of integers stores an `cudf::column` of +type `LIST`, which in turn stores a column of type `INT32`. + |Value type|Scalar class|Notes| |-|-|-| |fixed-width|`fixed_width_scalar`| `T` can be any fixed-width type| @@ -263,11 +268,14 @@ a `string_scalar` holds a single string. The data for the stored value resides i |timestamp|`timestamp_scalar` | `T` can be `timestamp_D`, `timestamp_s`, etc.| |duration|`duration_scalar` | `T` can be `duration_D`, `duration_s`, etc.| |string|`string_scalar`| This class object is immutable| +|list|`list_scalar`| Underlying data can be any type supported by cudf | ### Construction `scalar`s can be created using either their respective constructors or using factory functions like `make_numeric_scalar()`, `make_timestamp_scalar()` or `make_string_scalar()`. +// TODO: add details for `list_scalar` + ### Casting All the factory methods return a `unique_ptr` which needs to be statically downcasted to its respective scalar class type before accessing its value. Their validity (nullness) can be @@ -290,6 +298,8 @@ and its validity from the device. This can be obtained using the function `get_scalar_device_view(ScalarType s)`. Note that a device view is not provided for a base scalar object, only for the derived typed scalar class objects. +// TODO: add details for `list_scalar` + # libcudf++ API and Implementation ## Streams From 5f33bc36d577ae4b758124c65a1c57a05fc1cf64 Mon Sep 17 00:00:00 2001 From: Michael Wang Date: Wed, 28 Apr 2021 22:10:45 -0700 Subject: [PATCH 09/22] style --- conda/recipes/libcudf/meta.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/conda/recipes/libcudf/meta.yaml b/conda/recipes/libcudf/meta.yaml index 75955428eab..45a709206ee 100644 --- a/conda/recipes/libcudf/meta.yaml +++ b/conda/recipes/libcudf/meta.yaml @@ -78,6 +78,7 @@ test: - test -f $PREFIX/include/cudf/detail/groupby/sort_helper.hpp - test -f $PREFIX/include/cudf/detail/hashing.hpp - test -f $PREFIX/include/cudf/detail/interop.hpp + - test -f $PREFIX/include/cudf/detail/is_element_valid.hpp - test -f $PREFIX/include/cudf/detail/null_mask.hpp - test -f $PREFIX/include/cudf/detail/nvtx/nvtx3.hpp - test -f $PREFIX/include/cudf/detail/nvtx/ranges.hpp From a4eae6db41882329606e1f8623a20d81a09d007f Mon Sep 17 00:00:00 2001 From: Michael Wang Date: Fri, 30 Apr 2021 09:32:20 -0700 Subject: [PATCH 10/22] Update cpp/include/cudf/scalar/scalar.hpp Co-authored-by: nvdbaranec <56695930+nvdbaranec@users.noreply.github.com> --- cpp/include/cudf/scalar/scalar.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/include/cudf/scalar/scalar.hpp b/cpp/include/cudf/scalar/scalar.hpp index 483a9fc4919..f3d8ffe2315 100644 --- a/cpp/include/cudf/scalar/scalar.hpp +++ b/cpp/include/cudf/scalar/scalar.hpp @@ -607,7 +607,7 @@ class list_scalar : public scalar { list_scalar& operator=(list_scalar&& other) = delete; /** - * @brief Construct a new list scalar object copying from existing device data + * @brief Construct a new list scalar object by copying from existing device data * * @param elements The elements of the list * @param is_valid Whether the value held by the scalar is valid From b2b156488c7b961fe0a3e244fd0a9c7be5e0d48f Mon Sep 17 00:00:00 2001 From: Michael Wang Date: Mon, 3 May 2021 16:45:16 -0700 Subject: [PATCH 11/22] Initial for lists of structs test --- cpp/tests/copying/get_value_tests.cpp | 328 ++++++++++++++++++++++++-- 1 file changed, 310 insertions(+), 18 deletions(-) diff --git a/cpp/tests/copying/get_value_tests.cpp b/cpp/tests/copying/get_value_tests.cpp index a5038bdc79a..efd360ed2ca 100644 --- a/cpp/tests/copying/get_value_tests.cpp +++ b/cpp/tests/copying/get_value_tests.cpp @@ -14,7 +14,9 @@ * limitations under the License. */ +#include #include +#include #include #include #include @@ -25,6 +27,8 @@ #include #include #include +#include "build/cuda-11.2/groupshift-python/release/_deps/gtest-src/googletest/include/gtest/gtest.h" +#include "cudf/concatenate.hpp" namespace cudf { namespace test { @@ -41,7 +45,7 @@ TYPED_TEST(FixedWidthGetValueTest, BasicGet) auto s = get_element(col, 0); using ScalarType = scalar_type_t; - auto typed_s = static_cast(s.get()); + auto typed_s = static_cast(s.get()); EXPECT_TRUE(s->is_valid()); EXPECT_EQ(cudf::test::make_type_param_scalar(9), typed_s->value()); @@ -53,7 +57,7 @@ TYPED_TEST(FixedWidthGetValueTest, GetFromNullable) auto s = get_element(col, 1); using ScalarType = scalar_type_t; - auto typed_s = static_cast(s.get()); + auto typed_s = static_cast(s.get()); EXPECT_TRUE(s->is_valid()); EXPECT_EQ(cudf::test::make_type_param_scalar(8), typed_s->value()); @@ -83,7 +87,7 @@ TEST_F(StringGetValueTest, BasicGet) strings_column_wrapper col{"this", "is", "a", "test"}; auto s = get_element(col, 3); - auto typed_s = static_cast(s.get()); + auto typed_s = static_cast(s.get()); EXPECT_TRUE(s->is_valid()); EXPECT_EQ("test", typed_s->to_string()); @@ -94,7 +98,7 @@ TEST_F(StringGetValueTest, GetEmpty) strings_column_wrapper col{"this", "is", "", "test"}; auto s = get_element(col, 2); - auto typed_s = static_cast(s.get()); + auto typed_s = static_cast(s.get()); EXPECT_TRUE(s->is_valid()); EXPECT_EQ("", typed_s->to_string()); @@ -105,7 +109,7 @@ TEST_F(StringGetValueTest, GetFromNullable) strings_column_wrapper col({"this", "is", "a", "test"}, {0, 1, 0, 1}); auto s = get_element(col, 1); - auto typed_s = static_cast(s.get()); + auto typed_s = static_cast(s.get()); EXPECT_TRUE(s->is_valid()); EXPECT_EQ("is", typed_s->to_string()); @@ -134,7 +138,7 @@ TYPED_TEST(DictionaryGetValueTest, BasicGet) auto s = get_element(*col, 2); using ScalarType = scalar_type_t; - auto typed_s = static_cast(s.get()); + auto typed_s = static_cast(s.get()); EXPECT_TRUE(s->is_valid()); EXPECT_EQ(cudf::test::make_type_param_scalar(7), typed_s->value()); @@ -149,7 +153,7 @@ TYPED_TEST(DictionaryGetValueTest, GetFromNullable) auto s = get_element(*col, 3); using ScalarType = scalar_type_t; - auto typed_s = static_cast(s.get()); + auto typed_s = static_cast(s.get()); EXPECT_TRUE(s->is_valid()); EXPECT_EQ(cudf::test::make_type_param_scalar(8), typed_s->value()); @@ -170,7 +174,7 @@ TYPED_TEST(DictionaryGetValueTest, GetNull) * Lists test grid: * Dim1 nestedness: {Nested, Non-nested} * Dim2 validity, emptiness: {Null element, Non-null non-empty list, Non-null empty list} - * Dim3 leaf data type: {Fixed-width, string} + * Dim3 leaf data type: {Fixed-width, string, struct} */ template @@ -188,7 +192,7 @@ TYPED_TEST(ListGetFixedWidthValueTest, NonNestedGetNonNullNonEmpty) size_type index = 0; auto s = get_element(col, index); - auto typed_s = static_cast(s.get()); + auto typed_s = static_cast(s.get()); EXPECT_TRUE(s->is_valid()); CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected_data, typed_s->view()); @@ -203,7 +207,7 @@ TYPED_TEST(ListGetFixedWidthValueTest, NonNestedGetNonNullEmpty) size_type index = 1; auto s = get_element(col, index); - auto typed_s = static_cast(s.get()); + auto typed_s = static_cast(s.get()); EXPECT_TRUE(s->is_valid()); CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected_data, typed_s->view()); @@ -238,7 +242,7 @@ TYPED_TEST(ListGetFixedWidthValueTest, NestedGetNonNullNonEmpty) size_type index = 3; auto s = get_element(col, index); - auto typed_s = static_cast(s.get()); + auto typed_s = static_cast(s.get()); EXPECT_TRUE(s->is_valid()); CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected_data, typed_s->view()); @@ -261,7 +265,7 @@ TYPED_TEST(ListGetFixedWidthValueTest, NestedGetNonNullNonEmptyPreserveNull) size_type index = 3; auto s = get_element(col, index); - auto typed_s = static_cast(s.get()); + auto typed_s = static_cast(s.get()); EXPECT_TRUE(s->is_valid()); CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected_data, typed_s->view()); @@ -283,7 +287,7 @@ TYPED_TEST(ListGetFixedWidthValueTest, NestedGetNonNullEmpty) size_type index = 1; auto s = get_element(col, index); - auto typed_s = static_cast(s.get()); + auto typed_s = static_cast(s.get()); EXPECT_TRUE(s->is_valid()); CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected_data, typed_s->view()); @@ -322,7 +326,7 @@ TEST_F(ListGetStringValueTest, NonNestedGetNonNullNonEmpty) size_type index = 0; auto s = get_element(col, index); - auto typed_s = static_cast(s.get()); + auto typed_s = static_cast(s.get()); EXPECT_TRUE(s->is_valid()); CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected_data, typed_s->view()); @@ -337,7 +341,7 @@ TEST_F(ListGetStringValueTest, NonNestedGetNonNullEmpty) size_type index = 1; auto s = get_element(col, index); - auto typed_s = static_cast(s.get()); + auto typed_s = static_cast(s.get()); EXPECT_TRUE(s->is_valid()); CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected_data, typed_s->view()); @@ -372,7 +376,7 @@ TEST_F(ListGetStringValueTest, NestedGetNonNullNonEmpty) size_type index = 2; auto s = get_element(col, index); - auto typed_s = static_cast(s.get()); + auto typed_s = static_cast(s.get()); EXPECT_TRUE(s->is_valid()); CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected_data, typed_s->view()); @@ -395,7 +399,7 @@ TEST_F(ListGetStringValueTest, NestedGetNonNullNonEmptyPreserveNull) size_type index = 2; auto s = get_element(col, index); - auto typed_s = static_cast(s.get()); + auto typed_s = static_cast(s.get()); EXPECT_TRUE(s->is_valid()); CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected_data, typed_s->view()); @@ -417,7 +421,7 @@ TEST_F(ListGetStringValueTest, NestedGetNonNullEmpty) size_type index = 3; auto s = get_element(col, index); - auto typed_s = static_cast(s.get()); + auto typed_s = static_cast(s.get()); EXPECT_TRUE(s->is_valid()); // Relax to equivalent. `expected_data` leaf string column does not @@ -446,5 +450,293 @@ TEST_F(ListGetStringValueTest, NestedGetNull) EXPECT_FALSE(s->is_valid()); } +/** + * @brief Some shared helper functions used by lists of structs test. + */ +template +struct ListGetStructValueTest : public BaseFixture { + using SCW = structs_column_wrapper; + using LCWinner_t = cudf::test::lists_column_wrapper; + + /** + * @brief Create a lists column + * + * @note Different from `cudf::make_lists_column`, this allows setting the `null_mask` + * in `initializer_list`. However this is an expensive function because it repeatedly + * calls `cudf::set_null_mask` for each row. + */ + std::unique_ptr make_test_lists_column(size_type num_lists, + fixed_width_column_wrapper offsets, + std::unique_ptr child, + std::initializer_list null_mask) + { + size_type null_count = num_lists - std::accumulate(null_mask.begin(), null_mask.end(), 0); + auto d_null_mask = cudf::create_null_mask( + num_lists, null_count == 0 ? cudf::mask_state::UNALLOCATED : cudf::mask_state::ALL_NULL); + if (null_count > 0) { + std::for_each( + thrust::make_counting_iterator(0), thrust::make_counting_iterator(num_lists), [&](auto i) { + if (*(null_mask.begin() + i)) { + set_null_mask(static_cast(d_null_mask.data()), i, i + 1, true); + } + }); + } + return cudf::make_lists_column( + num_lists, offsets.release(), std::move(child), null_count, std::move(d_null_mask)); + } + + /** + * @brief Create a structs column that contains 3 fields: int, string, List + */ + template + SCW make_test_structs_column(fixed_width_column_wrapper field1, + strings_column_wrapper field2, + lists_column_wrapper field3, + MaskIterator mask) + { + return SCW{{field1, field2, field3}, mask}; + } + + /** + * @brief Create a 0-length structs column + */ + SCW zero_length_struct() { return SCW{}; } + + /** + * @brief Concatenate structs columns, allow specifying inputs in `initializer_list` + */ + std::unique_ptr concat(std::initializer_list rows) + { + std::vector views; + std::transform( + rows.begin(), rows.end(), std::back_inserter(views), [](auto &r) { return column_view(r); }); + return cudf::concatenate(views); + } + + /** + * @brief Test data setup: row 0 of structs column + */ + SCW row0() + { + // {int: 1, string: NULL, list: NULL} + return this->make_test_structs_column({{1}, {1}}, + strings_column_wrapper({"aa"}, {false}), + LCWinner_t({{}}, all_invalid()), + all_valid()); + } + + /** + * @brief Test data setup: row 1 of structs column + */ + SCW row1() + { + // NULL + return this->make_test_structs_column({-1}, {""}, LCWinner_t{-1}, all_invalid()); + } + + /** + * @brief Test data setup: row 2 of structs column + */ + SCW row2() + { + // {int: 3, string: "xyz", list: [3, 8, 4]} + return this->make_test_structs_column({{3}, {1}}, + strings_column_wrapper({"xyz"}, {true}), + LCWinner_t({{3, 8, 4}}, all_valid()), + all_valid()); + } + + /** + * @brief Test data setup: a 3-row structs column + */ + std::unique_ptr leaf_data() + { + // 3 rows: + // {int: 1, string: NULL, list: NULL} + // NULL + // {int: 3, string: "xyz", list: [3, 8, 4]} + return this->concat({row0(), row1(), row2()}); + } + + auto all_valid() { return thrust::make_constant_iterator(true); } + auto all_invalid() { return thrust::make_constant_iterator(false); } +}; + +// TYPED_TEST_CASE(ListGetStructValueTest, FixedWidthTypes); +TYPED_TEST_CASE(ListGetStructValueTest, int32_t); + +TYPED_TEST(ListGetStructValueTest, NonNestedGetNonNullNonEmpty) +{ + // 2-rows + // [{1, NULL, NULL}, NULL] + // [{3, "xyz", [3, 8, 4]}] <- get_element(1) + + auto list_column = this->make_test_lists_column(2, {0, 2, 3}, this->leaf_data(), {1, 1}); + size_type index = 1; + auto expected_data = this->row2(); + + auto s = get_element(list_column->view(), index); + auto typed_s = static_cast(s.get()); + + EXPECT_TRUE(s->is_valid()); + // Relax to equivalent. The nested list column in struct allocates `null_mask`. + CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected_data, typed_s->view()); +} + +TYPED_TEST(ListGetStructValueTest, NonNestedGetNonNullNonEmpty2) +{ + // 2-rows + // [{1, NULL, NULL}, NULL] <- get_element(0) + // [{3, "xyz", [3, 8, 4]}] + + auto list_column = this->make_test_lists_column(2, {0, 2, 3}, this->leaf_data(), {1, 1}); + size_type index = 0; + auto expected_data = this->concat({this->row0(), this->row1()}); + + auto s = get_element(list_column->view(), index); + auto typed_s = static_cast(s.get()); + + EXPECT_TRUE(s->is_valid()); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(*expected_data, typed_s->view()); +} + +TYPED_TEST(ListGetStructValueTest, NonNestedGetNonNullEmpty) +{ + // 3-rows + // [{1, NULL, NULL}, NULL] + // [{3, "xyz", [3, 8, 4]}] + // [] <- get_element(0) + + auto list_column = this->make_test_lists_column(3, {0, 2, 3, 3}, this->leaf_data(), {1, 1, 1}); + size_type index = 2; + // For well-formed list column, an empty list still holds the complete structure of + // a 0-length structs column + auto expected_data = this->zero_length_struct(); + + auto s = get_element(list_column->view(), index); + auto typed_s = static_cast(s.get()); + + EXPECT_TRUE(s->is_valid()); + // Relax to equivalent. The nested list column in struct allocates `null_mask`. + CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected_data, typed_s->view()); +} + +TYPED_TEST(ListGetStructValueTest, NonNestedGetNull) +{ + // 2-rows + // NULL <- get_element(0) + // [{3, "xyz", [3, 8, 4]}] + + auto list_column = this->make_test_lists_column(2, {0, 2, 3}, this->leaf_data(), {0, 1}); + size_type index = 0; + + auto s = get_element(list_column->view(), index); + + EXPECT_FALSE(s->is_valid()); +} + +TYPED_TEST(ListGetStructValueTest, NestedGetNonNullNonEmpty) +{ + // 2-rows + // [[{1, NULL, NULL}, NULL], [{3, "xyz", [3, 8, 4]}]] <- get_element(0) + // [] + + auto list_column = this->make_test_lists_column(2, {0, 2, 3}, this->leaf_data(), {1, 1}); + auto expected_data = std::make_unique(*list_column); + + auto list_column_nested = + this->make_test_lists_column(2, {0, 2, 2}, std::move(list_column), {1, 1}); + + size_type index = 0; + auto s = get_element(list_column_nested->view(), index); + auto typed_s = static_cast(s.get()); + + EXPECT_TRUE(s->is_valid()); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(*expected_data, typed_s->view()); +} + +TYPED_TEST(ListGetStructValueTest, NestedGetNonNullNonEmpty2) +{ + // 2-rows + // [[{1, NULL, NULL}, NULL]] <- get_element(0) + // [[{3, "xyz", [3, 8, 4]}]] + + auto list_column = this->make_test_lists_column(2, {0, 2, 3}, this->leaf_data(), {1, 1}); + auto list_column_nested = + this->make_test_lists_column(2, {0, 1, 2}, std::move(list_column), {1, 1}); + + auto expected_data = + this->make_test_lists_column(1, {0, 2}, this->concat({this->row0(), this->row1()}), {1}); + + size_type index = 0; + auto s = get_element(list_column_nested->view(), index); + auto typed_s = static_cast(s.get()); + + EXPECT_TRUE(s->is_valid()); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(*expected_data, typed_s->view()); +} + +TYPED_TEST(ListGetStructValueTest, NestedGetNonNullNonEmpty3) +{ + // 2-rows + // [[{1, NULL, NULL}, NULL]] + // [[{3, "xyz", [3, 8, 4]}]] <- get_element(1) + + auto list_column = this->make_test_lists_column(2, {0, 2, 3}, this->leaf_data(), {1, 1}); + auto list_column_nested = + this->make_test_lists_column(2, {0, 1, 2}, std::move(list_column), {1, 1}); + + auto expected_data = this->make_test_lists_column(1, {0, 1}, this->row2().release(), {1}); + + size_type index = 1; + auto s = get_element(list_column_nested->view(), index); + auto typed_s = static_cast(s.get()); + + EXPECT_TRUE(s->is_valid()); + // Relax to equivalent. For `get_element`, the nested list column in struct + // allocates `null_mask`. + CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*expected_data, typed_s->view()); +} + +TYPED_TEST(ListGetStructValueTest, NestedGetNonNullEmpty) +{ + // 3-rows + // [[{1, NULL, NULL}, NULL]] + // [] <- get_element(1) + // [[{3, "xyz", [3, 8, 4]}]] + + auto list_column = this->make_test_lists_column(2, {0, 2, 3}, this->leaf_data(), {1, 1}); + auto list_column_nested = + this->make_test_lists_column(3, {0, 1, 1, 2}, std::move(list_column), {1, 1, 1}); + + auto expected_data = + this->make_test_lists_column(0, {0}, this->zero_length_struct().release(), {1}); + + size_type index = 1; + auto s = get_element(list_column_nested->view(), index); + auto typed_s = static_cast(s.get()); + + EXPECT_TRUE(s->is_valid()); + // Relax to equivalent. The sliced version still has the array for fields + // allocated. + CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*expected_data, typed_s->view()); +} + +TYPED_TEST(ListGetStructValueTest, NestedGetNull) +{ + // 3-rows + // [[{1, NULL, NULL}, NULL]] + // [] + // NULL <- get_element(1) + auto list_column = this->make_test_lists_column(2, {0, 2, 3}, this->leaf_data(), {1, 1}); + auto list_column_nested = + this->make_test_lists_column(3, {0, 1, 1, 2}, std::move(list_column), {1, 1, 0}); + + size_type index = 2; + auto s = get_element(list_column_nested->view(), index); + + EXPECT_FALSE(s->is_valid()); +} + } // namespace test } // namespace cudf From 623b7dcbaa1c92bff767e24898bf08e16725fee2 Mon Sep 17 00:00:00 2001 From: Michael Wang Date: Mon, 3 May 2021 16:47:35 -0700 Subject: [PATCH 12/22] remove clangd includes --- cpp/tests/copying/get_value_tests.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/cpp/tests/copying/get_value_tests.cpp b/cpp/tests/copying/get_value_tests.cpp index efd360ed2ca..62bcf05046a 100644 --- a/cpp/tests/copying/get_value_tests.cpp +++ b/cpp/tests/copying/get_value_tests.cpp @@ -15,6 +15,7 @@ */ #include +#include #include #include #include @@ -27,8 +28,6 @@ #include #include #include -#include "build/cuda-11.2/groupshift-python/release/_deps/gtest-src/googletest/include/gtest/gtest.h" -#include "cudf/concatenate.hpp" namespace cudf { namespace test { From 656fcad6bbf90309f34697385850dca2523923db Mon Sep 17 00:00:00 2001 From: Michael Wang Date: Mon, 3 May 2021 17:05:37 -0700 Subject: [PATCH 13/22] Enable all typed test cases --- cpp/tests/copying/get_value_tests.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/cpp/tests/copying/get_value_tests.cpp b/cpp/tests/copying/get_value_tests.cpp index 62bcf05046a..ea738fec2fb 100644 --- a/cpp/tests/copying/get_value_tests.cpp +++ b/cpp/tests/copying/get_value_tests.cpp @@ -561,8 +561,7 @@ struct ListGetStructValueTest : public BaseFixture { auto all_invalid() { return thrust::make_constant_iterator(false); } }; -// TYPED_TEST_CASE(ListGetStructValueTest, FixedWidthTypes); -TYPED_TEST_CASE(ListGetStructValueTest, int32_t); +TYPED_TEST_CASE(ListGetStructValueTest, FixedWidthTypes); TYPED_TEST(ListGetStructValueTest, NonNestedGetNonNullNonEmpty) { From e930c6e303bf187a51b1e631776853365cb324db Mon Sep 17 00:00:00 2001 From: Michael Wang Date: Mon, 3 May 2021 17:19:03 -0700 Subject: [PATCH 14/22] sprinkling null values inside leaf data columns --- cpp/tests/copying/get_value_tests.cpp | 44 ++++++++++++++++++--------- 1 file changed, 30 insertions(+), 14 deletions(-) diff --git a/cpp/tests/copying/get_value_tests.cpp b/cpp/tests/copying/get_value_tests.cpp index ea738fec2fb..295af4d6bcd 100644 --- a/cpp/tests/copying/get_value_tests.cpp +++ b/cpp/tests/copying/get_value_tests.cpp @@ -178,6 +178,14 @@ TYPED_TEST(DictionaryGetValueTest, GetNull) template struct ListGetFixedWidthValueTest : public BaseFixture { + auto odds_valid() + { + return cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i % 2; }); + } + auto nth_valid(size_type x) + { + return cudf::detail::make_counting_transform_iterator(0, [&](auto i) { return x == i; }); + } }; TYPED_TEST_CASE(ListGetFixedWidthValueTest, FixedWidthTypes); @@ -186,8 +194,8 @@ TYPED_TEST(ListGetFixedWidthValueTest, NonNestedGetNonNullNonEmpty) { using LCW = cudf::test::lists_column_wrapper; - LCW col{LCW{1, 2, 34}, LCW{}, LCW{1}, LCW{}}; - fixed_width_column_wrapper expected_data{1, 2, 34}; + LCW col{LCW({1, 2, 34}, this->odds_valid()), LCW{}, LCW{1}, LCW{}}; + fixed_width_column_wrapper expected_data({1, 2, 34}, this->odds_valid()); size_type index = 0; auto s = get_element(col, index); @@ -215,8 +223,7 @@ TYPED_TEST(ListGetFixedWidthValueTest, NonNestedGetNonNullEmpty) TYPED_TEST(ListGetFixedWidthValueTest, NonNestedGetNull) { using LCW = cudf::test::lists_column_wrapper; - std::vector valid{0, 1, 0, 1}; - LCW col({LCW{1, 2, 34}, LCW{}, LCW{1}, LCW{}}, valid.begin()); + LCW col({LCW{1, 2, 34}, LCW{}, LCW{1}, LCW{}}, this->odds_valid()); size_type index = 2; auto s = get_element(col, index); @@ -251,16 +258,16 @@ TYPED_TEST(ListGetFixedWidthValueTest, NestedGetNonNullNonEmptyPreserveNull) { using LCW = cudf::test::lists_column_wrapper; - std::vector valid{0, 1}; + std::vector valid{0, 1, 1}; // clang-format off LCW col{ LCW{LCW{1, 2}, LCW{34}}, LCW{}, LCW{LCW{1}}, - LCW({LCW{42}, LCW{10}}, valid.begin()) + LCW({LCW{42}, LCW{10}, LCW({1, 3, 2}, this->nth_valid(1))}, valid.begin()) }; // clang-format on - LCW expected_data({LCW{42}, LCW{10}}, valid.begin()); + LCW expected_data({LCW{42}, LCW{10}, LCW({1, 3, 2}, this->nth_valid(1))}, valid.begin()); size_type index = 3; auto s = get_element(col, index); @@ -314,14 +321,22 @@ TYPED_TEST(ListGetFixedWidthValueTest, NestedGetNull) } struct ListGetStringValueTest : public BaseFixture { + auto odds_valid() + { + return cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i % 2; }); + } + auto nth_valid(size_type x) + { + return cudf::detail::make_counting_transform_iterator(0, [&](auto i) { return x == i; }); + } }; TEST_F(ListGetStringValueTest, NonNestedGetNonNullNonEmpty) { using LCW = cudf::test::lists_column_wrapper; - LCW col{LCW{"aaa", "Héllo"}, LCW{}, LCW{""}, LCW{"42"}}; - strings_column_wrapper expected_data{"aaa", "Héllo"}; + LCW col{LCW({"aaa", "Héllo"}, this->odds_valid()), LCW{}, LCW{""}, LCW{"42"}}; + strings_column_wrapper expected_data({"aaa", "Héllo"}, this->odds_valid()); size_type index = 0; auto s = get_element(col, index); @@ -367,11 +382,11 @@ TEST_F(ListGetStringValueTest, NestedGetNonNullNonEmpty) LCW col{ LCW{LCW{"aaa", "Héllo"}}, LCW{}, - LCW{LCW{""}}, + LCW{LCW{""}, LCW({"string", "str2", "xyz"}, this->nth_valid(0))}, LCW{LCW{"42"}, LCW{"21"}} }; // clang-format on - LCW expected_data{LCW{""}}; + LCW expected_data{LCW{""}, LCW({"string", "str2", "xyz"}, this->nth_valid(0))}; size_type index = 2; auto s = get_element(col, index); @@ -385,16 +400,17 @@ TEST_F(ListGetStringValueTest, NestedGetNonNullNonEmptyPreserveNull) { using LCW = cudf::test::lists_column_wrapper; - std::vector valid{0, 1}; + std::vector valid{0, 1, 1}; // clang-format off LCW col{ LCW{LCW{"aaa", "Héllo"}}, LCW{}, - LCW({LCW{""}, LCW{"cc"}}, valid.begin()), + LCW({LCW{""}, LCW{"cc"}, LCW({"string", "str2", "xyz"}, this->nth_valid(0))}, valid.begin()), LCW{LCW{"42"}, LCW{"21"}} }; // clang-format on - LCW expected_data({LCW{""}, LCW{"cc"}}, valid.begin()); + LCW expected_data({LCW{""}, LCW{"cc"}, LCW({"string", "str2", "xyz"}, this->nth_valid(0))}, + valid.begin()); size_type index = 2; auto s = get_element(col, index); From b607d37b958f256e31b2a4a1ac9dba7c3db3e5c6 Mon Sep 17 00:00:00 2001 From: Michael Wang Date: Mon, 3 May 2021 18:01:37 -0700 Subject: [PATCH 15/22] scalar.hpp includes clean up --- cpp/include/cudf/scalar/scalar.hpp | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/cpp/include/cudf/scalar/scalar.hpp b/cpp/include/cudf/scalar/scalar.hpp index f3d8ffe2315..5d3ca4f4092 100644 --- a/cpp/include/cudf/scalar/scalar.hpp +++ b/cpp/include/cudf/scalar/scalar.hpp @@ -15,13 +15,12 @@ */ #pragma once -#include +#include +#include #include #include #include -#include - #include #include #include @@ -30,8 +29,6 @@ #include #include -#include - /** * @file * @brief Class definitions for cudf::scalar @@ -614,7 +611,7 @@ class list_scalar : public scalar { * @param stream CUDA stream used for device memory operations. * @param mr Device memory resource to use for device memory allocation */ - list_scalar(cudf::column_view const& elements, + list_scalar(column_view const& elements, bool is_valid = true, rmm::cuda_stream_view stream = rmm::cuda_stream_default, rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()) @@ -630,7 +627,7 @@ class list_scalar : public scalar { * @param stream CUDA stream used for device memory operations. * @param mr Device memory resource to use for device memory allocation */ - list_scalar(cudf::column const&& elements, + list_scalar(column const&& elements, bool is_valid = true, rmm::cuda_stream_view stream = rmm::cuda_stream_default, rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()) From dbb4a693230bf949656c9d834cad44855fc6cd46 Mon Sep 17 00:00:00 2001 From: Michael Wang Date: Mon, 3 May 2021 18:03:31 -0700 Subject: [PATCH 16/22] Doc updates Co-authored-by: Mark Harris --- cpp/docs/DEVELOPER_GUIDE.md | 6 +++--- cpp/include/cudf/detail/is_element_valid.hpp | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/cpp/docs/DEVELOPER_GUIDE.md b/cpp/docs/DEVELOPER_GUIDE.md index e1078c7dd7f..1cf040d1f49 100644 --- a/cpp/docs/DEVELOPER_GUIDE.md +++ b/cpp/docs/DEVELOPER_GUIDE.md @@ -255,9 +255,9 @@ currently supported by cudf. Each type of value is represented by a separate typ which are all derived from `cudf::scalar`. e.g. A `numeric_scalar` holds a single numerical value, a `string_scalar` holds a single string. The data for the stored value resides in device memory. -A `list_scalar` holds the underlying data of a list. This means the underlying data can be any type -that cudf supports. e.g. A `list_scalar` representing a list of integers stores an `cudf::column` -of type `INT32`, a `list_scalar` represents a list of list of integers stores an `cudf::column` of +A `list_scalar` holds the underlying data of a single list. This means the underlying data can be any type +that cudf supports. For example, a `list_scalar` representing a list of integers stores a `cudf::column` +of type `INT32`. A `list_scalar` representing a list of lists of integers stores a `cudf::column` of type `LIST`, which in turn stores a column of type `INT32`. |Value type|Scalar class|Notes| diff --git a/cpp/include/cudf/detail/is_element_valid.hpp b/cpp/include/cudf/detail/is_element_valid.hpp index ffcba804a7c..fff67f107d9 100644 --- a/cpp/include/cudf/detail/is_element_valid.hpp +++ b/cpp/include/cudf/detail/is_element_valid.hpp @@ -26,7 +26,7 @@ namespace detail { /** * @brief Return validity of a row * - * Retrieves the specified row validity from device memory. + * Retrieves the validity (NULL or non-NULL) of the specified row from device memory. * * @note Synchronizes `stream`. * From 982a7c22c569555dfd3cabec80ac68f4b82308b5 Mon Sep 17 00:00:00 2001 From: Michael Wang Date: Tue, 4 May 2021 10:11:58 -0700 Subject: [PATCH 17/22] Factories, dev guide updates --- cpp/docs/DEVELOPER_GUIDE.md | 11 ++++--- cpp/include/cudf/scalar/scalar.hpp | 13 ++++++++ cpp/include/cudf/scalar/scalar_factories.hpp | 33 ++++++++++++++++++++ cpp/src/scalar/scalar.cpp | 10 ++++++ 4 files changed, 62 insertions(+), 5 deletions(-) diff --git a/cpp/docs/DEVELOPER_GUIDE.md b/cpp/docs/DEVELOPER_GUIDE.md index 1cf040d1f49..35c94233a78 100644 --- a/cpp/docs/DEVELOPER_GUIDE.md +++ b/cpp/docs/DEVELOPER_GUIDE.md @@ -274,8 +274,6 @@ type `LIST`, which in turn stores a column of type `INT32`. `scalar`s can be created using either their respective constructors or using factory functions like `make_numeric_scalar()`, `make_timestamp_scalar()` or `make_string_scalar()`. -// TODO: add details for `list_scalar` - ### Casting All the factory methods return a `unique_ptr` which needs to be statically downcasted to its respective scalar class type before accessing its value. Their validity (nullness) can be @@ -293,12 +291,15 @@ auto s1 = static_cast(s.get()); ``` ### Passing to device -Each scalar type has a corresponding non-owning device view class which allows access to the value -and its validity from the device. This can be obtained using the function +Each scalar type, except `list_scalar`, has a corresponding non-owning device view class which allows +access to the value and its validity from the device. This can be obtained using the function `get_scalar_device_view(ScalarType s)`. Note that a device view is not provided for a base scalar object, only for the derived typed scalar class objects. -// TODO: add details for `list_scalar` +The underlying data for `list_scalar` can be accessed via `view()` method. For non-nested data, +the device view can be obtained via function `column_device_view::create(column_view)`. For nested +data, a specialized device view for list columns can be constructed via +`lists_column_device_view(column_device_view)`. # libcudf++ API and Implementation diff --git a/cpp/include/cudf/scalar/scalar.hpp b/cpp/include/cudf/scalar/scalar.hpp index 5d3ca4f4092..750ce8b07ac 100644 --- a/cpp/include/cudf/scalar/scalar.hpp +++ b/cpp/include/cudf/scalar/scalar.hpp @@ -635,6 +635,19 @@ class list_scalar : public scalar { { } + /** + * @brief Retrieve `row_index` row from column `col` + * + * @param col The column view to retrieve row from + * @param row_index Index into `col` to retrieve row from + * @param stream CUDA stream used for device memory operations. + * @param mr Device memory resource to use for device memory allocation + */ + list_scalar(column_view const& col, + size_type row_index, + rmm::cuda_stream_view stream = rmm::cuda_stream_default, + rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + /** * @brief Returns a non-owning, immutable view to underlying device data */ diff --git a/cpp/include/cudf/scalar/scalar_factories.hpp b/cpp/include/cudf/scalar/scalar_factories.hpp index a0a0a22091e..9823c5b052f 100644 --- a/cpp/include/cudf/scalar/scalar_factories.hpp +++ b/cpp/include/cudf/scalar/scalar_factories.hpp @@ -156,5 +156,38 @@ std::unique_ptr make_fixed_point_scalar( return std::make_unique>(value, scale, true, stream, mr); } +/** + * @brief Construct scalar using the given value of fixed_point type + * + * @param elements Elements of the list + * @param stream CUDA stream used for device memory operations. + * @param mr Device memory resource used to allocate the scalar's `data` and `is_valid` bool. + */ +std::unique_ptr make_list_scalar( + column_view elements, + rmm::cuda_stream_view stream = rmm::cuda_stream_default, + rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()) +{ + return std::make_unique(elements, true, stream, mr); +} + +/** + * @brief Construct scalar from the @p row_index row of column @p col + * + * @param col The list column to retrieve row data from. + * @param row_index The index + * @param stream CUDA stream used for device memory operations. + * @param mr Device memory resource used to allocate the scalar's `data` and `is_valid` bool. + */ +std::unique_ptr make_list_scalar( + column_view col, + size_type row_index, + rmm::cuda_stream_view stream = rmm::cuda_stream_default, + rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()) +{ + CUDF_EXPECTS(col.type().id() == type_id::LIST, "Column is not LIST type."); + return std::make_unique(col, row_index, stream, mr); +} + /** @} */ // end of group } // namespace cudf diff --git a/cpp/src/scalar/scalar.cpp b/cpp/src/scalar/scalar.cpp index fe051b1ffc5..55596de8627 100644 --- a/cpp/src/scalar/scalar.cpp +++ b/cpp/src/scalar/scalar.cpp @@ -14,6 +14,7 @@ * limitations under the License. */ +#include #include #include @@ -56,4 +57,13 @@ std::string string_scalar::to_string(rmm::cuda_stream_view stream) const return result; } +list_scalar::list_scalar(column_view const& col, + size_type row_index, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) + : list_scalar(std::move( + *static_cast(detail::get_element(col, row_index, stream, mr).release()))) +{ +} + } // namespace cudf From 3a2d0c38bea51168b73c319b2a79a00d3552dae0 Mon Sep 17 00:00:00 2001 From: Michael Wang Date: Tue, 4 May 2021 15:10:30 -0700 Subject: [PATCH 18/22] Reverting construct from row factory --- cpp/include/cudf/scalar/scalar.hpp | 13 ----------- cpp/include/cudf/scalar/scalar_factories.hpp | 23 +------------------- cpp/src/scalar/scalar.cpp | 9 -------- cpp/src/scalar/scalar_factories.cpp | 7 ++++++ 4 files changed, 8 insertions(+), 44 deletions(-) diff --git a/cpp/include/cudf/scalar/scalar.hpp b/cpp/include/cudf/scalar/scalar.hpp index 750ce8b07ac..5d3ca4f4092 100644 --- a/cpp/include/cudf/scalar/scalar.hpp +++ b/cpp/include/cudf/scalar/scalar.hpp @@ -635,19 +635,6 @@ class list_scalar : public scalar { { } - /** - * @brief Retrieve `row_index` row from column `col` - * - * @param col The column view to retrieve row from - * @param row_index Index into `col` to retrieve row from - * @param stream CUDA stream used for device memory operations. - * @param mr Device memory resource to use for device memory allocation - */ - list_scalar(column_view const& col, - size_type row_index, - rmm::cuda_stream_view stream = rmm::cuda_stream_default, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); - /** * @brief Returns a non-owning, immutable view to underlying device data */ diff --git a/cpp/include/cudf/scalar/scalar_factories.hpp b/cpp/include/cudf/scalar/scalar_factories.hpp index 9823c5b052f..12645ea78cb 100644 --- a/cpp/include/cudf/scalar/scalar_factories.hpp +++ b/cpp/include/cudf/scalar/scalar_factories.hpp @@ -166,28 +166,7 @@ std::unique_ptr make_fixed_point_scalar( std::unique_ptr make_list_scalar( column_view elements, rmm::cuda_stream_view stream = rmm::cuda_stream_default, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()) -{ - return std::make_unique(elements, true, stream, mr); -} - -/** - * @brief Construct scalar from the @p row_index row of column @p col - * - * @param col The list column to retrieve row data from. - * @param row_index The index - * @param stream CUDA stream used for device memory operations. - * @param mr Device memory resource used to allocate the scalar's `data` and `is_valid` bool. - */ -std::unique_ptr make_list_scalar( - column_view col, - size_type row_index, - rmm::cuda_stream_view stream = rmm::cuda_stream_default, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()) -{ - CUDF_EXPECTS(col.type().id() == type_id::LIST, "Column is not LIST type."); - return std::make_unique(col, row_index, stream, mr); -} + rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** @} */ // end of group } // namespace cudf diff --git a/cpp/src/scalar/scalar.cpp b/cpp/src/scalar/scalar.cpp index 55596de8627..3fe62978c2d 100644 --- a/cpp/src/scalar/scalar.cpp +++ b/cpp/src/scalar/scalar.cpp @@ -57,13 +57,4 @@ std::string string_scalar::to_string(rmm::cuda_stream_view stream) const return result; } -list_scalar::list_scalar(column_view const& col, - size_type row_index, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) - : list_scalar(std::move( - *static_cast(detail::get_element(col, row_index, stream, mr).release()))) -{ -} - } // namespace cudf diff --git a/cpp/src/scalar/scalar_factories.cpp b/cpp/src/scalar/scalar_factories.cpp index e8eadf31c9e..7054e2b5c8f 100644 --- a/cpp/src/scalar/scalar_factories.cpp +++ b/cpp/src/scalar/scalar_factories.cpp @@ -97,6 +97,13 @@ std::unique_ptr make_fixed_width_scalar(data_type type, return type_dispatcher(type, scalar_construction_helper{}, stream, mr); } +std::unique_ptr make_list_scalar(column_view elements, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) +{ + return std::make_unique(elements, true, stream, mr); +} + namespace { struct default_scalar_functor { template From 54c8374505fe9bb61e72b1471cd152edbd819ace Mon Sep 17 00:00:00 2001 From: Michael Wang Date: Wed, 5 May 2021 20:11:53 -0700 Subject: [PATCH 19/22] Fix tests --- cpp/tests/copying/get_value_tests.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/tests/copying/get_value_tests.cpp b/cpp/tests/copying/get_value_tests.cpp index 295af4d6bcd..d29d5d7da2a 100644 --- a/cpp/tests/copying/get_value_tests.cpp +++ b/cpp/tests/copying/get_value_tests.cpp @@ -184,7 +184,7 @@ struct ListGetFixedWidthValueTest : public BaseFixture { } auto nth_valid(size_type x) { - return cudf::detail::make_counting_transform_iterator(0, [&](auto i) { return x == i; }); + return cudf::detail::make_counting_transform_iterator(0, [=](auto i) { return x == i; }); } }; From 65569b4839af739e0e747f7bea5325d9fa4385a5 Mon Sep 17 00:00:00 2001 From: Michael Wang Date: Wed, 5 May 2021 21:31:10 -0700 Subject: [PATCH 20/22] More fix tests --- cpp/tests/copying/get_value_tests.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/tests/copying/get_value_tests.cpp b/cpp/tests/copying/get_value_tests.cpp index d29d5d7da2a..7d2bc458462 100644 --- a/cpp/tests/copying/get_value_tests.cpp +++ b/cpp/tests/copying/get_value_tests.cpp @@ -327,7 +327,7 @@ struct ListGetStringValueTest : public BaseFixture { } auto nth_valid(size_type x) { - return cudf::detail::make_counting_transform_iterator(0, [&](auto i) { return x == i; }); + return cudf::detail::make_counting_transform_iterator(0, [=](auto i) { return x == i; }); } }; From 6e25f96954a330ae518d8ed148a67012e75e68a0 Mon Sep 17 00:00:00 2001 From: Michael Wang Date: Thu, 6 May 2021 10:21:46 -0700 Subject: [PATCH 21/22] Scalar move column operator fix --- cpp/src/scalar/scalar.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/src/scalar/scalar.cpp b/cpp/src/scalar/scalar.cpp index 5f091461e29..858d2c063b3 100644 --- a/cpp/src/scalar/scalar.cpp +++ b/cpp/src/scalar/scalar.cpp @@ -440,7 +440,7 @@ list_scalar::list_scalar(cudf::column&& data, bool is_valid, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) - : scalar(data_type(type_id::LIST), is_valid, stream, mr), _data(data) + : scalar(data_type(type_id::LIST), is_valid, stream, mr), _data(std::move(data)) { } From aa38f7f23e018622b80a683bf38dc85eb8bef878 Mon Sep 17 00:00:00 2001 From: Michael Wang Date: Fri, 7 May 2021 12:12:17 -0700 Subject: [PATCH 22/22] Update cpp/include/cudf/scalar/scalar_factories.hpp Co-authored-by: nvdbaranec <56695930+nvdbaranec@users.noreply.github.com> --- cpp/include/cudf/scalar/scalar_factories.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/include/cudf/scalar/scalar_factories.hpp b/cpp/include/cudf/scalar/scalar_factories.hpp index 12645ea78cb..8e10e122571 100644 --- a/cpp/include/cudf/scalar/scalar_factories.hpp +++ b/cpp/include/cudf/scalar/scalar_factories.hpp @@ -157,7 +157,7 @@ std::unique_ptr make_fixed_point_scalar( } /** - * @brief Construct scalar using the given value of fixed_point type + * @brief Construct scalar using the given column of elements * * @param elements Elements of the list * @param stream CUDA stream used for device memory operations.