From 7242bf8a0a297ef7b0b6d3953e94815217bfc715 Mon Sep 17 00:00:00 2001 From: Robert Maynard Date: Tue, 30 Mar 2021 20:35:05 -0400 Subject: [PATCH 01/16] Introduces `make_optional_iterator` for nullable column and scalars This is a first step in fixing issues brought up in #6952 and #7573. The iterator produces `thrust::optional` to better represent nullable column elements or scalars. `make_optional_iterator` supports three different `contains_null` modes: - `YES` means that the column supports nulls and has null values, therefore the optional might not contain a value - `NO` means that the column has no null values, therefore the optional will always have a value - `DYNAMIC` defers the assumption of nullability to runtime with the users stating on construction of the iterator if column has nulls. --- .../cudf/column/column_device_view.cuh | 264 ++++++++++++++++- cpp/include/cudf/detail/iterator.cuh | 279 ++++++++++++++++++ cpp/tests/CMakeLists.txt | 5 +- cpp/tests/iterator/optional_iterator_test.cu | 208 +++++++++++++ 4 files changed, 753 insertions(+), 3 deletions(-) create mode 100644 cpp/tests/iterator/optional_iterator_test.cu diff --git a/cpp/include/cudf/column/column_device_view.cuh b/cpp/include/cudf/column/column_device_view.cuh index a842e51c94a..f0eb37d5de6 100644 --- a/cpp/include/cudf/column/column_device_view.cuh +++ b/cpp/include/cudf/column/column_device_view.cuh @@ -31,6 +31,7 @@ #include #include +#include #include @@ -40,6 +41,28 @@ */ namespace cudf { + +/** + * @brief Policy for what assumptions the optional iterator has about null values + * + * - `YES` means that the column supports nulls and has null values, therefore + * the optional might not contain a value + * + * - `NO` means that the column has no null values, therefore the optional will + * always have a value + * + * - `DYNAMIC` defers the assumption of nullability to runtime with the users stating + * on construction of the iterator if column has nulls. + */ +namespace contains_nulls { +struct YES { +}; +struct NO { +}; +struct DYNAMIC { +}; +} // namespace contains_nulls + namespace detail { /** * @brief An immutable, non-owning view of device data as a column of elements @@ -255,10 +278,11 @@ class alignas(16) column_device_view_base { : std::true_type { }; }; - // Forward declaration template struct value_accessor; +template +struct optional_accessor; template struct pair_accessor; template @@ -484,6 +508,13 @@ class alignas(16) column_device_view : public detail::column_device_view_base { return const_iterator{count_it{size()}, detail::value_accessor{*this}}; } + /** + * @brief optional iterator for navigating this column + */ + template + using const_optional_iterator = + thrust::transform_iterator, count_it>; + /** * @brief Pair iterator for navigating this column */ @@ -500,6 +531,86 @@ class alignas(16) column_device_view : public detail::column_device_view_base { using const_pair_rep_iterator = thrust::transform_iterator, count_it>; + /** + * @brief Return an optional iterator to the first element of the column. + * + * Dereferencing the returned iterator returns a `thrust::optional`. + * + * When the element of an iterator contextually converted to bool, the conversion returns true + * if the object contains a value and false if it does not contain a value. + * + * optional_begin with mode `DYNAMIC` defers the assumption of nullability to + * runtime, with the user stating on construction of the iterator if column has nulls. + * + * This function does not participate in overload resolution if + * `column_device_view::has_element_accessor()` is false. + * + * @throws cudf::logic_error if the column is not nullable, and `DYNAMIC` mode used and + * the user has stated nulls exist + * @throws cudf::logic_error if column datatype and Element type mismatch. + */ + /**@{*/ + template ())> + auto optional_begin(contains_nulls::DYNAMIC, bool has_nulls) const + { + return const_optional_iterator{ + count_it{0}, detail::optional_accessor{*this, has_nulls}}; + } + template ())> + auto optional_begin(contains_nulls::DYNAMIC) const + { + return const_optional_iterator{ + count_it{0}, detail::optional_accessor{*this}}; + } + /**@}*/ + + /** + * @brief Return an optional iterator to the first element of the column. + * + * Dereferencing the returned iterator returns a `thrust::optional`. + * + * When the element of an iterator contextually converted to bool, the conversion returns true + * if the object contains a value and false if it does not contain a value. + * + * optional_begin with mode `YES` means that the column supports nulls and + * potentially has null values, therefore the optional might not contain a value + * + * This function does not participate in overload resolution if + * `column_device_view::has_element_accessor()` is false. + * + * @throws cudf::logic_error if the column is not nullable, and `YES` mode used + * @throws cudf::logic_error if column datatype and Element type mismatch. + */ + template ())> + auto optional_begin(contains_nulls::YES) const + { + return const_optional_iterator{ + count_it{0}, detail::optional_accessor{*this}}; + } + + /** + * @brief Return an optional iterator to the first element of the column. + * + * Dereferencing the returned iterator returns a `thrust::optional`. + * + * When the element of an iterator contextually converted to bool, the conversion returns true + * if the object contains a value and false if it does not contain a value. + * + * optional_begin with mode `NO` means that the column has no null values, + * therefore the optional will always contain a value. + * + * This function does not participate in overload resolution if + * `column_device_view::has_element_accessor()` is false. + * + * @throws cudf::logic_error if column datatype and Element type mismatch. + */ + template ())> + auto optional_begin(contains_nulls::NO) const + { + return const_optional_iterator{ + count_it{0}, detail::optional_accessor{*this}}; + } + /** * @brief Return a pair iterator to the first element of the column. * @@ -558,6 +669,71 @@ class alignas(16) column_device_view : public detail::column_device_view_base { detail::pair_rep_accessor{*this}}; } + /** + * @brief Return an optional iterator to the element following the last element of + * the column. + * + * Dereferencing the returned iterator returns a `thrust::optional`. + * + * This function does not participate in overload resolution if + * `column_device_view::has_element_accessor()` is false. + * + * @throws cudf::logic_error if the column is not nullable, and `DYNAMIC` mode used and + * the user has stated nulls exist + * @throws cudf::logic_error if column datatype and Element type mismatch. + */ + /**@{*/ + template ())> + auto optional_end(contains_nulls::DYNAMIC, bool has_nulls) const + { + return const_optional_iterator{ + count_it{size()}, detail::optional_accessor{*this, has_nulls}}; + } + template ())> + auto optional_end(contains_nulls::DYNAMIC) const + { + return const_optional_iterator{ + count_it{size()}, detail::optional_accessor{*this}}; + } + /**@}*/ + + /** + * @brief Return an optional iterator to the element following the last element of + * the column. + * + * Dereferencing the returned iterator returns a `thrust::optional`. + * + * This function does not participate in overload resolution if + * `column_device_view::has_element_accessor()` is false. + * + * @throws cudf::logic_error if the column is not nullable, and `YES` mode used + * @throws cudf::logic_error if column datatype and Element type mismatch. + */ + template ())> + auto optional_end(contains_nulls::YES) const + { + return const_optional_iterator{ + count_it{size()}, detail::optional_accessor{*this}}; + } + + /** + * @brief Return an optional iterator to the element following the last element of + * the column. + * + * Dereferencing the returned iterator returns a `thrust::optional`. + * + * This function does not participate in overload resolution if + * `column_device_view::has_element_accessor()` is false. + * + * @throws cudf::logic_error if column datatype and Element type mismatch. + */ + template ())> + auto optional_end(contains_nulls::NO) const + { + return const_optional_iterator{ + count_it{size()}, detail::optional_accessor{*this}}; + } + /** * @brief Return a pair iterator to the element following the last element of * the column. @@ -999,6 +1175,92 @@ struct value_accessor { __device__ T operator()(cudf::size_type i) const { return col.element(i); } }; +/** + * @brief optional accessor of a column + * + * + * The optional_accessor always returns a thrust::optional of column[i]. The validity + * of the optional is determined by the contains_nulls_mode template parameter + * which has the following modes: + * + * - `YES` means that the column supports nulls and has null values, therefore + * the optional might be valid or invalid + * + * - `NO` the user has attested that the column has no null values, + * no checks will occur and `thrust::optional{column[i]}` will be + * return for each `i`. + * + * - `DYNAMIC` defers the assumption of nullability to runtime with the users stating + * on construction of the iterator if column has nulls. + * When `with_nulls=true` the return value validity will be determined if column[i] + * is not null. + * When `with_nulls=false` the return value will always be valid + * + * @throws cudf::logic_error if column datatype and template T type mismatch. + * @throws cudf::logic_error if the column is not nullable, and `with_nulls=true` + * @throws cudf::logic_error if column datatype and template T type mismatch. + * + * + * @tparam T The type of elements in the column + */ +template +struct optional_accessor { + column_device_view const col; ///< column view of column in device + + /** + * @brief constructor + * @param[in] _col column device view of cudf column + */ + optional_accessor(column_device_view const& _col) : col{_col} + { + CUDF_EXPECTS(type_id_matches_device_storage_type(col.type().id()), "the data type mismatch"); + } + + CUDA_DEVICE_CALLABLE + thrust::optional operator()(cudf::size_type i) const + { + if constexpr (std::is_same_v) { + return (col.is_valid_nocheck(i)) ? thrust::optional{col.element(i)} + : thrust::optional{thrust::nullopt}; + } + return thrust::optional{col.element(i)}; + } +}; + +template +struct optional_accessor { + column_device_view const col; ///< column view of column in device + bool has_nulls; + + /** + * @brief constructor + * @param[in] _col column device view of cudf column + */ + optional_accessor(column_device_view const& _col) + : col{_col}, has_nulls{_col.nullable()} + { + CUDF_EXPECTS(type_id_matches_device_storage_type(col.type().id()), "the data type mismatch"); + } + + /** + * @brief constructor + * @param[in] _col column device view of cudf column + */ + optional_accessor(column_device_view const& _col, bool with_nulls) + : col{_col}, has_nulls{with_nulls} + { + CUDF_EXPECTS(type_id_matches_device_storage_type(col.type().id()), "the data type mismatch"); + if (with_nulls) { CUDF_EXPECTS(_col.nullable(), "Unexpected non-nullable column."); } + } + + CUDA_DEVICE_CALLABLE + thrust::optional operator()(cudf::size_type i) const + { + return (has_nulls and col.is_null_nocheck(i)) ? thrust::optional{thrust::nullopt} + : thrust::optional{col.element(i)}; + } +}; + /** * @brief pair accessor of column with/without null bitmask * A unary functor returns pair with scalar value at `id` and boolean validity diff --git a/cpp/include/cudf/detail/iterator.cuh b/cpp/include/cudf/detail/iterator.cuh index 881afa63ca5..a6faec0002a 100644 --- a/cpp/include/cudf/detail/iterator.cuh +++ b/cpp/include/cudf/detail/iterator.cuh @@ -167,6 +167,94 @@ auto make_null_replacement_iterator(column_device_view const& column, 0, null_replaced_value_accessor{column, null_replacement, has_nulls}); } +/** + * @brief Constructs an optional iterator over a column's values and its validity. + * + * Dereferencing the returned iterator returns a `thrust::optional`. + * + * When the element of an iterator contextually converted to bool, the conversion returns true + * if the object contains a value and false if it does not contain a value. + * + * make_optional_iterator with mode `DYNAMIC` defers the assumption of nullability to + * runtime, with the user stating on construction of the iterator if column has nulls. + * + * @throws cudf::logic_error if the column is not nullable, and `DYNAMIC` mode used and + * the user has stated nulls exist + * @throws cudf::logic_error if column datatype and Element type mismatch. + * + * @tparam Element The type of elements in the column + * @tparam MODE The null_mode of the iterator + * @param column The column to iterate + * @return auto Iterator that returns valid column elements, and validity of the + * element in a thrust::optional + */ +/**@{*/ +template +auto make_optional_iterator(column_device_view const& column, + contains_nulls::DYNAMIC, + bool has_nulls) +{ + return column.optional_begin(contains_nulls::DYNAMIC{}, has_nulls); +} +template +auto make_optional_iterator(column_device_view const& column, + contains_nulls::DYNAMIC) +{ + return column.optional_begin(contains_nulls::DYNAMIC{}); +} +/**@}*/ + +/** + * @brief Constructs an optional iterator over a column's values and its validity. + * + * Dereferencing the returned iterator returns a `thrust::optional`. + * + * When the element of an iterator contextually converted to bool, the conversion returns true + * if the object contains a value and false if it does not contain a value. + * + * make_optional_iterator ith mode `YES` means that the column supports nulls and + * potentially has null values, therefore the optional might not contain a value + * + * @throws cudf::logic_error if the column is not nullable, and `YES` mode used + * @throws cudf::logic_error if column datatype and Element type mismatch. + * + * @tparam Element The type of elements in the column + * @tparam mode The has_nulls mode of the iterator + * @param column The column to iterate + * @return auto Iterator that returns column elements, and validity of the + * element in a thrust::optional + */ +template +auto make_optional_iterator(column_device_view const& column, contains_nulls::YES) +{ + return column.optional_begin(contains_nulls::YES{}); +} + +/** + * @brief Constructs an optional iterator over a column's values and its validity. + * + * Dereferencing the returned iterator returns a `thrust::optional`. + * + * When the element of an iterator contextually converted to bool, the conversion returns true + * if the object contains a value and false if it does not contain a value. + * + * make_optional_iterator with mode `NO` means that the column has no null values, + * therefore the optional will always contain a value. + * + * @throws cudf::logic_error if column datatype and Element type mismatch. + * + * @tparam Element The type of elements in the column + * @tparam mode The has_nulls mode of the iterator + * @param column The column to iterate + * @return auto Iterator that returns column elements, and validity of the + * element in a thrust::optional + */ +template +auto make_optional_iterator(column_device_view const& column, contains_nulls::NO) +{ + return column.optional_begin(contains_nulls::NO{}); +} + /** * @brief Constructs a pair iterator over a column's values and its validity. * @@ -320,6 +408,86 @@ auto inline make_scalar_iterator(scalar const& scalar_value) scalar_value_accessor{scalar_value}); } +template +struct scalar_optional_accessor; + +/** + * @brief optional accessor of a maybe-nullable scalar + * + * The scalar_optional_accessor always returns a thrust::optional of the scalar. + * The validity of the optional is determined by the contains_nulls_mode template parameter + * which has the following modes: + * + * `DYNAMIC`: Defer nullability checks to runtime + * + * - When `with_nulls=true` the return value will be a `thrust::optional{scalar}` + * when scalar is valid, and `thrust::optional{}` when the scalar is invalid. + * + * - When `with_nulls=false` the return value will always be `thrust::optional{scalar}` + * + * `NO`: No null values will occur for this scalar, no checks will occur + * and `thrust::optional{scalar}` will always be returned. + * + * `YES`: null values will occur for this scalar, + * and `thrust::optional{scalar}` will always be returned. + * + * @throws `cudf::logic_error` if scalar datatype and Element type mismatch. + * + * @tparam Element The type of return type of functor + */ +template +struct scalar_optional_accessor : public scalar_value_accessor { + using super_t = scalar_value_accessor; + using value_type = thrust::optional; + + scalar_optional_accessor(scalar const& scalar_value) + : scalar_value_accessor(scalar_value) + { + } + + /** + * @brief returns a thrust::optional. + * + * @throw `cudf::logic_error` if this function is called in host. + * + * @return a thrust::optional for the scalar value. + */ + CUDA_HOST_DEVICE_CALLABLE + const value_type operator()(size_type) const + { + if constexpr (std::is_same_v) { + return (super_t::dscalar.is_valid()) ? Element{super_t::dscalar.value()} + : value_type{thrust::nullopt}; + } + return Element{super_t::dscalar.value()}; + } +}; + +template +struct scalar_optional_accessor + : public scalar_value_accessor { + using super_t = scalar_value_accessor; + using value_type = thrust::optional; + bool has_nulls; + + scalar_optional_accessor(scalar const& scalar_value) + : scalar_value_accessor(scalar_value), has_nulls{scalar_value.is_valid()} + { + } + + scalar_optional_accessor(scalar const& scalar_value, bool with_nulls) + : scalar_value_accessor(scalar_value), has_nulls{with_nulls} + { + } + + CUDA_HOST_DEVICE_CALLABLE + const value_type operator()(size_type) const + { + return (has_nulls and !super_t::dscalar.is_valid()) ? value_type{thrust::nullopt} + : Element{super_t::dscalar.value()}; + } +}; + /** * @brief pair accessor for scalar. * The unary functor returns a pair of data of Element type and bool validity of the scalar. @@ -415,6 +583,117 @@ struct scalar_representation_pair_accessor : public scalar_value_accessor`. + * + * When the element of an iterator contextually converted to bool, the conversion returns true + * if the object contains a value and false if it does not contain a value. + * + * The iterator behavior is undefined if the scalar is destroyed before iterator dereferencing. + * + * make_optional_iterator with mode `DYNAMIC` defers the assumption of nullability to + * runtime, with the user stating on construction of the iterator if scalar has nulls. + * + * @throws cudf::logic_error if the scalar is not nullable, and `DYNAMIC` mode used and + * the user has stated nulls exist + * @throws cudf::logic_error if scalar datatype and Element type mismatch. + * + * @tparam Element The type of elements in the scalar + * @tparam mode The has_nulls mode of the iterator + * @tparam has_nulls If the scalar value will have a null at runtime + * @param scalar_value The scalar to iterate + * @return auto Iterator that returns scalar elements, and validity of the + * element in a thrust::optional + */ +/**@{*/ +template +auto inline make_optional_iterator(scalar const& scalar_value, + contains_nulls::DYNAMIC, bool has_nulls) +{ + CUDF_EXPECTS(type_id_matches_device_storage_type(scalar_value.type().id()), + "the data type mismatch"); + return thrust::make_transform_iterator( + thrust::make_constant_iterator(0), + scalar_optional_accessor{scalar_value, has_nulls}); +} +template +auto inline make_optional_iterator(scalar const& scalar_value, + contains_nulls::DYNAMIC) +{ + CUDF_EXPECTS(type_id_matches_device_storage_type(scalar_value.type().id()), + "the data type mismatch"); + return thrust::make_transform_iterator(thrust::make_constant_iterator(0), + scalar_optional_accessor{ + scalar_value}); +} +/**@}*/ + + +/** + * @brief Constructs an optional iterator over a scalar's values and its validity. + * + * Dereferencing the returned iterator returns a `thrust::optional`. + * + * When the element of an iterator contextually converted to bool, the conversion returns true + * if the object contains a value and false if it does not contain a value. + * + * The iterator behavior is undefined if the scalar is destroyed before iterator dereferencing. + * + * make_optional_iterator ith mode `YES` means that the scalar supports nulls and + * potentially has null values, therefore the optional might not contain a value + * + * @throws cudf::logic_error if the scalar is not nullable, and `YES` mode used + * @throws cudf::logic_error if scalar datatype and Element type mismatch. + * + * @tparam Element The type of elements in the scalar + * @tparam mode The has_nulls mode of the iterator + * @param scalar_value The scalar to iterate + * @return auto Iterator that returns scalar elements, and validity of the + * element in a thrust::optional + */ +template +auto inline make_optional_iterator(scalar const& scalar_value, contains_nulls::YES) +{ + CUDF_EXPECTS(type_id_matches_device_storage_type(scalar_value.type().id()), + "the data type mismatch"); + return thrust::make_transform_iterator( + thrust::make_constant_iterator(0), + scalar_optional_accessor{scalar_value}); +} + +/** + * @brief Constructs an optional iterator over a scalar's values and its validity. + * + * Dereferencing the returned iterator returns a `thrust::optional`. + * + * When the element of an iterator contextually converted to bool, the conversion returns true + * if the object contains a value and false if it does not contain a value. + * + * The iterator behavior is undefined if the scalar is destroyed before iterator dereferencing. + * + * make_optional_iterator with mode `NO` means that the scalar has no null values, + * therefore the optional will always contain a value. + * + * @throws cudf::logic_error if scalar datatype and Element type mismatch. + * + * @tparam Element The type of elements in the scalar + * @tparam mode The has_nulls mode of the iterator + * @param scalar_value The scalar to iterate + * @return auto Iterator that returns scalar elements, and validity of the + * element in a thrust::optional + */ +template +auto inline make_optional_iterator(scalar const& scalar_value, contains_nulls::NO) +{ + CUDF_EXPECTS(type_id_matches_device_storage_type(scalar_value.type().id()), + "the data type mismatch"); + return thrust::make_transform_iterator( + thrust::make_constant_iterator(0), + scalar_optional_accessor{scalar_value}); +} + /** * @brief Constructs a constant device pair iterator over a scalar's value and its validity. * diff --git a/cpp/tests/CMakeLists.txt b/cpp/tests/CMakeLists.txt index 342ec9145fd..fc3026f744d 100644 --- a/cpp/tests/CMakeLists.txt +++ b/cpp/tests/CMakeLists.txt @@ -228,9 +228,10 @@ ConfigureTest(SPAN_TEST utilities_tests/span_tests.cu) ################################################################################################### # - iterator tests -------------------------------------------------------------------------------- ConfigureTest(ITERATOR_TEST - iterator/value_iterator_test.cu + iterator/optional_iterator_test.cu iterator/pair_iterator_test.cu - iterator/scalar_iterator_test.cu) + iterator/scalar_iterator_test.cu + iterator/value_iterator_test.cu) ################################################################################################### # - device atomics tests -------------------------------------------------------------------------- diff --git a/cpp/tests/iterator/optional_iterator_test.cu b/cpp/tests/iterator/optional_iterator_test.cu new file mode 100644 index 00000000000..c1736ea1b25 --- /dev/null +++ b/cpp/tests/iterator/optional_iterator_test.cu @@ -0,0 +1,208 @@ +/* + * Copyright (c) 2020, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed under the License + * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express + * or implied. See the License for the specific language governing permissions and limitations under + * the License. + */ +#include +#include + +// to print meanvar for debug. +template +std::ostream& operator<<(std::ostream& os, cudf::meanvar const& rhs) +{ + return os << "[" << rhs.value << ", " << rhs.value_squared << ", " << rhs.count << "] "; +}; + +// Transformers and Operators for optional_iterator test +template +struct transformer_optional_meanvar { + using ResultType = thrust::optional>; + + CUDA_HOST_DEVICE_CALLABLE + ResultType operator()(thrust::optional const& optional) + { + if (optional.has_value()) { + auto v = *optional; + return cudf::meanvar{v, static_cast(v * v), 1}; + } + return thrust::nullopt; + } +}; + +struct sum_if_not_null { + template + CUDA_HOST_DEVICE_CALLABLE thrust::optional operator()(const thrust::optional& lhs, + const thrust::optional& rhs) + { + if (lhs.has_value() and rhs.has_value()) + return thrust::optional{*lhs + *rhs}; + else if (lhs.has_value()) + return lhs; + else if (rhs.has_value()) + return rhs; + else + return thrust::optional{}; + } +}; + +template +struct OptionalIteratorTest : public cudf::test::BaseFixture { +}; +TYPED_TEST_CASE(OptionalIteratorTest, cudf::test::NumericTypes); +// TODO: enable this test also at __CUDACC_DEBUG__ +// This test causes fatal compilation error only at device debug mode. +// Workaround: exclude this test only at device debug mode. +#if !defined(__CUDACC_DEBUG__) +// This test computes `count`, `sum`, `sum_of_squares` at a single reduction call. +// It would be useful for `var`, `std` operation +TYPED_TEST(OptionalIteratorTest, mean_var_output) +{ + using T = TypeParam; + using T_output = cudf::meanvar; + transformer_optional_meanvar transformer{}; + + const int column_size{50}; + const T init{0}; + + // data and valid arrays + std::vector host_values(column_size); + std::vector host_bools(column_size); + + cudf::test::UniformRandomGenerator rng; + cudf::test::UniformRandomGenerator rbg; + std::generate(host_values.begin(), host_values.end(), [&rng]() { return rng.generate(); }); + std::generate(host_bools.begin(), host_bools.end(), [&rbg]() { return rbg.generate(); }); + + cudf::test::fixed_width_column_wrapper w_col( + host_values.begin(), host_values.end(), host_bools.begin()); + auto d_col = cudf::column_device_view::create(w_col); + + // calculate expected values by CPU + T_output expected_value; + + expected_value.count = d_col->size() - static_cast(w_col).null_count(); + + std::vector replaced_array(d_col->size()); + std::transform(host_values.begin(), + host_values.end(), + host_bools.begin(), + replaced_array.begin(), + [&](T x, bool b) { return (b) ? static_cast(x) : init; }); + + expected_value.count = d_col->size() - static_cast(w_col).null_count(); + expected_value.value = std::accumulate(replaced_array.begin(), replaced_array.end(), T{0}); + expected_value.value_squared = std::accumulate( + replaced_array.begin(), replaced_array.end(), T{0}, [](T acc, T i) { return acc + i * i; }); + + std::cout << "expected = " << expected_value << std::endl; + + // GPU test + auto it_dev = d_col->optional_begin(cudf::contains_nulls::YES{}); + auto it_dev_squared = thrust::make_transform_iterator(it_dev, transformer); + auto result = thrust::reduce(it_dev_squared, + it_dev_squared + d_col->size(), + thrust::optional{T_output{}}, + sum_if_not_null{}); + if (not std::is_floating_point()) { + EXPECT_EQ(expected_value, *result) << "optional iterator reduction sum"; + } else { + EXPECT_NEAR(expected_value.value, result->value, 1e-3) << "optional iterator reduction sum"; + EXPECT_NEAR(expected_value.value_squared, result->value_squared, 1e-3) + << "optional iterator reduction sum squared"; + EXPECT_EQ(expected_value.count, result->count) << "optional iterator reduction count"; + } +} +#endif + +using TestingTypes = cudf::test::AllTypes; + +TYPED_TEST_CASE(IteratorTest, TestingTypes); + +TYPED_TEST(IteratorTest, nonull_optional_iterator) +{ + using T = TypeParam; + // data and valid arrays + auto host_values_std = + cudf::test::make_type_param_vector({0, 6, 0, -14, 13, 64, -13, -20, 45}); + thrust::host_vector host_values(host_values_std); + + // create a column + cudf::test::fixed_width_column_wrapper w_col(host_values.begin(), host_values.end()); + auto d_col = cudf::column_device_view::create(w_col); + + // calculate the expected value by CPU. + thrust::host_vector> replaced_array(host_values.size()); + std::transform(host_values.begin(), host_values.end(), replaced_array.begin(), [](auto s) { + return thrust::optional{s}; + }); + + // GPU test + this->iterator_test_thrust( + replaced_array, + cudf::detail::make_optional_iterator(*d_col, cudf::contains_nulls::DYNAMIC{}, false), + host_values.size()); + this->iterator_test_thrust( + replaced_array, + cudf::detail::make_optional_iterator(*d_col, cudf::contains_nulls::NO{}), + host_values.size()); +} + +TYPED_TEST(IteratorTest, null_optional_iterator) +{ + using T = TypeParam; + // data and valid arrays + auto host_values = cudf::test::make_type_param_vector({0, 6, 0, -14, 13, 64, -13, -20, 45}); + thrust::host_vector host_bools(std::vector({1, 1, 0, 1, 1, 1, 0, 1, 1})); + + // create a column with bool vector + cudf::test::fixed_width_column_wrapper w_col( + host_values.begin(), host_values.end(), host_bools.begin()); + auto d_col = cudf::column_device_view::create(w_col); + + // calculate the expected value by CPU. + thrust::host_vector> optional_values(host_values.size()); + std::transform(host_values.begin(), + host_values.end(), + host_bools.begin(), + optional_values.begin(), + [](auto s, bool b) { return b ? thrust::optional{s} : thrust::optional{}; }); + + thrust::host_vector> value_all_valid(host_values.size()); + std::transform(host_values.begin(), + host_values.end(), + host_bools.begin(), + value_all_valid.begin(), + [](auto s, bool b) { return thrust::optional{s}; }); + + // GPU test for correct null mapping + this->iterator_test_thrust(optional_values, + d_col->optional_begin(cudf::contains_nulls::DYNAMIC{}, true), + host_values.size()); + this->iterator_test_thrust(optional_values, + d_col->optional_begin(cudf::contains_nulls::DYNAMIC{}), + host_values.size()); + + this->iterator_test_thrust( + optional_values, d_col->optional_begin(cudf::contains_nulls::YES{}), host_values.size()); + this->iterator_test_thrust( + optional_values, d_col->optional_begin(cudf::contains_nulls::YES{}), host_values.size()); + + // GPU test for ignoring null mapping + this->iterator_test_thrust(value_all_valid, + d_col->optional_begin(cudf::contains_nulls::DYNAMIC{}, false), + host_values.size()); + + this->iterator_test_thrust( + value_all_valid, d_col->optional_begin(cudf::contains_nulls::NO{}), host_values.size()); + this->iterator_test_thrust( + value_all_valid, d_col->optional_begin(cudf::contains_nulls::NO{}), host_values.size()); +} From ce6d8647a9a1ec888f38e954c2cd92d3eded219a Mon Sep 17 00:00:00 2001 From: Robert Maynard Date: Wed, 7 Apr 2021 11:53:50 -0400 Subject: [PATCH 02/16] DYNAMIC optional iterators require users to specify has_nulls --- .../cudf/column/column_device_view.cuh | 26 ------------------- cpp/include/cudf/detail/iterator.cuh | 25 ------------------ cpp/tests/iterator/optional_iterator_test.cu | 3 --- 3 files changed, 54 deletions(-) diff --git a/cpp/include/cudf/column/column_device_view.cuh b/cpp/include/cudf/column/column_device_view.cuh index f0eb37d5de6..c1a09e68447 100644 --- a/cpp/include/cudf/column/column_device_view.cuh +++ b/cpp/include/cudf/column/column_device_view.cuh @@ -549,20 +549,12 @@ class alignas(16) column_device_view : public detail::column_device_view_base { * the user has stated nulls exist * @throws cudf::logic_error if column datatype and Element type mismatch. */ - /**@{*/ template ())> auto optional_begin(contains_nulls::DYNAMIC, bool has_nulls) const { return const_optional_iterator{ count_it{0}, detail::optional_accessor{*this, has_nulls}}; } - template ())> - auto optional_begin(contains_nulls::DYNAMIC) const - { - return const_optional_iterator{ - count_it{0}, detail::optional_accessor{*this}}; - } - /**@}*/ /** * @brief Return an optional iterator to the first element of the column. @@ -682,20 +674,12 @@ class alignas(16) column_device_view : public detail::column_device_view_base { * the user has stated nulls exist * @throws cudf::logic_error if column datatype and Element type mismatch. */ - /**@{*/ template ())> auto optional_end(contains_nulls::DYNAMIC, bool has_nulls) const { return const_optional_iterator{ count_it{size()}, detail::optional_accessor{*this, has_nulls}}; } - template ())> - auto optional_end(contains_nulls::DYNAMIC) const - { - return const_optional_iterator{ - count_it{size()}, detail::optional_accessor{*this}}; - } - /**@}*/ /** * @brief Return an optional iterator to the element following the last element of @@ -1232,16 +1216,6 @@ struct optional_accessor { column_device_view const col; ///< column view of column in device bool has_nulls; - /** - * @brief constructor - * @param[in] _col column device view of cudf column - */ - optional_accessor(column_device_view const& _col) - : col{_col}, has_nulls{_col.nullable()} - { - CUDF_EXPECTS(type_id_matches_device_storage_type(col.type().id()), "the data type mismatch"); - } - /** * @brief constructor * @param[in] _col column device view of cudf column diff --git a/cpp/include/cudf/detail/iterator.cuh b/cpp/include/cudf/detail/iterator.cuh index a6faec0002a..84ebdd02e9b 100644 --- a/cpp/include/cudf/detail/iterator.cuh +++ b/cpp/include/cudf/detail/iterator.cuh @@ -188,7 +188,6 @@ auto make_null_replacement_iterator(column_device_view const& column, * @return auto Iterator that returns valid column elements, and validity of the * element in a thrust::optional */ -/**@{*/ template auto make_optional_iterator(column_device_view const& column, contains_nulls::DYNAMIC, @@ -196,13 +195,6 @@ auto make_optional_iterator(column_device_view const& column, { return column.optional_begin(contains_nulls::DYNAMIC{}, has_nulls); } -template -auto make_optional_iterator(column_device_view const& column, - contains_nulls::DYNAMIC) -{ - return column.optional_begin(contains_nulls::DYNAMIC{}); -} -/**@}*/ /** * @brief Constructs an optional iterator over a column's values and its validity. @@ -470,11 +462,6 @@ struct scalar_optional_accessor using value_type = thrust::optional; bool has_nulls; - scalar_optional_accessor(scalar const& scalar_value) - : scalar_value_accessor(scalar_value), has_nulls{scalar_value.is_valid()} - { - } - scalar_optional_accessor(scalar const& scalar_value, bool with_nulls) : scalar_value_accessor(scalar_value), has_nulls{with_nulls} { @@ -607,7 +594,6 @@ struct scalar_representation_pair_accessor : public scalar_value_accessor auto inline make_optional_iterator(scalar const& scalar_value, contains_nulls::DYNAMIC, bool has_nulls) @@ -618,17 +604,6 @@ auto inline make_optional_iterator(scalar const& scalar_value, thrust::make_constant_iterator(0), scalar_optional_accessor{scalar_value, has_nulls}); } -template -auto inline make_optional_iterator(scalar const& scalar_value, - contains_nulls::DYNAMIC) -{ - CUDF_EXPECTS(type_id_matches_device_storage_type(scalar_value.type().id()), - "the data type mismatch"); - return thrust::make_transform_iterator(thrust::make_constant_iterator(0), - scalar_optional_accessor{ - scalar_value}); -} -/**@}*/ /** diff --git a/cpp/tests/iterator/optional_iterator_test.cu b/cpp/tests/iterator/optional_iterator_test.cu index c1736ea1b25..cee77538130 100644 --- a/cpp/tests/iterator/optional_iterator_test.cu +++ b/cpp/tests/iterator/optional_iterator_test.cu @@ -187,9 +187,6 @@ TYPED_TEST(IteratorTest, null_optional_iterator) this->iterator_test_thrust(optional_values, d_col->optional_begin(cudf::contains_nulls::DYNAMIC{}, true), host_values.size()); - this->iterator_test_thrust(optional_values, - d_col->optional_begin(cudf::contains_nulls::DYNAMIC{}), - host_values.size()); this->iterator_test_thrust( optional_values, d_col->optional_begin(cudf::contains_nulls::YES{}), host_values.size()); From 8c520133b0cf804dd76d997757d2fc5b8e0f99d3 Mon Sep 17 00:00:00 2001 From: Robert Maynard Date: Thu, 8 Apr 2021 12:22:13 -0400 Subject: [PATCH 03/16] correct code format issues --- cpp/include/cudf/detail/iterator.cuh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cpp/include/cudf/detail/iterator.cuh b/cpp/include/cudf/detail/iterator.cuh index 84ebdd02e9b..7e5c8983c7b 100644 --- a/cpp/include/cudf/detail/iterator.cuh +++ b/cpp/include/cudf/detail/iterator.cuh @@ -596,7 +596,8 @@ struct scalar_representation_pair_accessor : public scalar_value_accessor auto inline make_optional_iterator(scalar const& scalar_value, - contains_nulls::DYNAMIC, bool has_nulls) + contains_nulls::DYNAMIC, + bool has_nulls) { CUDF_EXPECTS(type_id_matches_device_storage_type(scalar_value.type().id()), "the data type mismatch"); @@ -605,7 +606,6 @@ auto inline make_optional_iterator(scalar const& scalar_value, scalar_optional_accessor{scalar_value, has_nulls}); } - /** * @brief Constructs an optional iterator over a scalar's values and its validity. * From e2bd5dec4f937502cdcdb5c2f14abfb5bd795e0e Mon Sep 17 00:00:00 2001 From: Robert Maynard Date: Thu, 8 Apr 2021 10:43:12 -0400 Subject: [PATCH 04/16] optional_iterator_test reworked to leverage optional.value_or --- cpp/tests/iterator/optional_iterator_test.cu | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/cpp/tests/iterator/optional_iterator_test.cu b/cpp/tests/iterator/optional_iterator_test.cu index cee77538130..460f692ee14 100644 --- a/cpp/tests/iterator/optional_iterator_test.cu +++ b/cpp/tests/iterator/optional_iterator_test.cu @@ -43,14 +43,7 @@ struct sum_if_not_null { CUDA_HOST_DEVICE_CALLABLE thrust::optional operator()(const thrust::optional& lhs, const thrust::optional& rhs) { - if (lhs.has_value() and rhs.has_value()) - return thrust::optional{*lhs + *rhs}; - else if (lhs.has_value()) - return lhs; - else if (rhs.has_value()) - return rhs; - else - return thrust::optional{}; + return lhs.value_or(T{0}) + rhs.value_or(T{0}); } }; From 41fbe40c2420052872b43819bd858e106c1e032f Mon Sep 17 00:00:00 2001 From: Robert Maynard Date: Thu, 8 Apr 2021 12:32:23 -0400 Subject: [PATCH 05/16] add first example to docs, for feedback --- cpp/include/cudf/column/column_device_view.cuh | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/cpp/include/cudf/column/column_device_view.cuh b/cpp/include/cudf/column/column_device_view.cuh index c1a09e68447..d6b42dadcee 100644 --- a/cpp/include/cudf/column/column_device_view.cuh +++ b/cpp/include/cudf/column/column_device_view.cuh @@ -542,6 +542,17 @@ class alignas(16) column_device_view : public detail::column_device_view_base { * optional_begin with mode `DYNAMIC` defers the assumption of nullability to * runtime, with the user stating on construction of the iterator if column has nulls. * + * Example: + * + * \code{.cpp} + * template + * void some_function( cudf::column_view const& col_view, bool has_nulls){ + * auto d_col = cudf::column_device_view::create(col_view); + * // Create a `DYNAMIC` optional iterator + * auto optional_iterator = d_col->optional_begin(cudf::contains_nulls::DYNAMIC{}, has_nulls); + * } + * \endcode + * * This function does not participate in overload resolution if * `column_device_view::has_element_accessor()` is false. * From 0f281a3e489a0ae18c1dae2232b43d2f5ac53a90 Mon Sep 17 00:00:00 2001 From: Robert Maynard Date: Thu, 8 Apr 2021 13:05:15 -0400 Subject: [PATCH 06/16] add examples to all the optional iterator functions --- .../cudf/column/column_device_view.cuh | 36 +++++++++++++- cpp/include/cudf/detail/iterator.cuh | 47 ++++++++++++++++++- 2 files changed, 81 insertions(+), 2 deletions(-) diff --git a/cpp/include/cudf/column/column_device_view.cuh b/cpp/include/cudf/column/column_device_view.cuh index d6b42dadcee..3fbbbaeb96d 100644 --- a/cpp/include/cudf/column/column_device_view.cuh +++ b/cpp/include/cudf/column/column_device_view.cuh @@ -541,12 +541,14 @@ class alignas(16) column_device_view : public detail::column_device_view_base { * * optional_begin with mode `DYNAMIC` defers the assumption of nullability to * runtime, with the user stating on construction of the iterator if column has nulls. + * `DYNAMIC` mode is nice when an algorithm is going to execute on mutliple + * iterators and you don't want to compile all the combinations of iterator types * * Example: * * \code{.cpp} * template - * void some_function( cudf::column_view const& col_view, bool has_nulls){ + * void some_function(cudf::column_view const& col_view, bool has_nulls){ * auto d_col = cudf::column_device_view::create(col_view); * // Create a `DYNAMIC` optional iterator * auto optional_iterator = d_col->optional_begin(cudf::contains_nulls::DYNAMIC{}, has_nulls); @@ -578,6 +580,22 @@ class alignas(16) column_device_view : public detail::column_device_view_base { * optional_begin with mode `YES` means that the column supports nulls and * potentially has null values, therefore the optional might not contain a value * + * Example: + * + * \code{.cpp} + * template + * void some_function(cudf::column_view const& col_view){ + * auto d_col = cudf::column_device_view::create(col_view); + * if constexpr(has_nulls) { + * auto optional_iterator = d_col->optional_begin(cudf::contains_nulls::YES{}); + * //use optional_iterator + * } else { + * auto optional_iterator = d_col->optional_begin(cudf::contains_nulls::NO{}); + * //use optional_iterator + * } + * } + * \endcode + * * This function does not participate in overload resolution if * `column_device_view::has_element_accessor()` is false. * @@ -602,6 +620,22 @@ class alignas(16) column_device_view : public detail::column_device_view_base { * optional_begin with mode `NO` means that the column has no null values, * therefore the optional will always contain a value. * + * Example: + * + * \code{.cpp} + * template + * void some_function(cudf::column_view const& col_view){ + * auto d_col = cudf::column_device_view::create(col_view); + * if constexpr(has_nulls) { + * auto optional_iterator = d_col->optional_begin(cudf::contains_nulls::YES{}); + * //use optional_iterator + * } else { + * auto optional_iterator = d_col->optional_begin(cudf::contains_nulls::NO{}); + * //use optional_iterator + * } + * } + * \endcode + * * This function does not participate in overload resolution if * `column_device_view::has_element_accessor()` is false. * diff --git a/cpp/include/cudf/detail/iterator.cuh b/cpp/include/cudf/detail/iterator.cuh index 7e5c8983c7b..5aaa799bf4f 100644 --- a/cpp/include/cudf/detail/iterator.cuh +++ b/cpp/include/cudf/detail/iterator.cuh @@ -177,6 +177,19 @@ auto make_null_replacement_iterator(column_device_view const& column, * * make_optional_iterator with mode `DYNAMIC` defers the assumption of nullability to * runtime, with the user stating on construction of the iterator if column has nulls. + * `DYNAMIC` mode is nice when an algorithm is going to execute on mutliple + * iterators and you don't want to compile all the combinations of iterator types + * + * Example: + * + * \code{.cpp} + * template + * void some_function(cudf::column_view const& col_view, bool has_nulls){ + * auto d_col = cudf::column_device_view::create(col_view); + * // Create a `DYNAMIC` optional iterator + * auto optional_iterator = cudf::detail::make_optional_iterator(d_col, cudf::contains_nulls::DYNAMIC{}, has_nulls); + * } + * \endcode * * @throws cudf::logic_error if the column is not nullable, and `DYNAMIC` mode used and * the user has stated nulls exist @@ -204,9 +217,25 @@ auto make_optional_iterator(column_device_view const& column, * When the element of an iterator contextually converted to bool, the conversion returns true * if the object contains a value and false if it does not contain a value. * - * make_optional_iterator ith mode `YES` means that the column supports nulls and + * make_optional_iterator with mode `YES` means that the column supports nulls and * potentially has null values, therefore the optional might not contain a value * + * Example: + * + * \code{.cpp} + * template + * void some_function(cudf::column_view const& col_view){ + * auto d_col = cudf::column_device_view::create(col_view); + * if constexpr(has_nulls) { + * auto optional_iterator = cudf::detail::make_optional_iterator(d_col, cudf::contains_nulls::YES{}); + * //use optional_iterator + * } else { + * auto optional_iterator = cudf::detail::make_optional_iterator(d_col, cudf::contains_nulls::NO{}); + * //use optional_iterator + * } + * } + * \endcode + * * @throws cudf::logic_error if the column is not nullable, and `YES` mode used * @throws cudf::logic_error if column datatype and Element type mismatch. * @@ -233,6 +262,22 @@ auto make_optional_iterator(column_device_view const& column, contains_nulls::YE * make_optional_iterator with mode `NO` means that the column has no null values, * therefore the optional will always contain a value. * + * Example: + * + * \code{.cpp} + * template + * void some_function(cudf::column_view const& col_view){ + * auto d_col = cudf::column_device_view::create(col_view); + * if constexpr(has_nulls) { + * auto optional_iterator = cudf::detail::make_optional_iterator(d_col, cudf::contains_nulls::YES{}); + * //use optional_iterator + * } else { + * auto optional_iterator = cudf::detail::make_optional_iterator(d_col, cudf::contains_nulls::NO{}); + * //use optional_iterator + * } + * } + * \endcode + * * @throws cudf::logic_error if column datatype and Element type mismatch. * * @tparam Element The type of elements in the column From f71fe12dde87ae504cd23f033adb1e4f9700e257 Mon Sep 17 00:00:00 2001 From: Robert Maynard Date: Thu, 8 Apr 2021 13:12:45 -0400 Subject: [PATCH 07/16] add examples to all the scalar optional iterator functions --- cpp/include/cudf/detail/iterator.cuh | 53 ++++++++++++++++++++++++++-- 1 file changed, 51 insertions(+), 2 deletions(-) diff --git a/cpp/include/cudf/detail/iterator.cuh b/cpp/include/cudf/detail/iterator.cuh index 5aaa799bf4f..cc71ff79c96 100644 --- a/cpp/include/cudf/detail/iterator.cuh +++ b/cpp/include/cudf/detail/iterator.cuh @@ -628,6 +628,20 @@ struct scalar_representation_pair_accessor : public scalar_value_accessor + * void some_function(cudf::column_view const& col_view, + * scalar const& scalar_value, + * bool col_has_nulls){ + * auto d_col = cudf::column_device_view::create(col_view); + * auto column_iterator = cudf::detail::make_optional_iterator(d_col, cudf::contains_nulls::DYNAMIC{}, col_has_nulls); + * auto scalar_iterator = cudf::detail::make_optional_iterator(scalar_value, cudf::contains_nulls::DYNAMIC{}, scalar_value.is_valid()); + * //use iterators + * } + * \endcode + * * @throws cudf::logic_error if the scalar is not nullable, and `DYNAMIC` mode used and * the user has stated nulls exist * @throws cudf::logic_error if scalar datatype and Element type mismatch. @@ -663,12 +677,30 @@ auto inline make_optional_iterator(scalar const& scalar_value, * * make_optional_iterator ith mode `YES` means that the scalar supports nulls and * potentially has null values, therefore the optional might not contain a value + * therefore the optional will always contain a value. + * + * Example: + * + * \code{.cpp} + * template + * void some_function(cudf::column_view const& col_view, scalar const& scalar_value){ + * auto d_col = cudf::column_device_view::create(col_view); + * if constexpr(any_nulls) { + * auto column_iterator = cudf::detail::make_optional_iterator(d_col, cudf::contains_nulls::YES{}); + * auto scalar_iterator = cudf::detail::make_optional_iterator(scalar_value, cudf::contains_nulls::YES{}); + * //use iterators + * } else { + * auto column_iterator = cudf::detail::make_optional_iterator(d_col, cudf::contains_nulls::NO{}); + * auto scalar_iterator = cudf::detail::make_optional_iterator(scalar_value, cudf::contains_nulls::NO{}); + * //use iterators + * } + * } + * \endcode * * @throws cudf::logic_error if the scalar is not nullable, and `YES` mode used * @throws cudf::logic_error if scalar datatype and Element type mismatch. * * @tparam Element The type of elements in the scalar - * @tparam mode The has_nulls mode of the iterator * @param scalar_value The scalar to iterate * @return auto Iterator that returns scalar elements, and validity of the * element in a thrust::optional @@ -696,10 +728,27 @@ auto inline make_optional_iterator(scalar const& scalar_value, contains_nulls::Y * make_optional_iterator with mode `NO` means that the scalar has no null values, * therefore the optional will always contain a value. * + * Example: + * + * \code{.cpp} + * template + * void some_function(cudf::column_view const& col_view, scalar const& scalar_value){ + * auto d_col = cudf::column_device_view::create(col_view); + * if constexpr(any_nulls) { + * auto column_iterator = cudf::detail::make_optional_iterator(d_col, cudf::contains_nulls::YES{}); + * auto scalar_iterator = cudf::detail::make_optional_iterator(scalar_value, cudf::contains_nulls::YES{}); + * //use iterators + * } else { + * auto column_iterator = cudf::detail::make_optional_iterator(d_col, cudf::contains_nulls::NO{}); + * auto scalar_iterator = cudf::detail::make_optional_iterator(scalar_value, cudf::contains_nulls::NO{}); + * //use iterators + * } + * } + * \endcode + * * @throws cudf::logic_error if scalar datatype and Element type mismatch. * * @tparam Element The type of elements in the scalar - * @tparam mode The has_nulls mode of the iterator * @param scalar_value The scalar to iterate * @return auto Iterator that returns scalar elements, and validity of the * element in a thrust::optional From b83d66750e5e500b391d79ff195517ff37600e00 Mon Sep 17 00:00:00 2001 From: Robert Maynard Date: Thu, 8 Apr 2021 15:14:31 -0400 Subject: [PATCH 08/16] correct formatting of code examples --- .../cudf/column/column_device_view.cuh | 3 +- cpp/include/cudf/detail/iterator.cuh | 45 ++++++++++++------- 2 files changed, 32 insertions(+), 16 deletions(-) diff --git a/cpp/include/cudf/column/column_device_view.cuh b/cpp/include/cudf/column/column_device_view.cuh index 3fbbbaeb96d..5a18d0681d4 100644 --- a/cpp/include/cudf/column/column_device_view.cuh +++ b/cpp/include/cudf/column/column_device_view.cuh @@ -551,7 +551,8 @@ class alignas(16) column_device_view : public detail::column_device_view_base { * void some_function(cudf::column_view const& col_view, bool has_nulls){ * auto d_col = cudf::column_device_view::create(col_view); * // Create a `DYNAMIC` optional iterator - * auto optional_iterator = d_col->optional_begin(cudf::contains_nulls::DYNAMIC{}, has_nulls); + * auto optional_iterator = d_col->optional_begin(cudf::contains_nulls::DYNAMIC{}, + * has_nulls); * } * \endcode * diff --git a/cpp/include/cudf/detail/iterator.cuh b/cpp/include/cudf/detail/iterator.cuh index cc71ff79c96..96a823d7e82 100644 --- a/cpp/include/cudf/detail/iterator.cuh +++ b/cpp/include/cudf/detail/iterator.cuh @@ -187,7 +187,8 @@ auto make_null_replacement_iterator(column_device_view const& column, * void some_function(cudf::column_view const& col_view, bool has_nulls){ * auto d_col = cudf::column_device_view::create(col_view); * // Create a `DYNAMIC` optional iterator - * auto optional_iterator = cudf::detail::make_optional_iterator(d_col, cudf::contains_nulls::DYNAMIC{}, has_nulls); + * auto optional_iterator = cudf::detail::make_optional_iterator(d_col, + * cudf::contains_nulls::DYNAMIC{}, has_nulls); * } * \endcode * @@ -227,10 +228,12 @@ auto make_optional_iterator(column_device_view const& column, * void some_function(cudf::column_view const& col_view){ * auto d_col = cudf::column_device_view::create(col_view); * if constexpr(has_nulls) { - * auto optional_iterator = cudf::detail::make_optional_iterator(d_col, cudf::contains_nulls::YES{}); + * auto optional_iterator = cudf::detail::make_optional_iterator(d_col, + * cudf::contains_nulls::YES{}); * //use optional_iterator * } else { - * auto optional_iterator = cudf::detail::make_optional_iterator(d_col, cudf::contains_nulls::NO{}); + * auto optional_iterator = cudf::detail::make_optional_iterator(d_col, + * cudf::contains_nulls::NO{}); * //use optional_iterator * } * } @@ -269,10 +272,12 @@ auto make_optional_iterator(column_device_view const& column, contains_nulls::YE * void some_function(cudf::column_view const& col_view){ * auto d_col = cudf::column_device_view::create(col_view); * if constexpr(has_nulls) { - * auto optional_iterator = cudf::detail::make_optional_iterator(d_col, cudf::contains_nulls::YES{}); + * auto optional_iterator = cudf::detail::make_optional_iterator(d_col, + * cudf::contains_nulls::YES{}); * //use optional_iterator * } else { - * auto optional_iterator = cudf::detail::make_optional_iterator(d_col, cudf::contains_nulls::NO{}); + * auto optional_iterator = cudf::detail::make_optional_iterator(d_col, + * cudf::contains_nulls::NO{}); * //use optional_iterator * } * } @@ -636,8 +641,10 @@ struct scalar_representation_pair_accessor : public scalar_value_accessor(d_col, cudf::contains_nulls::DYNAMIC{}, col_has_nulls); - * auto scalar_iterator = cudf::detail::make_optional_iterator(scalar_value, cudf::contains_nulls::DYNAMIC{}, scalar_value.is_valid()); + * auto column_iterator = cudf::detail::make_optional_iterator(d_col, + cudf::contains_nulls::DYNAMIC{}, col_has_nulls); + * auto scalar_iterator = cudf::detail::make_optional_iterator(scalar_value, + cudf::contains_nulls::DYNAMIC{}, scalar_value.is_valid()); * //use iterators * } * \endcode @@ -686,12 +693,16 @@ auto inline make_optional_iterator(scalar const& scalar_value, * void some_function(cudf::column_view const& col_view, scalar const& scalar_value){ * auto d_col = cudf::column_device_view::create(col_view); * if constexpr(any_nulls) { - * auto column_iterator = cudf::detail::make_optional_iterator(d_col, cudf::contains_nulls::YES{}); - * auto scalar_iterator = cudf::detail::make_optional_iterator(scalar_value, cudf::contains_nulls::YES{}); + * auto column_iterator = cudf::detail::make_optional_iterator(d_col, + * cudf::contains_nulls::YES{}); + * auto scalar_iterator = cudf::detail::make_optional_iterator(scalar_value, + * cudf::contains_nulls::YES{}); * //use iterators * } else { - * auto column_iterator = cudf::detail::make_optional_iterator(d_col, cudf::contains_nulls::NO{}); - * auto scalar_iterator = cudf::detail::make_optional_iterator(scalar_value, cudf::contains_nulls::NO{}); + * auto column_iterator = cudf::detail::make_optional_iterator(d_col, + * cudf::contains_nulls::NO{}); + * auto scalar_iterator = cudf::detail::make_optional_iterator(scalar_value, + * cudf::contains_nulls::NO{}); * //use iterators * } * } @@ -735,12 +746,16 @@ auto inline make_optional_iterator(scalar const& scalar_value, contains_nulls::Y * void some_function(cudf::column_view const& col_view, scalar const& scalar_value){ * auto d_col = cudf::column_device_view::create(col_view); * if constexpr(any_nulls) { - * auto column_iterator = cudf::detail::make_optional_iterator(d_col, cudf::contains_nulls::YES{}); - * auto scalar_iterator = cudf::detail::make_optional_iterator(scalar_value, cudf::contains_nulls::YES{}); + * auto column_iterator = cudf::detail::make_optional_iterator(d_col, + * cudf::contains_nulls::YES{}); + * auto scalar_iterator = cudf::detail::make_optional_iterator(scalar_value, + * cudf::contains_nulls::YES{}); * //use iterators * } else { - * auto column_iterator = cudf::detail::make_optional_iterator(d_col, cudf::contains_nulls::NO{}); - * auto scalar_iterator = cudf::detail::make_optional_iterator(scalar_value, cudf::contains_nulls::NO{}); + * auto column_iterator = cudf::detail::make_optional_iterator(d_col, + * cudf::contains_nulls::NO{}); + * auto scalar_iterator = cudf::detail::make_optional_iterator(scalar_value, + * cudf::contains_nulls::NO{}); * //use iterators * } * } From 72f3b409b0a454d15a44b996564b48e3b9732c70 Mon Sep 17 00:00:00 2001 From: Robert Maynard Date: Mon, 12 Apr 2021 10:10:36 -0400 Subject: [PATCH 09/16] restore correct ordering of ITERATOR_TEST sources --- cpp/tests/CMakeLists.txt | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/cpp/tests/CMakeLists.txt b/cpp/tests/CMakeLists.txt index fc3026f744d..54c808eb4fb 100644 --- a/cpp/tests/CMakeLists.txt +++ b/cpp/tests/CMakeLists.txt @@ -228,10 +228,11 @@ ConfigureTest(SPAN_TEST utilities_tests/span_tests.cu) ################################################################################################### # - iterator tests -------------------------------------------------------------------------------- ConfigureTest(ITERATOR_TEST - iterator/optional_iterator_test.cu + iterator/value_iterator_test.cu iterator/pair_iterator_test.cu iterator/scalar_iterator_test.cu - iterator/value_iterator_test.cu) + iterator/optional_iterator_test.cu + ) ################################################################################################### # - device atomics tests -------------------------------------------------------------------------- From 8e904945876ddb68e82cd55b8d930ba2ea4b6a2c Mon Sep 17 00:00:00 2001 From: Robert Maynard Date: Mon, 19 Apr 2021 13:04:26 -0400 Subject: [PATCH 10/16] Clean up iterator.cuh grammer Co-authored-by: David Wendt <45795991+davidwendt@users.noreply.github.com> --- cpp/include/cudf/detail/iterator.cuh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/include/cudf/detail/iterator.cuh b/cpp/include/cudf/detail/iterator.cuh index 96a823d7e82..10d285aa0e6 100644 --- a/cpp/include/cudf/detail/iterator.cuh +++ b/cpp/include/cudf/detail/iterator.cuh @@ -657,7 +657,7 @@ struct scalar_representation_pair_accessor : public scalar_value_accessor From 746e81983103e99783a6f28033730501bdd50eb4 Mon Sep 17 00:00:00 2001 From: Robert Maynard Date: Mon, 19 Apr 2021 13:04:33 -0400 Subject: [PATCH 11/16] Clean up iterator.cuh grammer Co-authored-by: David Wendt <45795991+davidwendt@users.noreply.github.com> --- cpp/include/cudf/detail/iterator.cuh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cpp/include/cudf/detail/iterator.cuh b/cpp/include/cudf/detail/iterator.cuh index 10d285aa0e6..65c3191b00b 100644 --- a/cpp/include/cudf/detail/iterator.cuh +++ b/cpp/include/cudf/detail/iterator.cuh @@ -245,8 +245,8 @@ auto make_optional_iterator(column_device_view const& column, * @tparam Element The type of elements in the column * @tparam mode The has_nulls mode of the iterator * @param column The column to iterate - * @return auto Iterator that returns column elements, and validity of the - * element in a thrust::optional + * @return Iterator that returns column elements and the validity of the + * element as a thrust::optional */ template auto make_optional_iterator(column_device_view const& column, contains_nulls::YES) From 27e71fd3216e6f200c2a3d99e2b95c5bd30ee216 Mon Sep 17 00:00:00 2001 From: Robert Maynard Date: Mon, 19 Apr 2021 13:05:15 -0400 Subject: [PATCH 12/16] Clarify contains_nulls_mode behavior for optional_iterator Co-authored-by: David Wendt <45795991+davidwendt@users.noreply.github.com> --- cpp/include/cudf/column/column_device_view.cuh | 1 + 1 file changed, 1 insertion(+) diff --git a/cpp/include/cudf/column/column_device_view.cuh b/cpp/include/cudf/column/column_device_view.cuh index 5a18d0681d4..aa4333d6964 100644 --- a/cpp/include/cudf/column/column_device_view.cuh +++ b/cpp/include/cudf/column/column_device_view.cuh @@ -1232,6 +1232,7 @@ struct value_accessor { * * * @tparam T The type of elements in the column + * @tparam contains_nulls_mode Specifies if nulls are checked at runtime or compile time. */ template struct optional_accessor { From 615dcee24c366075c8c8d2800daf28e92beca15c Mon Sep 17 00:00:00 2001 From: Robert Maynard Date: Mon, 19 Apr 2021 13:05:43 -0400 Subject: [PATCH 13/16] Remove stale throw documentation from optional_iterator Co-authored-by: David Wendt <45795991+davidwendt@users.noreply.github.com> --- cpp/include/cudf/column/column_device_view.cuh | 1 - 1 file changed, 1 deletion(-) diff --git a/cpp/include/cudf/column/column_device_view.cuh b/cpp/include/cudf/column/column_device_view.cuh index aa4333d6964..ddcf82d5b38 100644 --- a/cpp/include/cudf/column/column_device_view.cuh +++ b/cpp/include/cudf/column/column_device_view.cuh @@ -1228,7 +1228,6 @@ struct value_accessor { * * @throws cudf::logic_error if column datatype and template T type mismatch. * @throws cudf::logic_error if the column is not nullable, and `with_nulls=true` - * @throws cudf::logic_error if column datatype and template T type mismatch. * * * @tparam T The type of elements in the column From 286644ee4cdcc19e8b037446282ea6e640b11022 Mon Sep 17 00:00:00 2001 From: Robert Maynard Date: Mon, 19 Apr 2021 13:06:48 -0400 Subject: [PATCH 14/16] Provide better example of DYNAMIC optional iterator Co-authored-by: David Wendt <45795991+davidwendt@users.noreply.github.com> --- cpp/include/cudf/column/column_device_view.cuh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cpp/include/cudf/column/column_device_view.cuh b/cpp/include/cudf/column/column_device_view.cuh index ddcf82d5b38..5f42823afe4 100644 --- a/cpp/include/cudf/column/column_device_view.cuh +++ b/cpp/include/cudf/column/column_device_view.cuh @@ -548,11 +548,11 @@ class alignas(16) column_device_view : public detail::column_device_view_base { * * \code{.cpp} * template - * void some_function(cudf::column_view const& col_view, bool has_nulls){ + * void some_function(cudf::column_view const& col_view){ * auto d_col = cudf::column_device_view::create(col_view); * // Create a `DYNAMIC` optional iterator * auto optional_iterator = d_col->optional_begin(cudf::contains_nulls::DYNAMIC{}, - * has_nulls); + * col_view.has_nulls()); * } * \endcode * From 5f69f545a0d7c9a150b6a08d5f81bc6a91286429 Mon Sep 17 00:00:00 2001 From: Robert Maynard Date: Mon, 19 Apr 2021 13:14:45 -0400 Subject: [PATCH 15/16] Provide better example of DYNAMIC optional iterator --- cpp/include/cudf/detail/iterator.cuh | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/cpp/include/cudf/detail/iterator.cuh b/cpp/include/cudf/detail/iterator.cuh index 65c3191b00b..113b7f53a9e 100644 --- a/cpp/include/cudf/detail/iterator.cuh +++ b/cpp/include/cudf/detail/iterator.cuh @@ -184,11 +184,12 @@ auto make_null_replacement_iterator(column_device_view const& column, * * \code{.cpp} * template - * void some_function(cudf::column_view const& col_view, bool has_nulls){ + * void some_function(cudf::column_view const& col_view){ * auto d_col = cudf::column_device_view::create(col_view); * // Create a `DYNAMIC` optional iterator * auto optional_iterator = cudf::detail::make_optional_iterator(d_col, - * cudf::contains_nulls::DYNAMIC{}, has_nulls); + * cudf::contains_nulls::DYNAMIC{}, + * col_view.has_nulls()); * } * \endcode * From 6437e448d14a9b5a32234afb7459d39be6a3d3ed Mon Sep 17 00:00:00 2001 From: Robert Maynard Date: Mon, 19 Apr 2021 14:01:13 -0400 Subject: [PATCH 16/16] Update optional_iterator documentation --- cpp/include/cudf/detail/iterator.cuh | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/cpp/include/cudf/detail/iterator.cuh b/cpp/include/cudf/detail/iterator.cuh index 113b7f53a9e..4cb0c6e1877 100644 --- a/cpp/include/cudf/detail/iterator.cuh +++ b/cpp/include/cudf/detail/iterator.cuh @@ -198,9 +198,8 @@ auto make_null_replacement_iterator(column_device_view const& column, * @throws cudf::logic_error if column datatype and Element type mismatch. * * @tparam Element The type of elements in the column - * @tparam MODE The null_mode of the iterator * @param column The column to iterate - * @return auto Iterator that returns valid column elements, and validity of the + * @return Iterator that returns valid column elements and the validity of the * element in a thrust::optional */ template @@ -244,7 +243,6 @@ auto make_optional_iterator(column_device_view const& column, * @throws cudf::logic_error if column datatype and Element type mismatch. * * @tparam Element The type of elements in the column - * @tparam mode The has_nulls mode of the iterator * @param column The column to iterate * @return Iterator that returns column elements and the validity of the * element as a thrust::optional @@ -287,9 +285,8 @@ auto make_optional_iterator(column_device_view const& column, contains_nulls::YE * @throws cudf::logic_error if column datatype and Element type mismatch. * * @tparam Element The type of elements in the column - * @tparam mode The has_nulls mode of the iterator * @param column The column to iterate - * @return auto Iterator that returns column elements, and validity of the + * @return Iterator that returns column elements and the validity of the * element in a thrust::optional */ template @@ -655,7 +652,6 @@ struct scalar_representation_pair_accessor : public scalar_value_accessor @@ -766,7 +762,7 @@ auto inline make_optional_iterator(scalar const& scalar_value, contains_nulls::Y * * @tparam Element The type of elements in the scalar * @param scalar_value The scalar to iterate - * @return auto Iterator that returns scalar elements, and validity of the + * @return Iterator that returns scalar elements and the validity of the * element in a thrust::optional */ template