diff --git a/cpp/include/cudf/column/column_device_view.cuh b/cpp/include/cudf/column/column_device_view.cuh index a842e51c94a..5f42823afe4 100644 --- a/cpp/include/cudf/column/column_device_view.cuh +++ b/cpp/include/cudf/column/column_device_view.cuh @@ -31,6 +31,7 @@ #include #include +#include #include @@ -40,6 +41,28 @@ */ namespace cudf { + +/** + * @brief Policy for what assumptions the optional iterator has about null values + * + * - `YES` means that the column supports nulls and has null values, therefore + * the optional might not contain a value + * + * - `NO` means that the column has no null values, therefore the optional will + * always have a value + * + * - `DYNAMIC` defers the assumption of nullability to runtime with the users stating + * on construction of the iterator if column has nulls. + */ +namespace contains_nulls { +struct YES { +}; +struct NO { +}; +struct DYNAMIC { +}; +} // namespace contains_nulls + namespace detail { /** * @brief An immutable, non-owning view of device data as a column of elements @@ -255,10 +278,11 @@ class alignas(16) column_device_view_base { : std::true_type { }; }; - // Forward declaration template struct value_accessor; +template +struct optional_accessor; template struct pair_accessor; template @@ -484,6 +508,13 @@ class alignas(16) column_device_view : public detail::column_device_view_base { return const_iterator{count_it{size()}, detail::value_accessor{*this}}; } + /** + * @brief optional iterator for navigating this column + */ + template + using const_optional_iterator = + thrust::transform_iterator, count_it>; + /** * @brief Pair iterator for navigating this column */ @@ -500,6 +531,124 @@ class alignas(16) column_device_view : public detail::column_device_view_base { using const_pair_rep_iterator = thrust::transform_iterator, count_it>; + /** + * @brief Return an optional iterator to the first element of the column. + * + * Dereferencing the returned iterator returns a `thrust::optional`. 
+ * + * When the element of an iterator is contextually converted to bool, the conversion returns true + * if the object contains a value and false if it does not contain a value. + * + * optional_begin with mode `DYNAMIC` defers the assumption of nullability to + * runtime, with the user stating on construction of the iterator if column has nulls. + * `DYNAMIC` mode is nice when an algorithm is going to execute on multiple + * iterators and you don't want to compile all the combinations of iterator types. + * + * Example: + * + * \code{.cpp} + * template + * void some_function(cudf::column_view const& col_view){ + * auto d_col = cudf::column_device_view::create(col_view); + * // Create a `DYNAMIC` optional iterator + * auto optional_iterator = d_col->optional_begin(cudf::contains_nulls::DYNAMIC{}, + * col_view.has_nulls()); + * } + * \endcode + * + * This function does not participate in overload resolution if + * `column_device_view::has_element_accessor()` is false. + * + * @throws cudf::logic_error if the column is not nullable, and `DYNAMIC` mode used and + * the user has stated nulls exist + * @throws cudf::logic_error if column datatype and Element type mismatch. + */ + template ())> + auto optional_begin(contains_nulls::DYNAMIC, bool has_nulls) const + { + return const_optional_iterator{ + count_it{0}, detail::optional_accessor{*this, has_nulls}}; + } + + /** + * @brief Return an optional iterator to the first element of the column. + * + * Dereferencing the returned iterator returns a `thrust::optional`. + * + * When the element of an iterator is contextually converted to bool, the conversion returns true + * if the object contains a value and false if it does not contain a value. 
+ * + * optional_begin with mode `YES` means that the column supports nulls and + * potentially has null values, therefore the optional might not contain a value + * + * Example: + * + * \code{.cpp} + * template + * void some_function(cudf::column_view const& col_view){ + * auto d_col = cudf::column_device_view::create(col_view); + * if constexpr(has_nulls) { + * auto optional_iterator = d_col->optional_begin(cudf::contains_nulls::YES{}); + * //use optional_iterator + * } else { + * auto optional_iterator = d_col->optional_begin(cudf::contains_nulls::NO{}); + * //use optional_iterator + * } + * } + * \endcode + * + * This function does not participate in overload resolution if + * `column_device_view::has_element_accessor()` is false. + * + * @throws cudf::logic_error if the column is not nullable, and `YES` mode used + * @throws cudf::logic_error if column datatype and Element type mismatch. + */ + template ())> + auto optional_begin(contains_nulls::YES) const + { + return const_optional_iterator{ + count_it{0}, detail::optional_accessor{*this}}; + } + + /** + * @brief Return an optional iterator to the first element of the column. + * + * Dereferencing the returned iterator returns a `thrust::optional`. + * + * When the element of an iterator contextually converted to bool, the conversion returns true + * if the object contains a value and false if it does not contain a value. + * + * optional_begin with mode `NO` means that the column has no null values, + * therefore the optional will always contain a value. 
+ * + * Example: + * + * \code{.cpp} + * template + * void some_function(cudf::column_view const& col_view){ + * auto d_col = cudf::column_device_view::create(col_view); + * if constexpr(has_nulls) { + * auto optional_iterator = d_col->optional_begin(cudf::contains_nulls::YES{}); + * //use optional_iterator + * } else { + * auto optional_iterator = d_col->optional_begin(cudf::contains_nulls::NO{}); + * //use optional_iterator + * } + * } + * \endcode + * + * This function does not participate in overload resolution if + * `column_device_view::has_element_accessor()` is false. + * + * @throws cudf::logic_error if column datatype and Element type mismatch. + */ + template ())> + auto optional_begin(contains_nulls::NO) const + { + return const_optional_iterator{ + count_it{0}, detail::optional_accessor{*this}}; + } + /** * @brief Return a pair iterator to the first element of the column. * @@ -558,6 +707,63 @@ class alignas(16) column_device_view : public detail::column_device_view_base { detail::pair_rep_accessor{*this}}; } + /** + * @brief Return an optional iterator to the element following the last element of + * the column. + * + * Dereferencing the returned iterator returns a `thrust::optional`. + * + * This function does not participate in overload resolution if + * `column_device_view::has_element_accessor()` is false. + * + * @throws cudf::logic_error if the column is not nullable, and `DYNAMIC` mode used and + * the user has stated nulls exist + * @throws cudf::logic_error if column datatype and Element type mismatch. + */ + template ())> + auto optional_end(contains_nulls::DYNAMIC, bool has_nulls) const + { + return const_optional_iterator{ + count_it{size()}, detail::optional_accessor{*this, has_nulls}}; + } + + /** + * @brief Return an optional iterator to the element following the last element of + * the column. + * + * Dereferencing the returned iterator returns a `thrust::optional`. 
+ * + * This function does not participate in overload resolution if + * `column_device_view::has_element_accessor()` is false. + * + * @throws cudf::logic_error if the column is not nullable, and `YES` mode used + * @throws cudf::logic_error if column datatype and Element type mismatch. + */ + template ())> + auto optional_end(contains_nulls::YES) const + { + return const_optional_iterator{ + count_it{size()}, detail::optional_accessor{*this}}; + } + + /** + * @brief Return an optional iterator to the element following the last element of + * the column. + * + * Dereferencing the returned iterator returns a `thrust::optional`. + * + * This function does not participate in overload resolution if + * `column_device_view::has_element_accessor()` is false. + * + * @throws cudf::logic_error if column datatype and Element type mismatch. + */ + template ())> + auto optional_end(contains_nulls::NO) const + { + return const_optional_iterator{ + count_it{size()}, detail::optional_accessor{*this}}; + } + /** * @brief Return a pair iterator to the element following the last element of * the column. @@ -999,6 +1205,82 @@ struct value_accessor { __device__ T operator()(cudf::size_type i) const { return col.element(i); } }; +/** + * @brief optional accessor of a column + * + * + * The optional_accessor always returns a thrust::optional of column[i]. The validity + * of the optional is determined by the contains_nulls_mode template parameter + * which has the following modes: + * + * - `YES` means that the column supports nulls and has null values, therefore + * the optional might be valid or invalid. + * + * - `NO` the user has attested that the column has no null values, + * no checks will occur and `thrust::optional{column[i]}` will be + * returned for each `i`. + * + * - `DYNAMIC` defers the assumption of nullability to runtime with the user stating + * on construction of the iterator if column has nulls. 
+ * When `with_nulls=true` the return value validity will be determined if column[i] + * is not null. + * When `with_nulls=false` the return value will always be valid + * + * @throws cudf::logic_error if column datatype and template T type mismatch. + * @throws cudf::logic_error if the column is not nullable, and `with_nulls=true` + * + * + * @tparam T The type of elements in the column + * @tparam contains_nulls_mode Specifies if nulls are checked at runtime or compile time. + */ +template +struct optional_accessor { + column_device_view const col; ///< column view of column in device + + /** + * @brief constructor + * @param[in] _col column device view of cudf column + */ + optional_accessor(column_device_view const& _col) : col{_col} + { + CUDF_EXPECTS(type_id_matches_device_storage_type(col.type().id()), "the data type mismatch"); + } + + CUDA_DEVICE_CALLABLE + thrust::optional operator()(cudf::size_type i) const + { + if constexpr (std::is_same_v) { + return (col.is_valid_nocheck(i)) ? thrust::optional{col.element(i)} + : thrust::optional{thrust::nullopt}; + } + return thrust::optional{col.element(i)}; + } +}; + +template +struct optional_accessor { + column_device_view const col; ///< column view of column in device + bool has_nulls; + + /** + * @brief constructor + * @param[in] _col column device view of cudf column + */ + optional_accessor(column_device_view const& _col, bool with_nulls) + : col{_col}, has_nulls{with_nulls} + { + CUDF_EXPECTS(type_id_matches_device_storage_type(col.type().id()), "the data type mismatch"); + if (with_nulls) { CUDF_EXPECTS(_col.nullable(), "Unexpected non-nullable column."); } + } + + CUDA_DEVICE_CALLABLE + thrust::optional operator()(cudf::size_type i) const + { + return (has_nulls and col.is_null_nocheck(i)) ? 
thrust::optional{thrust::nullopt} + : thrust::optional{col.element(i)}; + } +}; + /** * @brief pair accessor of column with/without null bitmask * A unary functor returns pair with scalar value at `id` and boolean validity diff --git a/cpp/include/cudf/detail/iterator.cuh b/cpp/include/cudf/detail/iterator.cuh index 881afa63ca5..4cb0c6e1877 100644 --- a/cpp/include/cudf/detail/iterator.cuh +++ b/cpp/include/cudf/detail/iterator.cuh @@ -167,6 +167,134 @@ auto make_null_replacement_iterator(column_device_view const& column, 0, null_replaced_value_accessor{column, null_replacement, has_nulls}); } +/** + * @brief Constructs an optional iterator over a column's values and its validity. + * + * Dereferencing the returned iterator returns a `thrust::optional`. + * + * When the element of an iterator is contextually converted to bool, the conversion returns true + * if the object contains a value and false if it does not contain a value. + * + * make_optional_iterator with mode `DYNAMIC` defers the assumption of nullability to + * runtime, with the user stating on construction of the iterator if column has nulls. + * `DYNAMIC` mode is nice when an algorithm is going to execute on multiple + * iterators and you don't want to compile all the combinations of iterator types. + * + * Example: + * + * \code{.cpp} + * template + * void some_function(cudf::column_view const& col_view){ + * auto d_col = cudf::column_device_view::create(col_view); + * // Create a `DYNAMIC` optional iterator + * auto optional_iterator = cudf::detail::make_optional_iterator(d_col, + * cudf::contains_nulls::DYNAMIC{}, + * col_view.has_nulls()); + * } + * \endcode + * + * @throws cudf::logic_error if the column is not nullable, and `DYNAMIC` mode used and + * the user has stated nulls exist + * @throws cudf::logic_error if column datatype and Element type mismatch. 
+ * + * @tparam Element The type of elements in the column + * @param column The column to iterate + * @return Iterator that returns valid column elements and the validity of the + * element in a thrust::optional + */ +template +auto make_optional_iterator(column_device_view const& column, + contains_nulls::DYNAMIC, + bool has_nulls) +{ + return column.optional_begin(contains_nulls::DYNAMIC{}, has_nulls); +} + +/** + * @brief Constructs an optional iterator over a column's values and its validity. + * + * Dereferencing the returned iterator returns a `thrust::optional`. + * + * When the element of an iterator contextually converted to bool, the conversion returns true + * if the object contains a value and false if it does not contain a value. + * + * make_optional_iterator with mode `YES` means that the column supports nulls and + * potentially has null values, therefore the optional might not contain a value + * + * Example: + * + * \code{.cpp} + * template + * void some_function(cudf::column_view const& col_view){ + * auto d_col = cudf::column_device_view::create(col_view); + * if constexpr(has_nulls) { + * auto optional_iterator = cudf::detail::make_optional_iterator(d_col, + * cudf::contains_nulls::YES{}); + * //use optional_iterator + * } else { + * auto optional_iterator = cudf::detail::make_optional_iterator(d_col, + * cudf::contains_nulls::NO{}); + * //use optional_iterator + * } + * } + * \endcode + * + * @throws cudf::logic_error if the column is not nullable, and `YES` mode used + * @throws cudf::logic_error if column datatype and Element type mismatch. 
+ * + * @tparam Element The type of elements in the column + * @param column The column to iterate + * @return Iterator that returns column elements and the validity of the + * element as a thrust::optional + */ +template +auto make_optional_iterator(column_device_view const& column, contains_nulls::YES) +{ + return column.optional_begin(contains_nulls::YES{}); +} + +/** + * @brief Constructs an optional iterator over a column's values and its validity. + * + * Dereferencing the returned iterator returns a `thrust::optional`. + * + * When the element of an iterator contextually converted to bool, the conversion returns true + * if the object contains a value and false if it does not contain a value. + * + * make_optional_iterator with mode `NO` means that the column has no null values, + * therefore the optional will always contain a value. + * + * Example: + * + * \code{.cpp} + * template + * void some_function(cudf::column_view const& col_view){ + * auto d_col = cudf::column_device_view::create(col_view); + * if constexpr(has_nulls) { + * auto optional_iterator = cudf::detail::make_optional_iterator(d_col, + * cudf::contains_nulls::YES{}); + * //use optional_iterator + * } else { + * auto optional_iterator = cudf::detail::make_optional_iterator(d_col, + * cudf::contains_nulls::NO{}); + * //use optional_iterator + * } + * } + * \endcode + * + * @throws cudf::logic_error if column datatype and Element type mismatch. + * + * @tparam Element The type of elements in the column + * @param column The column to iterate + * @return Iterator that returns column elements and the validity of the + * element in a thrust::optional + */ +template +auto make_optional_iterator(column_device_view const& column, contains_nulls::NO) +{ + return column.optional_begin(contains_nulls::NO{}); +} + /** * @brief Constructs a pair iterator over a column's values and its validity. 
* @@ -320,6 +448,81 @@ auto inline make_scalar_iterator(scalar const& scalar_value) scalar_value_accessor{scalar_value}); } +template +struct scalar_optional_accessor; + +/** + * @brief optional accessor of a maybe-nullable scalar + * + * The scalar_optional_accessor always returns a thrust::optional of the scalar. + * The validity of the optional is determined by the contains_nulls_mode template parameter + * which has the following modes: + * + * `DYNAMIC`: Defer nullability checks to runtime + * + * - When `with_nulls=true` the return value will be a `thrust::optional{scalar}` + * when scalar is valid, and `thrust::optional{}` when the scalar is invalid. + * + * - When `with_nulls=false` the return value will always be `thrust::optional{scalar}` + * + * `NO`: No null values will occur for this scalar, no checks will occur + * and `thrust::optional{scalar}` will always be returned. + * + * `YES`: null values will occur for this scalar, + * and `thrust::optional{scalar}` will always be returned. + * + * @throws `cudf::logic_error` if scalar datatype and Element type mismatch. + * + * @tparam Element The type of return type of functor + */ +template +struct scalar_optional_accessor : public scalar_value_accessor { + using super_t = scalar_value_accessor; + using value_type = thrust::optional; + + scalar_optional_accessor(scalar const& scalar_value) + : scalar_value_accessor(scalar_value) + { + } + + /** + * @brief returns a thrust::optional. + * + * @throw `cudf::logic_error` if this function is called in host. + * + * @return a thrust::optional for the scalar value. + */ + CUDA_HOST_DEVICE_CALLABLE + const value_type operator()(size_type) const + { + if constexpr (std::is_same_v) { + return (super_t::dscalar.is_valid()) ? 
Element{super_t::dscalar.value()} + : value_type{thrust::nullopt}; + } + return Element{super_t::dscalar.value()}; + } +}; + +template +struct scalar_optional_accessor + : public scalar_value_accessor { + using super_t = scalar_value_accessor; + using value_type = thrust::optional; + bool has_nulls; + + scalar_optional_accessor(scalar const& scalar_value, bool with_nulls) + : scalar_value_accessor(scalar_value), has_nulls{with_nulls} + { + } + + CUDA_HOST_DEVICE_CALLABLE + const value_type operator()(size_type) const + { + return (has_nulls and !super_t::dscalar.is_valid()) ? value_type{thrust::nullopt} + : Element{super_t::dscalar.value()}; + } +}; + /** * @brief pair accessor for scalar. * The unary functor returns a pair of data of Element type and bool validity of the scalar. @@ -415,6 +618,163 @@ struct scalar_representation_pair_accessor : public scalar_value_accessor`. + * + * When the element of an iterator contextually converted to bool, the conversion returns true + * if the object contains a value and false if it does not contain a value. + * + * The iterator behavior is undefined if the scalar is destroyed before iterator dereferencing. + * + * make_optional_iterator with mode `DYNAMIC` defers the assumption of nullability to + * runtime, with the user stating on construction of the iterator if scalar has nulls. 
+ * + * Example: + * + * \code{.cpp} + * template + * void some_function(cudf::column_view const& col_view, + * scalar const& scalar_value, + * bool col_has_nulls){ + * auto d_col = cudf::column_device_view::create(col_view); + * auto column_iterator = cudf::detail::make_optional_iterator(d_col, + cudf::contains_nulls::DYNAMIC{}, col_has_nulls); + * auto scalar_iterator = cudf::detail::make_optional_iterator(scalar_value, + cudf::contains_nulls::DYNAMIC{}, scalar_value.is_valid()); + * //use iterators + * } + * \endcode + * + * @throws cudf::logic_error if the scalar is not nullable, and `DYNAMIC` mode used and + * the user has stated nulls exist + * @throws cudf::logic_error if scalar datatype and Element type mismatch. + * + * @tparam Element The type of elements in the scalar + * @tparam has_nulls If the scalar value will have a null at runtime + * @param scalar_value The scalar to iterate + * @return Iterator that returns scalar elements and validity of the + * element in a thrust::optional + */ +template +auto inline make_optional_iterator(scalar const& scalar_value, + contains_nulls::DYNAMIC, + bool has_nulls) +{ + CUDF_EXPECTS(type_id_matches_device_storage_type(scalar_value.type().id()), + "the data type mismatch"); + return thrust::make_transform_iterator( + thrust::make_constant_iterator(0), + scalar_optional_accessor{scalar_value, has_nulls}); +} + +/** + * @brief Constructs an optional iterator over a scalar's values and its validity. + * + * Dereferencing the returned iterator returns a `thrust::optional`. + * + * When the element of an iterator contextually converted to bool, the conversion returns true + * if the object contains a value and false if it does not contain a value. + * + * The iterator behavior is undefined if the scalar is destroyed before iterator dereferencing. 
+ * + * make_optional_iterator with mode `YES` means that the scalar supports nulls and + * potentially has null values, therefore the optional might not contain a value + * (it is empty when the scalar is invalid). + * + * Example: + * + * \code{.cpp} + * template + * void some_function(cudf::column_view const& col_view, scalar const& scalar_value){ + * auto d_col = cudf::column_device_view::create(col_view); + * if constexpr(any_nulls) { + * auto column_iterator = cudf::detail::make_optional_iterator(d_col, + * cudf::contains_nulls::YES{}); + * auto scalar_iterator = cudf::detail::make_optional_iterator(scalar_value, + * cudf::contains_nulls::YES{}); + * //use iterators + * } else { + * auto column_iterator = cudf::detail::make_optional_iterator(d_col, + * cudf::contains_nulls::NO{}); + * auto scalar_iterator = cudf::detail::make_optional_iterator(scalar_value, + * cudf::contains_nulls::NO{}); + * //use iterators + * } + * } + * \endcode + * + * @throws cudf::logic_error if the scalar is not nullable, and `YES` mode used + * @throws cudf::logic_error if scalar datatype and Element type mismatch. + * + * @tparam Element The type of elements in the scalar + * @param scalar_value The scalar to iterate + * @return Iterator that returns scalar elements and the validity of the + * element in a thrust::optional + */ +template +auto inline make_optional_iterator(scalar const& scalar_value, contains_nulls::YES) +{ + CUDF_EXPECTS(type_id_matches_device_storage_type(scalar_value.type().id()), + "the data type mismatch"); + return thrust::make_transform_iterator( + thrust::make_constant_iterator(0), + scalar_optional_accessor{scalar_value}); +} + +/** + * @brief Constructs an optional iterator over a scalar's values and its validity. + * + * Dereferencing the returned iterator returns a `thrust::optional`. 
+ * + * When the element of an iterator contextually converted to bool, the conversion returns true + * if the object contains a value and false if it does not contain a value. + * + * The iterator behavior is undefined if the scalar is destroyed before iterator dereferencing. + * + * make_optional_iterator with mode `NO` means that the scalar has no null values, + * therefore the optional will always contain a value. + * + * Example: + * + * \code{.cpp} + * template + * void some_function(cudf::column_view const& col_view, scalar const& scalar_value){ + * auto d_col = cudf::column_device_view::create(col_view); + * if constexpr(any_nulls) { + * auto column_iterator = cudf::detail::make_optional_iterator(d_col, + * cudf::contains_nulls::YES{}); + * auto scalar_iterator = cudf::detail::make_optional_iterator(scalar_value, + * cudf::contains_nulls::YES{}); + * //use iterators + * } else { + * auto column_iterator = cudf::detail::make_optional_iterator(d_col, + * cudf::contains_nulls::NO{}); + * auto scalar_iterator = cudf::detail::make_optional_iterator(scalar_value, + * cudf::contains_nulls::NO{}); + * //use iterators + * } + * } + * \endcode + * + * @throws cudf::logic_error if scalar datatype and Element type mismatch. + * + * @tparam Element The type of elements in the scalar + * @param scalar_value The scalar to iterate + * @return Iterator that returns scalar elements and the validity of the + * element in a thrust::optional + */ +template +auto inline make_optional_iterator(scalar const& scalar_value, contains_nulls::NO) +{ + CUDF_EXPECTS(type_id_matches_device_storage_type(scalar_value.type().id()), + "the data type mismatch"); + return thrust::make_transform_iterator( + thrust::make_constant_iterator(0), + scalar_optional_accessor{scalar_value}); +} + /** * @brief Constructs a constant device pair iterator over a scalar's value and its validity. 
* diff --git a/cpp/tests/CMakeLists.txt b/cpp/tests/CMakeLists.txt index 342ec9145fd..54c808eb4fb 100644 --- a/cpp/tests/CMakeLists.txt +++ b/cpp/tests/CMakeLists.txt @@ -230,7 +230,9 @@ ConfigureTest(SPAN_TEST utilities_tests/span_tests.cu) ConfigureTest(ITERATOR_TEST iterator/value_iterator_test.cu iterator/pair_iterator_test.cu - iterator/scalar_iterator_test.cu) + iterator/scalar_iterator_test.cu + iterator/optional_iterator_test.cu + ) ################################################################################################### # - device atomics tests -------------------------------------------------------------------------- diff --git a/cpp/tests/iterator/optional_iterator_test.cu b/cpp/tests/iterator/optional_iterator_test.cu new file mode 100644 index 00000000000..460f692ee14 --- /dev/null +++ b/cpp/tests/iterator/optional_iterator_test.cu @@ -0,0 +1,198 @@ +/* + * Copyright (c) 2020, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed under the License + * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express + * or implied. See the License for the specific language governing permissions and limitations under + * the License. + */ +#include +#include + +// to print meanvar for debug. 
+template +std::ostream& operator<<(std::ostream& os, cudf::meanvar const& rhs) +{ + return os << "[" << rhs.value << ", " << rhs.value_squared << ", " << rhs.count << "] "; +}; + +// Transformers and Operators for optional_iterator test +template +struct transformer_optional_meanvar { + using ResultType = thrust::optional>; + + CUDA_HOST_DEVICE_CALLABLE + ResultType operator()(thrust::optional const& optional) + { + if (optional.has_value()) { + auto v = *optional; + return cudf::meanvar{v, static_cast(v * v), 1}; + } + return thrust::nullopt; + } +}; + +struct sum_if_not_null { + template + CUDA_HOST_DEVICE_CALLABLE thrust::optional operator()(const thrust::optional& lhs, + const thrust::optional& rhs) + { + return lhs.value_or(T{0}) + rhs.value_or(T{0}); + } +}; + +template +struct OptionalIteratorTest : public cudf::test::BaseFixture { +}; +TYPED_TEST_CASE(OptionalIteratorTest, cudf::test::NumericTypes); +// TODO: enable this test also at __CUDACC_DEBUG__ +// This test causes fatal compilation error only at device debug mode. +// Workaround: exclude this test only at device debug mode. +#if !defined(__CUDACC_DEBUG__) +// This test computes `count`, `sum`, `sum_of_squares` at a single reduction call. 
+// It would be useful for `var`, `std` operation +TYPED_TEST(OptionalIteratorTest, mean_var_output) +{ + using T = TypeParam; + using T_output = cudf::meanvar; + transformer_optional_meanvar transformer{}; + + const int column_size{50}; + const T init{0}; + + // data and valid arrays + std::vector host_values(column_size); + std::vector host_bools(column_size); + + cudf::test::UniformRandomGenerator rng; + cudf::test::UniformRandomGenerator rbg; + std::generate(host_values.begin(), host_values.end(), [&rng]() { return rng.generate(); }); + std::generate(host_bools.begin(), host_bools.end(), [&rbg]() { return rbg.generate(); }); + + cudf::test::fixed_width_column_wrapper w_col( + host_values.begin(), host_values.end(), host_bools.begin()); + auto d_col = cudf::column_device_view::create(w_col); + + // calculate expected values by CPU + T_output expected_value; + + expected_value.count = d_col->size() - static_cast(w_col).null_count(); + + std::vector replaced_array(d_col->size()); + std::transform(host_values.begin(), + host_values.end(), + host_bools.begin(), + replaced_array.begin(), + [&](T x, bool b) { return (b) ? 
static_cast(x) : init; }); + + expected_value.count = d_col->size() - static_cast(w_col).null_count(); + expected_value.value = std::accumulate(replaced_array.begin(), replaced_array.end(), T{0}); + expected_value.value_squared = std::accumulate( + replaced_array.begin(), replaced_array.end(), T{0}, [](T acc, T i) { return acc + i * i; }); + + std::cout << "expected = " << expected_value << std::endl; + + // GPU test + auto it_dev = d_col->optional_begin(cudf::contains_nulls::YES{}); + auto it_dev_squared = thrust::make_transform_iterator(it_dev, transformer); + auto result = thrust::reduce(it_dev_squared, + it_dev_squared + d_col->size(), + thrust::optional{T_output{}}, + sum_if_not_null{}); + if (not std::is_floating_point()) { + EXPECT_EQ(expected_value, *result) << "optional iterator reduction sum"; + } else { + EXPECT_NEAR(expected_value.value, result->value, 1e-3) << "optional iterator reduction sum"; + EXPECT_NEAR(expected_value.value_squared, result->value_squared, 1e-3) + << "optional iterator reduction sum squared"; + EXPECT_EQ(expected_value.count, result->count) << "optional iterator reduction count"; + } +} +#endif + +using TestingTypes = cudf::test::AllTypes; + +TYPED_TEST_CASE(IteratorTest, TestingTypes); + +TYPED_TEST(IteratorTest, nonull_optional_iterator) +{ + using T = TypeParam; + // data and valid arrays + auto host_values_std = + cudf::test::make_type_param_vector({0, 6, 0, -14, 13, 64, -13, -20, 45}); + thrust::host_vector host_values(host_values_std); + + // create a column + cudf::test::fixed_width_column_wrapper w_col(host_values.begin(), host_values.end()); + auto d_col = cudf::column_device_view::create(w_col); + + // calculate the expected value by CPU. 
+ thrust::host_vector> replaced_array(host_values.size()); + std::transform(host_values.begin(), host_values.end(), replaced_array.begin(), [](auto s) { + return thrust::optional{s}; + }); + + // GPU test + this->iterator_test_thrust( + replaced_array, + cudf::detail::make_optional_iterator(*d_col, cudf::contains_nulls::DYNAMIC{}, false), + host_values.size()); + this->iterator_test_thrust( + replaced_array, + cudf::detail::make_optional_iterator(*d_col, cudf::contains_nulls::NO{}), + host_values.size()); +} + +TYPED_TEST(IteratorTest, null_optional_iterator) +{ + using T = TypeParam; + // data and valid arrays + auto host_values = cudf::test::make_type_param_vector({0, 6, 0, -14, 13, 64, -13, -20, 45}); + thrust::host_vector host_bools(std::vector({1, 1, 0, 1, 1, 1, 0, 1, 1})); + + // create a column with bool vector + cudf::test::fixed_width_column_wrapper w_col( + host_values.begin(), host_values.end(), host_bools.begin()); + auto d_col = cudf::column_device_view::create(w_col); + + // calculate the expected value by CPU. + thrust::host_vector> optional_values(host_values.size()); + std::transform(host_values.begin(), + host_values.end(), + host_bools.begin(), + optional_values.begin(), + [](auto s, bool b) { return b ? 
thrust::optional{s} : thrust::optional{}; }); + + thrust::host_vector> value_all_valid(host_values.size()); + std::transform(host_values.begin(), + host_values.end(), + host_bools.begin(), + value_all_valid.begin(), + [](auto s, bool b) { return thrust::optional{s}; }); + + // GPU test for correct null mapping + this->iterator_test_thrust(optional_values, + d_col->optional_begin(cudf::contains_nulls::DYNAMIC{}, true), + host_values.size()); + + this->iterator_test_thrust( + optional_values, d_col->optional_begin(cudf::contains_nulls::YES{}), host_values.size()); + this->iterator_test_thrust( + optional_values, d_col->optional_begin(cudf::contains_nulls::YES{}), host_values.size()); + + // GPU test for ignoring null mapping + this->iterator_test_thrust(value_all_valid, + d_col->optional_begin(cudf::contains_nulls::DYNAMIC{}, false), + host_values.size()); + + this->iterator_test_thrust( + value_all_valid, d_col->optional_begin(cudf::contains_nulls::NO{}), host_values.size()); + this->iterator_test_thrust( + value_all_valid, d_col->optional_begin(cudf::contains_nulls::NO{}), host_values.size()); +}