diff --git a/cpp/benchmarks/groupby/group_sum_benchmark.cu b/cpp/benchmarks/groupby/group_sum_benchmark.cu index f64022690d9..0e9f5061a1a 100644 --- a/cpp/benchmarks/groupby/group_sum_benchmark.cu +++ b/cpp/benchmarks/groupby/group_sum_benchmark.cu @@ -44,7 +44,6 @@ void BM_basic_sum(benchmark::State& state) { using wrapper = cudf::test::fixed_width_column_wrapper; - // const cudf::size_type num_columns{(cudf::size_type)state.range(0)}; const cudf::size_type column_size{(cudf::size_type)state.range(0)}; auto data_it = cudf::detail::make_counting_transform_iterator( @@ -53,7 +52,7 @@ void BM_basic_sum(benchmark::State& state) wrapper keys(data_it, data_it + column_size); wrapper vals(data_it, data_it + column_size); - cudf::groupby::groupby gb_obj(cudf::table_view({keys})); + cudf::groupby::groupby gb_obj(cudf::table_view({keys, keys, keys})); std::vector requests; requests.emplace_back(cudf::groupby::aggregation_request()); @@ -73,7 +72,9 @@ BENCHMARK_REGISTER_F(Groupby, Basic) ->UseManualTime() ->Unit(benchmark::kMillisecond) ->Arg(10000) - ->Arg(10000000); + ->Arg(1000000) + ->Arg(10000000) + ->Arg(100000000); void BM_pre_sorted_sum(benchmark::State& state) { diff --git a/cpp/benchmarks/hashing/hash_benchmark.cpp b/cpp/benchmarks/hashing/hash_benchmark.cpp index 77b10399693..4ccb0bfad9d 100644 --- a/cpp/benchmarks/hashing/hash_benchmark.cpp +++ b/cpp/benchmarks/hashing/hash_benchmark.cpp @@ -25,10 +25,14 @@ class HashBenchmark : public cudf::benchmark { }; -static void BM_hash(benchmark::State& state, cudf::hash_id hid) +enum contains_nulls { no_nulls, nulls }; + +static void BM_hash(benchmark::State& state, cudf::hash_id hid, contains_nulls has_nulls) { cudf::size_type const n_rows{(cudf::size_type)state.range(0)}; auto const data = create_random_table({cudf::type_id::INT64}, 1, row_count{n_rows}); + if (has_nulls == contains_nulls::no_nulls) + data->get_column(0).set_null_mask(rmm::device_buffer{}, 0); for (auto _ : state) { cuda_event_timer raii(state, true, 
rmm::cuda_stream_default); @@ -36,16 +40,25 @@ static void BM_hash(benchmark::State& state, cudf::hash_id hid) } } -#define HASH_BENCHMARK_DEFINE(name) \ - BENCHMARK_DEFINE_F(HashBenchmark, name) \ - (::benchmark::State & st) { BM_hash(st, cudf::hash_id::name); } \ - BENCHMARK_REGISTER_F(HashBenchmark, name) \ - ->RangeMultiplier(4) \ - ->Ranges({{1 << 14, 1 << 24}}) \ - ->UseManualTime() \ +#define concat(a, b, c) a##b##c + +#define H_BENCHMARK_DEFINE(name, hid, n) \ + BENCHMARK_DEFINE_F(HashBenchmark, name) \ + (::benchmark::State & st) { BM_hash(st, cudf::hash_id::hid, contains_nulls::n); } \ + BENCHMARK_REGISTER_F(HashBenchmark, name) \ + ->RangeMultiplier(4) \ + ->Ranges({{1 << 14, 1 << 24}}) \ + ->UseManualTime() \ ->Unit(benchmark::kMillisecond); -HASH_BENCHMARK_DEFINE(HASH_MURMUR3) -HASH_BENCHMARK_DEFINE(HASH_MD5) -HASH_BENCHMARK_DEFINE(HASH_SERIAL_MURMUR3) -HASH_BENCHMARK_DEFINE(HASH_SPARK_MURMUR3) +#define HASH_BENCHMARK_DEFINE(hid, n) H_BENCHMARK_DEFINE(concat(hid, _, n), hid, n) + +HASH_BENCHMARK_DEFINE(HASH_MURMUR3, nulls) +HASH_BENCHMARK_DEFINE(HASH_MD5, nulls) +HASH_BENCHMARK_DEFINE(HASH_SERIAL_MURMUR3, nulls) +HASH_BENCHMARK_DEFINE(HASH_SPARK_MURMUR3, nulls) + +HASH_BENCHMARK_DEFINE(HASH_MURMUR3, no_nulls) +HASH_BENCHMARK_DEFINE(HASH_MD5, no_nulls) +HASH_BENCHMARK_DEFINE(HASH_SERIAL_MURMUR3, no_nulls) +HASH_BENCHMARK_DEFINE(HASH_SPARK_MURMUR3, no_nulls) diff --git a/cpp/include/cudf/column/column_device_view.cuh b/cpp/include/cudf/column/column_device_view.cuh index 6ecb0796283..a15f20ef52d 100644 --- a/cpp/include/cudf/column/column_device_view.cuh +++ b/cpp/include/cudf/column/column_device_view.cuh @@ -44,23 +44,30 @@ namespace cudf { /** - * @brief Policy for what assumptions the optional iterator has about null values + * @brief Indicates the presence of nulls at compile-time or runtime. 
* - * - `YES` means that the column supports nulls and has null values, therefore - * the optional might not contain a value + * If used at compile-time, this indicator can tell the optimizer + * to include or exclude any null-checking clauses. * - * - `NO` means that the column has no null values, therefore the optional will - * always have a value - * - * - `DYNAMIC` defers the assumption of nullability to runtime with the users stating - * on construction of the iterator if column has nulls. */ -struct contains_nulls { - struct YES { +struct nullate { + struct YES : std::bool_constant { }; - struct NO { + struct NO : std::bool_constant { }; struct DYNAMIC { + DYNAMIC() = delete; + /** + * @brief Create a runtime nullate object. + * + * @see cudf::column_device_view::optional_begin for example usage + * + * @param b True if nulls are expected in the operation in which this + * object is applied. + */ + constexpr explicit DYNAMIC(bool b) noexcept : value{b} {} + constexpr operator bool() const noexcept { return value; } + bool value; ///< True if nulls are expected }; }; @@ -282,7 +289,7 @@ class alignas(16) column_device_view_base { // Forward declaration template struct value_accessor; -template +template struct optional_accessor; template struct pair_accessor; @@ -493,11 +500,11 @@ class alignas(16) column_device_view : public detail::column_device_view_base { } /** - * @brief optional iterator for navigating this column + * @brief Optional iterator for navigating this column */ - template + template using const_optional_iterator = - thrust::transform_iterator, count_it>; + thrust::transform_iterator, count_it>; /** * @brief Pair iterator for navigating this column @@ -520,117 +527,57 @@ class alignas(16) column_device_view : public detail::column_device_view_base { * * Dereferencing the returned iterator returns a `thrust::optional`. 
* - * When the element of an iterator contextually converted to bool, the conversion returns true + * The element of this iterator contextually converts to bool. The conversion returns true * if the object contains a value and false if it does not contain a value. * - * optional_begin with mode `DYNAMIC` defers the assumption of nullability to - * runtime, with the user stating on construction of the iterator if column has nulls. - * `DYNAMIC` mode is nice when an algorithm is going to execute on multiple - * iterators and you don't want to compile all the combinations of iterator types - * - * Example: + * Calling this method with `nullate::DYNAMIC` defers the assumption of nullability to + * runtime with the caller indicating if the column has nulls. The `nullate::DYNAMIC` is + * useful when an algorithm is going to execute on multiple iterators and all the combinations of + * iterator types are not required at compile time. * - * \code{.cpp} + * @code{.cpp} * template * void some_function(cudf::column_view const& col_view){ * auto d_col = cudf::column_device_view::create(col_view); * // Create a `DYNAMIC` optional iterator - * auto optional_iterator = d_col->optional_begin(cudf::contains_nulls::DYNAMIC{}, - * col_view.has_nulls()); - * } - * \endcode - * - * This function does not participate in overload resolution if - * `column_device_view::has_element_accessor()` is false. - * - * @throws cudf::logic_error if the column is not nullable, and `DYNAMIC` mode used and - * the user has stated nulls exist - * @throws cudf::logic_error if column datatype and Element type mismatch. - */ - template ())> - auto optional_begin(contains_nulls::DYNAMIC, bool has_nulls) const - { - return const_optional_iterator{ - count_it{0}, detail::optional_accessor{*this, has_nulls}}; - } - - /** - * @brief Return an optional iterator to the first element of the column. - * - * Dereferencing the returned iterator returns a `thrust::optional`. 
- * - * When the element of an iterator contextually converted to bool, the conversion returns true - * if the object contains a value and false if it does not contain a value. - * - * optional_begin with mode `YES` means that the column supports nulls and - * potentially has null values, therefore the optional might not contain a value - * - * Example: - * - * \code{.cpp} - * template - * void some_function(cudf::column_view const& col_view){ - * auto d_col = cudf::column_device_view::create(col_view); - * if constexpr(has_nulls) { - * auto optional_iterator = d_col->optional_begin(cudf::contains_nulls::YES{}); - * //use optional_iterator - * } else { - * auto optional_iterator = d_col->optional_begin(cudf::contains_nulls::NO{}); - * //use optional_iterator - * } + * auto optional_iterator = + * d_col->optional_begin(cudf::nullate::DYNAMIC{col_view.has_nulls()}); * } - * \endcode + * @endcode * - * This function does not participate in overload resolution if - * `column_device_view::has_element_accessor()` is false. + * Calling this method with `nullate::YES` means that the column supports nulls and + * the optional returned might not contain a value. * - * @throws cudf::logic_error if the column is not nullable, and `YES` mode used - * @throws cudf::logic_error if column datatype and Element type mismatch. - */ - template ())> - auto optional_begin(contains_nulls::YES) const - { - return const_optional_iterator{ - count_it{0}, detail::optional_accessor{*this}}; - } - - /** - * @brief Return an optional iterator to the first element of the column. + * Calling this method with `nullate::NO` means that the column has no null values + * and the optional returned will always contain a value. * - * Dereferencing the returned iterator returns a `thrust::optional`. - * - * When the element of an iterator contextually converted to bool, the conversion returns true - * if the object contains a value and false if it does not contain a value. 
- * - * optional_begin with mode `NO` means that the column has no null values, - * therefore the optional will always contain a value. - * - * Example: - * - * \code{.cpp} + * @code{.cpp} * template * void some_function(cudf::column_view const& col_view){ * auto d_col = cudf::column_device_view::create(col_view); * if constexpr(has_nulls) { - * auto optional_iterator = d_col->optional_begin(cudf::contains_nulls::YES{}); + * auto optional_iterator = d_col->optional_begin(cudf::nullate::YES{}); * //use optional_iterator * } else { - * auto optional_iterator = d_col->optional_begin(cudf::contains_nulls::NO{}); + * auto optional_iterator = d_col->optional_begin(cudf::nullate::NO{}); * //use optional_iterator * } * } - * \endcode + * @endcode * * This function does not participate in overload resolution if * `column_device_view::has_element_accessor()` is false. * + * @throws cudf::logic_error if the column is not nullable and `has_nulls` evaluates to true. * @throws cudf::logic_error if column datatype and Element type mismatch. */ - template ())> - auto optional_begin(contains_nulls::NO) const + template ())> + auto optional_begin(Nullate has_nulls) const { - return const_optional_iterator{ - count_it{0}, detail::optional_accessor{*this}}; + return const_optional_iterator{ + count_it{0}, detail::optional_accessor{*this, has_nulls}}; } /** @@ -695,57 +642,21 @@ class alignas(16) column_device_view : public detail::column_device_view_base { * @brief Return an optional iterator to the element following the last element of * the column. * - * Dereferencing the returned iterator returns a `thrust::optional`. + * The returned iterator represents a `thrust::optional` element. * * This function does not participate in overload resolution if * `column_device_view::has_element_accessor()` is false. 
* - * @throws cudf::logic_error if the column is not nullable, and `DYNAMIC` mode used and - * the user has stated nulls exist + * @throws cudf::logic_error if the column is not nullable and `has_nulls` is true * @throws cudf::logic_error if column datatype and Element type mismatch. */ - template ())> - auto optional_end(contains_nulls::DYNAMIC, bool has_nulls) const - { - return const_optional_iterator{ - count_it{size()}, detail::optional_accessor{*this, has_nulls}}; - } - - /** - * @brief Return an optional iterator to the element following the last element of - * the column. - * - * Dereferencing the returned iterator returns a `thrust::optional`. - * - * This function does not participate in overload resolution if - * `column_device_view::has_element_accessor()` is false. - * - * @throws cudf::logic_error if the column is not nullable, and `YES` mode used - * @throws cudf::logic_error if column datatype and Element type mismatch. - */ - template ())> - auto optional_end(contains_nulls::YES) const - { - return const_optional_iterator{ - count_it{size()}, detail::optional_accessor{*this}}; - } - - /** - * @brief Return an optional iterator to the element following the last element of - * the column. - * - * Dereferencing the returned iterator returns a `thrust::optional`. - * - * This function does not participate in overload resolution if - * `column_device_view::has_element_accessor()` is false. - * - * @throws cudf::logic_error if column datatype and Element type mismatch. - */ - template ())> - auto optional_end(contains_nulls::NO) const + template ())> + auto optional_end(Nullate has_nulls) const { - return const_optional_iterator{ - count_it{size()}, detail::optional_accessor{*this}}; + return const_optional_iterator{ + count_it{size()}, detail::optional_accessor{*this, has_nulls}}; } /** @@ -1201,77 +1112,56 @@ struct value_accessor { * @brief optional accessor of a column * * - * The optional_accessor always returns a thrust::optional of column[i]. 
The validity - * of the optional is determined by the contains_nulls_mode template parameter - * which has the following modes: + * The optional_accessor always returns a `thrust::optional` of `column[i]`. The validity + * of the optional is determined by the `Nullate` parameter which may be one of the following: * - * - `YES` means that the column supports nulls and has null values, therefore - * the optional might be valid or invalid + * - `nullate::YES` means that the column supports nulls and the optional returned + * might be valid or invalid. * - * - `NO` the user has attested that the column has no null values, + * - `nullate::NO` means the caller attests that the column has no null values, * no checks will occur and `thrust::optional{column[i]}` will be * return for each `i`. * - * - `DYNAMIC` defers the assumption of nullability to runtime with the users stating - * on construction of the iterator if column has nulls. - * When `with_nulls=true` the return value validity will be determined if column[i] - * is not null. - * When `with_nulls=false` the return value will always be valid + * - `nullate::DYNAMIC` defers the assumption of nullability to runtime and the caller + * specifies if the column has nulls at runtime. + * For `DYNAMIC{true}` the return value will be `thrust::optional{column[i]}` if + * element `i` is not null and `thrust::optional{}` if element `i` is null. + * For `DYNAMIC{false}` the return value will always be `thrust::optional{column[i]}`. * * @throws cudf::logic_error if column datatype and template T type mismatch. - * @throws cudf::logic_error if the column is not nullable, and `with_nulls=true` - * + * @throws cudf::logic_error if the column is not nullable and `with_nulls` evaluates to true * * @tparam T The type of elements in the column - * @tparam contains_nulls_mode Specifies if nulls are checked at runtime or compile time. + * @tparam Nullate A cudf::nullate type describing how to check for nulls. 
*/ -template +template struct optional_accessor { column_device_view const col; ///< column view of column in device /** - * @brief constructor - * @param[in] _col column device view of cudf column + * @brief Constructor + * + * @param _col Column whose elements are to be iterated over. + * @param with_nulls Indicates if `_col` should be checked for nulls. */ - optional_accessor(column_device_view const& _col) : col{_col} + optional_accessor(column_device_view const& _col, Nullate with_nulls) + : col{_col}, has_nulls{with_nulls} { CUDF_EXPECTS(type_id_matches_device_storage_type(col.type().id()), "the data type mismatch"); + if (with_nulls) { CUDF_EXPECTS(_col.nullable(), "Unexpected non-nullable column."); } } CUDA_DEVICE_CALLABLE thrust::optional operator()(cudf::size_type i) const { - if constexpr (std::is_same_v) { + if (has_nulls) { return (col.is_valid_nocheck(i)) ? thrust::optional{col.element(i)} : thrust::optional{thrust::nullopt}; } return thrust::optional{col.element(i)}; } -}; - -template -struct optional_accessor { - column_device_view const col; ///< column view of column in device - bool has_nulls; - - /** - * @brief constructor - * @param[in] _col column device view of cudf column - * @param[in] with_nulls Indicates if @p _col has nulls - */ - optional_accessor(column_device_view const& _col, bool with_nulls) - : col{_col}, has_nulls{with_nulls} - { - CUDF_EXPECTS(type_id_matches_device_storage_type(col.type().id()), "the data type mismatch"); - if (with_nulls) { CUDF_EXPECTS(_col.nullable(), "Unexpected non-nullable column."); } - } - CUDA_DEVICE_CALLABLE - thrust::optional operator()(cudf::size_type i) const - { - return (has_nulls and col.is_null_nocheck(i)) ? 
thrust::optional{thrust::nullopt} - : thrust::optional{col.element(i)}; - } + Nullate has_nulls{}; }; /** diff --git a/cpp/include/cudf/detail/iterator.cuh b/cpp/include/cudf/detail/iterator.cuh index 3e789299716..01742384972 100644 --- a/cpp/include/cudf/detail/iterator.cuh +++ b/cpp/include/cudf/detail/iterator.cuh @@ -171,127 +171,61 @@ auto make_null_replacement_iterator(column_device_view const& column, * * Dereferencing the returned iterator returns a `thrust::optional`. * - * When the element of an iterator contextually converted to bool, the conversion returns true + * The element of this iterator contextually converts to bool. The conversion returns true * if the object contains a value and false if it does not contain a value. * - * make_optional_iterator with mode `DYNAMIC` defers the assumption of nullability to - * runtime, with the user stating on construction of the iterator if column has nulls. - * `DYNAMIC` mode is nice when an algorithm is going to execute on multiple - * iterators and you don't want to compile all the combinations of iterator types + * Calling this function with `nullate::DYNAMIC` defers the assumption + * of nullability to runtime with the caller indicating if the column has nulls. + * This is useful when an algorithm is going to execute on multiple iterators and all + * the combinations of iterator types are not required at compile time. 
* - * Example: - * - * \code{.cpp} + * @code{.cpp} * template * void some_function(cudf::column_view const& col_view){ * auto d_col = cudf::column_device_view::create(col_view); * // Create a `DYNAMIC` optional iterator - * auto optional_iterator = cudf::detail::make_optional_iterator(d_col, - * cudf::contains_nulls::DYNAMIC{}, - * col_view.has_nulls()); + * auto optional_iterator = + * cudf::detail::make_optional_iterator( + * d_col, cudf::nullate::DYNAMIC{col_view.has_nulls()}); * } - * \endcode - * - * @throws cudf::logic_error if the column is not nullable, and `DYNAMIC` mode used and - * the user has stated nulls exist - * @throws cudf::logic_error if column datatype and Element type mismatch. - * - * @tparam Element The type of elements in the column - * @param column The column to iterate - * @return Iterator that returns valid column elements and the validity of the - * element in a thrust::optional - */ -template -auto make_optional_iterator(column_device_view const& column, - contains_nulls::DYNAMIC, - bool has_nulls) -{ - return column.optional_begin(contains_nulls::DYNAMIC{}, has_nulls); -} - -/** - * @brief Constructs an optional iterator over a column's values and its validity. - * - * Dereferencing the returned iterator returns a `thrust::optional`. - * - * When the element of an iterator contextually converted to bool, the conversion returns true - * if the object contains a value and false if it does not contain a value. - * - * make_optional_iterator with mode `YES` means that the column supports nulls and - * potentially has null values, therefore the optional might not contain a value + * @endcode * - * Example: + * Calling this function with `nullate::YES` means that the column supports + * nulls and the optional returned might not contain a value. + * Calling this function with `nullate::NO` means that the column has no + * null values and the optional returned will always contain a value. 
* - * \code{.cpp} + * @code{.cpp} * template * void some_function(cudf::column_view const& col_view){ * auto d_col = cudf::column_device_view::create(col_view); * if constexpr(has_nulls) { - * auto optional_iterator = cudf::detail::make_optional_iterator(d_col, - * cudf::contains_nulls::YES{}); + * auto optional_iterator = + * cudf::detail::make_optional_iterator(d_col, cudf::nullate::YES{}); * //use optional_iterator * } else { - * auto optional_iterator = cudf::detail::make_optional_iterator(d_col, - * cudf::contains_nulls::NO{}); + * auto optional_iterator = + * cudf::detail::make_optional_iterator(d_col, cudf::nullate::NO{}); * //use optional_iterator * } * } - * \endcode + * @endcode * - * @throws cudf::logic_error if the column is not nullable, and `YES` mode used + * @throws cudf::logic_error if the column is not nullable and `has_nulls` is true. * @throws cudf::logic_error if column datatype and Element type mismatch. * - * @tparam Element The type of elements in the column - * @param column The column to iterate - * @return Iterator that returns column elements and the validity of the - * element as a thrust::optional - */ -template -auto make_optional_iterator(column_device_view const& column, contains_nulls::YES) -{ - return column.optional_begin(contains_nulls::YES{}); -} - -/** - * @brief Constructs an optional iterator over a column's values and its validity. - * - * Dereferencing the returned iterator returns a `thrust::optional`. - * - * When the element of an iterator contextually converted to bool, the conversion returns true - * if the object contains a value and false if it does not contain a value. - * - * make_optional_iterator with mode `NO` means that the column has no null values, - * therefore the optional will always contain a value. 
- * - * Example: - * - * \code{.cpp} - * template - * void some_function(cudf::column_view const& col_view){ - * auto d_col = cudf::column_device_view::create(col_view); - * if constexpr(has_nulls) { - * auto optional_iterator = cudf::detail::make_optional_iterator(d_col, - * cudf::contains_nulls::YES{}); - * //use optional_iterator - * } else { - * auto optional_iterator = cudf::detail::make_optional_iterator(d_col, - * cudf::contains_nulls::NO{}); - * //use optional_iterator - * } - * } - * \endcode - * - * @throws cudf::logic_error if column datatype and Element type mismatch. + * @tparam Element The type of elements in the column. + * @tparam Nullate A cudf::nullate type describing how to check for nulls. * - * @tparam Element The type of elements in the column * @param column The column to iterate - * @return Iterator that returns column elements and the validity of the - * element in a thrust::optional + * @param has_nulls Indicates whether `column` is checked for nulls. + * @return Iterator that returns valid column elements and the validity of the + * element in a `thrust::optional` */ -template -auto make_optional_iterator(column_device_view const& column, contains_nulls::NO) +template +auto make_optional_iterator(column_device_view const& column, Nullate has_nulls) { - return column.optional_begin(contains_nulls::NO{}); + return column.optional_begin(has_nulls); } /** @@ -447,40 +381,38 @@ auto inline make_scalar_iterator(scalar const& scalar_value) scalar_value_accessor{scalar_value}); } -template -struct scalar_optional_accessor; - /** - * @brief optional accessor of a maybe-nullable scalar - * - * The scalar_optional_accessor always returns a thrust::optional of the scalar. 
- * The validity of the optional is determined by the contains_nulls_mode template parameter - * which has the following modes: + * @brief Optional accessor for a scalar * - * `DYNAMIC`: Defer nullability checks to runtime + * The `scalar_optional_accessor` always returns a `thrust::optional` of the scalar. + * The validity of the optional is determined by the `Nullate` parameter which may + * be one of the following: * - * - When `with_nulls=true` the return value will be a `thrust::optional{scalar}` - * when scalar is valid, and `thrust::optional{}` when the scalar is invalid. + * - `nullate::YES` means that the scalar may be valid or invalid and the optional returned + * will contain a value only if the scalar is valid. * - * - When `with_nulls=false` the return value will always be `thrust::optional{scalar}` + * - `nullate::NO` means the caller attests that the scalar will always be valid, + * no checks will occur and the returned `thrust::optional{scalar}` will always + * contain a value. * - * `NO`: No null values will occur for this scalar, no checks will occur - * and `thrust::optional{scalar}` will always be returned. - * - * `YES`: null values will occur for this scalar, - * and `thrust::optional{scalar}` will always be returned. + * - `nullate::DYNAMIC` defers the assumption of nullability to runtime and the caller + * specifies if the scalar may be valid or invalid. + * For `DYNAMIC{true}` the return value will be a `thrust::optional{scalar}` when the + * scalar is valid and a `thrust::optional{}` when the scalar is invalid. + * For `DYNAMIC{false}` the return value will always be a `thrust::optional{scalar}`. * * @throws `cudf::logic_error` if scalar datatype and Element type mismatch. * * @tparam Element The type of return type of functor + * @tparam Nullate A cudf::nullate type describing how to check for nulls. 
*/ -template +template struct scalar_optional_accessor : public scalar_value_accessor { using super_t = scalar_value_accessor; using value_type = thrust::optional; - scalar_optional_accessor(scalar const& scalar_value) - : scalar_value_accessor(scalar_value) + scalar_optional_accessor(scalar const& scalar_value, Nullate with_nulls) + : scalar_value_accessor(scalar_value), has_nulls{with_nulls} { } @@ -494,32 +426,14 @@ struct scalar_optional_accessor : public scalar_value_accessor { CUDA_HOST_DEVICE_CALLABLE const value_type operator()(size_type) const { - if constexpr (std::is_same_v) { + if (has_nulls) { return (super_t::dscalar.is_valid()) ? Element{super_t::dscalar.value()} : value_type{thrust::nullopt}; } return Element{super_t::dscalar.value()}; } -}; -template -struct scalar_optional_accessor - : public scalar_value_accessor { - using super_t = scalar_value_accessor; - using value_type = thrust::optional; - bool has_nulls; - - scalar_optional_accessor(scalar const& scalar_value, bool with_nulls) - : scalar_value_accessor(scalar_value), has_nulls{with_nulls} - { - } - - CUDA_HOST_DEVICE_CALLABLE - const value_type operator()(size_type) const - { - return (has_nulls and !super_t::dscalar.is_valid()) ? value_type{thrust::nullopt} - : Element{super_t::dscalar.value()}; - } + Nullate has_nulls{}; }; /** @@ -622,156 +536,70 @@ struct scalar_representation_pair_accessor : public scalar_value_accessor`. * - * When the element of an iterator contextually converted to bool, the conversion returns true + * The element of this iterator contextually converts to bool. The conversion returns true * if the object contains a value and false if it does not contain a value. * * The iterator behavior is undefined if the scalar is destroyed before iterator dereferencing. * - * make_optional_iterator with mode `DYNAMIC` defers the assumption of nullability to - * runtime, with the user stating on construction of the iterator if scalar has nulls. 
- * - * Example: + * Calling this function with `nullate::DYNAMIC` defers the assumption + * of nullability to runtime with the caller indicating if the scalar is valid. * - * \code{.cpp} + * @code{.cpp} * template * void some_function(cudf::column_view const& col_view, * scalar const& scalar_value, * bool col_has_nulls){ * auto d_col = cudf::column_device_view::create(col_view); - * auto column_iterator = cudf::detail::make_optional_iterator(d_col, - cudf::contains_nulls::DYNAMIC{}, col_has_nulls); - * auto scalar_iterator = cudf::detail::make_optional_iterator(scalar_value, - cudf::contains_nulls::DYNAMIC{}, scalar_value.is_valid()); + * auto column_iterator = cudf::detail::make_optional_iterator( + * d_col, cudf::nullate::DYNAMIC{col_has_nulls}); + * auto scalar_iterator = cudf::detail::make_optional_iterator( + * scalar_value, cudf::nullate::DYNAMIC{scalar_value.is_valid()}); * //use iterators * } - * \endcode - * - * @throws cudf::logic_error if the scalar is not nullable, and `DYNAMIC` mode used and - * the user has stated nulls exist - * @throws cudf::logic_error if scalar datatype and Element type mismatch. - * - * @tparam Element The type of elements in the scalar - * @tparam has_nulls If the scalar value will have a null at runtime - * @param scalar_value The scalar to iterate - * @return Iterator that returns scalar elements and validity of the - * element in a thrust::optional - */ -template -auto inline make_optional_iterator(scalar const& scalar_value, - contains_nulls::DYNAMIC, - bool has_nulls) -{ - CUDF_EXPECTS(type_id_matches_device_storage_type(scalar_value.type().id()), - "the data type mismatch"); - return thrust::make_transform_iterator( - thrust::make_constant_iterator(0), - scalar_optional_accessor{scalar_value, has_nulls}); -} - -/** - * @brief Constructs an optional iterator over a scalar's values and its validity. - * - * Dereferencing the returned iterator returns a `thrust::optional`. 
- * - * When the element of an iterator contextually converted to bool, the conversion returns true - * if the object contains a value and false if it does not contain a value. - * - * The iterator behavior is undefined if the scalar is destroyed before iterator dereferencing. - * - * make_optional_iterator ith mode `YES` means that the scalar supports nulls and - * potentially has null values, therefore the optional might not contain a value - * therefore the optional will always contain a value. + * @endcode * - * Example: + * Calling this function with `nullate::YES` means that the scalar may be invalid + * and the optional returned might not contain a value. + * Calling this function with `nullate::NO` means that the scalar is valid + * and the optional returned will always contain a value. * - * \code{.cpp} + * @code{.cpp} * template * void some_function(cudf::column_view const& col_view, scalar const& scalar_value){ * auto d_col = cudf::column_device_view::create(col_view); * if constexpr(any_nulls) { - * auto column_iterator = cudf::detail::make_optional_iterator(d_col, - * cudf::contains_nulls::YES{}); - * auto scalar_iterator = cudf::detail::make_optional_iterator(scalar_value, - * cudf::contains_nulls::YES{}); + * auto column_iterator = + * cudf::detail::make_optional_iterator(d_col, cudf::nullate::YES{}); + * auto scalar_iterator = + * cudf::detail::make_optional_iterator(scalar_value, cudf::nullate::YES{}); * //use iterators * } else { - * auto column_iterator = cudf::detail::make_optional_iterator(d_col, - * cudf::contains_nulls::NO{}); - * auto scalar_iterator = cudf::detail::make_optional_iterator(scalar_value, - * cudf::contains_nulls::NO{}); + * auto column_iterator = + * cudf::detail::make_optional_iterator(d_col, cudf::nullate::NO{}); + * auto scalar_iterator = + * cudf::detail::make_optional_iterator(scalar_value, cudf::nullate::NO{}); * //use iterators * } * } - * \endcode + * @endcode * - * @throws cudf::logic_error if the scalar is not nullable, 
and `YES` mode used * @throws cudf::logic_error if scalar datatype and Element type mismatch. * * @tparam Element The type of elements in the scalar - * @param scalar_value The scalar to iterate - * @return Iterator that returns scalar elements and the validity of the - * element in a thrust::optional - */ -template -auto inline make_optional_iterator(scalar const& scalar_value, contains_nulls::YES) -{ - CUDF_EXPECTS(type_id_matches_device_storage_type(scalar_value.type().id()), - "the data type mismatch"); - return thrust::make_transform_iterator( - thrust::make_constant_iterator(0), - scalar_optional_accessor{scalar_value}); -} - -/** - * @brief Constructs an optional iterator over a scalar's values and its validity. - * - * Dereferencing the returned iterator returns a `thrust::optional`. - * - * When the element of an iterator contextually converted to bool, the conversion returns true - * if the object contains a value and false if it does not contain a value. - * - * The iterator behavior is undefined if the scalar is destroyed before iterator dereferencing. - * - * make_optional_iterator with mode `NO` means that the scalar has no null values, - * therefore the optional will always contain a value. + * @tparam Nullate A cudf::nullate type describing how to check for nulls. 
* - * Example: - * - * \code{.cpp} - * template - * void some_function(cudf::column_view const& col_view, scalar const& scalar_value){ - * auto d_col = cudf::column_device_view::create(col_view); - * if constexpr(any_nulls) { - * auto column_iterator = cudf::detail::make_optional_iterator(d_col, - * cudf::contains_nulls::YES{}); - * auto scalar_iterator = cudf::detail::make_optional_iterator(scalar_value, - * cudf::contains_nulls::YES{}); - * //use iterators - * } else { - * auto column_iterator = cudf::detail::make_optional_iterator(d_col, - * cudf::contains_nulls::NO{}); - * auto scalar_iterator = cudf::detail::make_optional_iterator(scalar_value, - * cudf::contains_nulls::NO{}); - * //use iterators - * } - * } - * \endcode - * - * @throws cudf::logic_error if scalar datatype and Element type mismatch. - * - * @tparam Element The type of elements in the scalar - * @param scalar_value The scalar to iterate - * @return Iterator that returns scalar elements and the validity of the - * element in a thrust::optional + * @param scalar_value The scalar to be returned by the iterator. + * @param has_nulls Indicates if the scalar value may be invalid. 
+ * @return Iterator that returns scalar and the validity of the scalar in a thrust::optional */ -template -auto inline make_optional_iterator(scalar const& scalar_value, contains_nulls::NO) +template +auto inline make_optional_iterator(scalar const& scalar_value, Nullate has_nulls) { CUDF_EXPECTS(type_id_matches_device_storage_type(scalar_value.type().id()), "the data type mismatch"); return thrust::make_transform_iterator( thrust::make_constant_iterator(0), - scalar_optional_accessor{scalar_value}); + scalar_optional_accessor{scalar_value, has_nulls}); } /** diff --git a/cpp/include/cudf/detail/merge.cuh b/cpp/include/cudf/detail/merge.cuh index ec83e348e33..f141d9b5d59 100644 --- a/cpp/include/cudf/detail/merge.cuh +++ b/cpp/include/cudf/detail/merge.cuh @@ -90,8 +90,8 @@ struct tagged_element_relational_comparator { column_device_view const* ptr_right_dview{r_side == side::LEFT ? &lhs : &rhs}; - auto erl_comparator = - element_relational_comparator(*ptr_left_dview, *ptr_right_dview, null_precedence); + auto erl_comparator = element_relational_comparator( + nullate::DYNAMIC{has_nulls}, *ptr_left_dview, *ptr_right_dview, null_precedence); return cudf::type_dispatcher(lhs.type(), erl_comparator, l_indx, r_indx); } diff --git a/cpp/include/cudf/table/row_operators.cuh b/cpp/include/cudf/table/row_operators.cuh index 70ccac2f75d..0f3ca073380 100644 --- a/cpp/include/cudf/table/row_operators.cuh +++ b/cpp/include/cudf/table/row_operators.cuh @@ -50,9 +50,9 @@ namespace detail { /** * @brief Compare the elements ordering with respect to `lhs`. * - * @param[in] lhs first element - * @param[in] rhs second element - * @return weak_ordering Indicates the relationship between the elements in + * @param lhs first element + * @param rhs second element + * @return Indicates the relationship between the elements in * the `lhs` and `rhs` columns. 
*/ template @@ -69,14 +69,15 @@ __device__ weak_ordering compare_elements(Element lhs, Element rhs) /** * @brief A specialization for floating-point `Element` type relational comparison - * to derive the order of the elements with respect to `lhs`. Specialization is to - * handle `nan` in the order shown below. + * to derive the order of the elements with respect to `lhs`. + * + * This specialization handles `nan` in the following order: * `[-Inf, -ve, 0, -0, +ve, +Inf, NaN, NaN, null] (for null_order::AFTER)` * `[null, -Inf, -ve, 0, -0, +ve, +Inf, NaN, NaN] (for null_order::BEFORE)` * - * @param[in] lhs first element - * @param[in] rhs second element - * @return weak_ordering Indicates the relationship between the elements in + * @param lhs first element + * @param rhs second element + * @return Indicates the relationship between the elements in * the `lhs` and `rhs` columns. */ template ::value>* = nullptr> @@ -119,7 +120,7 @@ inline __device__ auto null_compare(bool lhs_is_null, bool rhs_is_null, null_ord * * @param[in] lhs first element * @param[in] rhs second element - * @return weak_ordering Indicates the relationship between the elements in + * @return Indicates the relationship between the elements in * the `lhs` and `rhs` columns. */ template ::value>* = nullptr> @@ -132,9 +133,9 @@ __device__ weak_ordering relational_compare(Element lhs, Element rhs) * @brief A specialization for floating-point `Element` type to check if * `lhs` is equivalent to `rhs`. `nan == nan`. * - * @param[in] lhs first element - * @param[in] rhs second element - * @return bool `true` if `lhs` == `rhs` else `false`. + * @param lhs first element + * @param rhs second element + * @return `true` if `lhs` == `rhs` else `false`.
*/ template ::value>* = nullptr> __device__ bool equality_compare(Element lhs, Element rhs) @@ -147,9 +148,9 @@ __device__ bool equality_compare(Element lhs, Element rhs) * @brief A specialization for non-floating-point `Element` type to check if * `lhs` is equivalent to `rhs`. * - * @param[in] lhs first element - * @param[in] rhs second element - * @return bool `true` if `lhs` == `rhs` else `false`. + * @param lhs first element + * @param rhs second element + * @return `true` if `lhs` == `rhs` else `false`. */ template ::value>* = nullptr> __device__ bool equality_compare(Element const lhs, Element const rhs) @@ -160,9 +161,9 @@ __device__ bool equality_compare(Element const lhs, Element const rhs) /** * @brief Performs an equality comparison between two elements in two columns. * - * @tparam has_nulls Indicates the potential for null values in either column. + * @tparam Nullate A cudf::nullate type describing how to check for nulls. */ -template +template class element_equality_comparator { public: /** @@ -171,14 +172,17 @@ class element_equality_comparator { * * @note `lhs` and `rhs` may be the same. * + * @param has_nulls Indicates if either input column contains nulls. 
* @param lhs The column containing the first element * @param rhs The column containing the second element (may be the same as lhs) * @param nulls_are_equal Indicates if two null elements are treated as equivalent */ - __host__ __device__ element_equality_comparator(column_device_view lhs, - column_device_view rhs, - bool nulls_are_equal = true) - : lhs{lhs}, rhs{rhs}, nulls_are_equal{nulls_are_equal} + __host__ __device__ + element_equality_comparator(Nullate has_nulls, + column_device_view lhs, + column_device_view rhs, + null_equality nulls_are_equal = null_equality::EQUAL) + : lhs{lhs}, rhs{rhs}, nulls{has_nulls}, nulls_are_equal{nulls_are_equal} { } @@ -188,18 +192,17 @@ class element_equality_comparator { * @param lhs_element_index The index of the first element * @param rhs_element_index The index of the second element * @return True if both lhs and rhs element are both nulls and `nulls_are_equal` is true, or equal - * */ template ()>* = nullptr> __device__ bool operator()(size_type lhs_element_index, size_type rhs_element_index) const noexcept { - if (has_nulls) { + if (nulls) { bool const lhs_is_null{lhs.is_null(lhs_element_index)}; bool const rhs_is_null{rhs.is_null(rhs_element_index)}; if (lhs_is_null and rhs_is_null) { - return nulls_are_equal; + return nulls_are_equal == null_equality::EQUAL; } else if (lhs_is_null != rhs_is_null) { return false; } @@ -220,14 +223,18 @@ class element_equality_comparator { private: column_device_view lhs; column_device_view rhs; - bool nulls_are_equal; + Nullate nulls; + null_equality nulls_are_equal; }; -template +template class row_equality_comparator { public: - row_equality_comparator(table_device_view lhs, table_device_view rhs, bool nulls_are_equal = true) - : lhs{lhs}, rhs{rhs}, nulls_are_equal{nulls_are_equal} + row_equality_comparator(Nullate has_nulls, + table_device_view lhs, + table_device_view rhs, + null_equality nulls_are_equal = true) + : lhs{lhs}, rhs{rhs}, nulls{has_nulls}, 
nulls_are_equal{nulls_are_equal} { CUDF_EXPECTS(lhs.num_columns() == rhs.num_columns(), "Mismatched number of columns."); } @@ -236,7 +243,7 @@ class row_equality_comparator { { auto equal_elements = [=](column_device_view l, column_device_view r) { return cudf::type_dispatcher(l.type(), - element_equality_comparator{l, r, nulls_are_equal}, + element_equality_comparator{nulls, l, r, nulls_are_equal}, lhs_row_index, rhs_row_index); }; @@ -247,15 +254,16 @@ class row_equality_comparator { private: table_device_view lhs; table_device_view rhs; - bool nulls_are_equal; + Nullate nulls; + null_equality nulls_are_equal; }; /** * @brief Performs a relational comparison between two elements in two columns. * - * @tparam has_nulls Indicates the potential for null values in either column. + * @tparam Nullate A cudf::nullate type describing how to check for nulls. */ -template +template class element_relational_comparator { public: /** @@ -266,13 +274,21 @@ class element_relational_comparator { * * @param lhs The column containing the first element * @param rhs The column containing the second element (may be the same as lhs) - * @param null_precedence Indicates how null values are ordered with other - * values + * @param has_nulls Indicates if either input column contains nulls. 
+ * @param null_precedence Indicates how null values are ordered with other values */ - __host__ __device__ element_relational_comparator(column_device_view lhs, + __host__ __device__ element_relational_comparator(Nullate has_nulls, + column_device_view lhs, column_device_view rhs, null_order null_precedence) - : lhs{lhs}, rhs{rhs}, null_precedence{null_precedence} + : lhs{lhs}, rhs{rhs}, nulls{has_nulls}, null_precedence{null_precedence} + { + } + + __host__ __device__ element_relational_comparator(Nullate has_nulls, + column_device_view lhs, + column_device_view rhs) + : lhs{lhs}, rhs{rhs}, nulls{has_nulls} { } @@ -281,7 +297,7 @@ class element_relational_comparator { * * @param lhs_element_index The index of the first element * @param rhs_element_index The index of the second element - * @return weak_ordering Indicates the relationship between the elements in + * @return Indicates the relationship between the elements in * the `lhs` and `rhs` columns. */ template +template class row_lexicographic_comparator { public: /** @@ -343,6 +360,7 @@ class row_lexicographic_comparator { * * @param lhs The first table * @param rhs The second table (may be the same table as `lhs`) + * @param has_nulls Indicates if either input table contains columns with nulls. * @param column_order Optional, device array the same length as a row that * indicates the desired ascending/descending order of each column in a row. * If `nullptr`, it is assumed all columns are sorted in ascending order. @@ -351,11 +369,16 @@ class row_lexicographic_comparator { * it is nullptr, then null precedence would be `null_order::BEFORE` for all * columns. 
*/ - row_lexicographic_comparator(table_device_view lhs, + row_lexicographic_comparator(Nullate has_nulls, + table_device_view lhs, table_device_view rhs, order const* column_order = nullptr, null_order const* null_precedence = nullptr) - : _lhs{lhs}, _rhs{rhs}, _column_order{column_order}, _null_precedence{null_precedence} + : _lhs{lhs}, + _rhs{rhs}, + _nulls{has_nulls}, + _column_order{column_order}, + _null_precedence{null_precedence} { CUDF_EXPECTS(_lhs.num_columns() == _rhs.num_columns(), "Mismatched number of columns."); CUDF_EXPECTS(detail::is_relationally_comparable(_lhs, _rhs), @@ -376,14 +399,14 @@ class row_lexicographic_comparator { for (size_type i = 0; i < _lhs.num_columns(); ++i) { bool ascending = (_column_order == nullptr) or (_column_order[i] == order::ASCENDING); - weak_ordering state{weak_ordering::EQUIVALENT}; null_order null_precedence = _null_precedence == nullptr ? null_order::BEFORE : _null_precedence[i]; auto comparator = - element_relational_comparator{_lhs.column(i), _rhs.column(i), null_precedence}; + element_relational_comparator{_nulls, _lhs.column(i), _rhs.column(i), null_precedence}; - state = cudf::type_dispatcher(_lhs.column(i).type(), comparator, lhs_index, rhs_index); + weak_ordering state = + cudf::type_dispatcher(_lhs.column(i).type(), comparator, lhs_index, rhs_index); if (state == weak_ordering::EQUIVALENT) { continue; } @@ -395,6 +418,7 @@ class row_lexicographic_comparator { private: table_device_view _lhs; table_device_view _rhs; + Nullate _nulls{}; null_order const* _null_precedence{}; order const* _column_order{}; }; // class row_lexicographic_comparator @@ -403,9 +427,9 @@ class row_lexicographic_comparator { * @brief Computes the hash value of an element in the given column. * * @tparam hash_function Hash functor to use for hashing elements. - * @tparam has_nulls Indicates the potential for null values in the column. + * @tparam Nullate A cudf::nullate type describing how to check for nulls. */ -template