Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Allow runtime has_nulls parameter for row operators #9623

Merged
merged 28 commits into from
Dec 6, 2021
Merged
Show file tree
Hide file tree
Changes from 16 commits
Commits
Show all changes
28 commits
Select commit Hold shift + click to select a range
30aab15
Allow runtime has_nulls parameter for row operators
davidwendt Nov 8, 2021
be00fed
Merge branch 'branch-22.02' into fea-row-op-has-nulls
davidwendt Nov 11, 2021
ef725e0
update groupby/scan for row-op
davidwendt Nov 12, 2021
2b378ae
Merge branch 'branch-22.02' into fea-row-op-has-nulls
davidwendt Nov 12, 2021
933c3c0
Merge branch 'branch-22.02' into fea-row-op-has-nulls
davidwendt Nov 12, 2021
0a9203d
use contains_nulls in place of has_nulls template parameter
davidwendt Nov 17, 2021
fbe2ed5
Merge branch 'branch-22.02' into fea-row-op-has-nulls
davidwendt Nov 17, 2021
a631b7c
Merge branch 'branch-22.02' into fea-row-op-has-nulls
davidwendt Nov 17, 2021
4c43dbf
use std::bool_constant for indicator truthiness
davidwendt Nov 18, 2021
cb6a9b5
Merge branch 'branch-22.02' into fea-row-op-has-nulls
davidwendt Nov 18, 2021
5c92b48
change contains_nulls to nullate
davidwendt Nov 18, 2021
f38cc0e
fix merge conflict
davidwendt Nov 18, 2021
4aaa819
Merge branch 'branch-22.02' into fea-row-op-has-nulls
davidwendt Nov 19, 2021
d58daea
update clang-format version
davidwendt Nov 19, 2021
159438a
move nullate to column-dev-view header
davidwendt Nov 19, 2021
3c42a25
Merge branch 'branch-22.02' into fea-row-op-has-nulls
davidwendt Nov 19, 2021
a33ac35
Merge branch 'branch-22.02' into fea-row-op-has-nulls
davidwendt Nov 22, 2021
69327b7
Merge branch 'branch-22.02' into fea-row-op-has-nulls
davidwendt Nov 23, 2021
336174d
add more doxygen to DYNAMIC
davidwendt Nov 23, 2021
56de367
add updates to benchmarks to verify changes in this PR
davidwendt Nov 29, 2021
e55d04e
Merge branch 'branch-22.02' into fea-row-op-has-nulls
davidwendt Nov 30, 2021
2508797
Merge branch 'branch-22.02' into fea-row-op-has-nulls
davidwendt Dec 1, 2021
e0c456a
removed commented-out default ctor
davidwendt Dec 1, 2021
109d2e3
fix new call to row_arg_minmax_fn
davidwendt Dec 1, 2021
e37bbdb
Merge branch 'branch-22.02' into fea-row-op-has-nulls
davidwendt Dec 2, 2021
144e476
Merge branch 'branch-22.02' into fea-row-op-has-nulls
davidwendt Dec 3, 2021
0962359
fix various tparam texts
davidwendt Dec 3, 2021
c4f6a08
Merge branch 'branch-22.02' into fea-row-op-has-nulls
davidwendt Dec 3, 2021
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
254 changes: 68 additions & 186 deletions cpp/include/cudf/column/column_device_view.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -44,23 +44,22 @@
namespace cudf {

/**
* @brief Policy for what assumptions the optional iterator has about null values
* @brief Indicates the presence of nulls at compile-time or runtime.
*
* - `YES` means that the column supports nulls and has null values, therefore
* the optional might not contain a value
* If used at compile-time, this indicator can tell the optimizer
* to include or exclude any null-checking clauses.
*
* - `NO` means that the column has no null values, therefore the optional will
* always have a value
*
* - `DYNAMIC` defers the assumption of nullability to runtime with the users stating
* on construction of the iterator if column has nulls.
*/
struct contains_nulls {
struct YES {
struct nullate {
davidwendt marked this conversation as resolved.
Show resolved Hide resolved
struct YES : std::bool_constant<true> {
};
struct NO {
struct NO : std::bool_constant<false> {
};
struct DYNAMIC {
DYNAMIC() = delete;
constexpr explicit DYNAMIC(bool b) noexcept : value{b} {}
constexpr operator bool() const noexcept { return value; }
bool value;
davidwendt marked this conversation as resolved.
Show resolved Hide resolved
};
};

Expand Down Expand Up @@ -282,7 +281,7 @@ class alignas(16) column_device_view_base {
// Forward declaration
template <typename T>
struct value_accessor;
template <typename T, typename contains_nulls_mode>
template <typename T, typename Nullate>
struct optional_accessor;
template <typename T, bool has_nulls>
struct pair_accessor;
Expand Down Expand Up @@ -493,11 +492,11 @@ class alignas(16) column_device_view : public detail::column_device_view_base {
}

/**
* @brief optional iterator for navigating this column
* @brief Optional iterator for navigating this column
*/
template <typename T, typename contains_nulls_mode>
template <typename T, typename Nullate>
using const_optional_iterator =
thrust::transform_iterator<detail::optional_accessor<T, contains_nulls_mode>, count_it>;
thrust::transform_iterator<detail::optional_accessor<T, Nullate>, count_it>;

/**
* @brief Pair iterator for navigating this column
Expand All @@ -520,117 +519,57 @@ class alignas(16) column_device_view : public detail::column_device_view_base {
*
* Dereferencing the returned iterator returns a `thrust::optional<T>`.
*
* When the element of an iterator contextually converted to bool, the conversion returns true
* The element of this iterator contextually converts to bool. The conversion returns true
* if the object contains a value and false if it does not contain a value.
*
* optional_begin with mode `DYNAMIC` defers the assumption of nullability to
* runtime, with the user stating on construction of the iterator if column has nulls.
* `DYNAMIC` mode is nice when an algorithm is going to execute on multiple
* iterators and you don't want to compile all the combinations of iterator types
*
* Example:
* Calling this method with `nullate::DYNAMIC` defers the assumption of nullability to
* runtime with the caller indicating if the column has nulls. The `nullate::DYNAMIC` is
* useful when an algorithm is going to execute on multiple iterators and all the combinations of
* iterator types are not required at compile time.
*
* \code{.cpp}
* @code{.cpp}
* template<typename T>
* void some_function(cudf::column_view<T> const& col_view){
* auto d_col = cudf::column_device_view::create(col_view);
* // Create a `DYNAMIC` optional iterator
* auto optional_iterator = d_col->optional_begin<T>(cudf::contains_nulls::DYNAMIC{},
* col_view.has_nulls());
* }
* \endcode
*
* This function does not participate in overload resolution if
* `column_device_view::has_element_accessor<T>()` is false.
*
* @throws cudf::logic_error if the column is not nullable, and `DYNAMIC` mode used and
* the user has stated nulls exist
* @throws cudf::logic_error if column datatype and Element type mismatch.
*/
template <typename T, CUDF_ENABLE_IF(column_device_view::has_element_accessor<T>())>
auto optional_begin(contains_nulls::DYNAMIC, bool has_nulls) const
{
return const_optional_iterator<T, contains_nulls::DYNAMIC>{
count_it{0}, detail::optional_accessor<T, contains_nulls::DYNAMIC>{*this, has_nulls}};
}

/**
* @brief Return an optional iterator to the first element of the column.
*
* Dereferencing the returned iterator returns a `thrust::optional<T>`.
*
* When the element of an iterator contextually converted to bool, the conversion returns true
* if the object contains a value and false if it does not contain a value.
*
* optional_begin with mode `YES` means that the column supports nulls and
* potentially has null values, therefore the optional might not contain a value
*
* Example:
*
* \code{.cpp}
* template<typename T, bool has_nulls>
* void some_function(cudf::column_view<T> const& col_view){
* auto d_col = cudf::column_device_view::create(col_view);
* if constexpr(has_nulls) {
* auto optional_iterator = d_col->optional_begin<T>(cudf::contains_nulls::YES{});
* //use optional_iterator
* } else {
* auto optional_iterator = d_col->optional_begin<T>(cudf::contains_nulls::NO{});
* //use optional_iterator
* }
* auto optional_iterator =
* d_col->optional_begin<T>(cudf::nullate::DYNAMIC{col_view.has_nulls()});
* }
* \endcode
* @endcode
*
* This function does not participate in overload resolution if
* `column_device_view::has_element_accessor<T>()` is false.
* Calling this method with `nullate::YES` means that the column supports nulls and
* the optional returned might not contain a value.
*
* @throws cudf::logic_error if the column is not nullable, and `YES` mode used
* @throws cudf::logic_error if column datatype and Element type mismatch.
*/
template <typename T, CUDF_ENABLE_IF(column_device_view::has_element_accessor<T>())>
auto optional_begin(contains_nulls::YES) const
{
return const_optional_iterator<T, contains_nulls::YES>{
count_it{0}, detail::optional_accessor<T, contains_nulls::YES>{*this}};
}

/**
* @brief Return an optional iterator to the first element of the column.
* Calling this method with `nullate::NO` means that the column has no null values
* and the optional returned will always contain a value.
*
* Dereferencing the returned iterator returns a `thrust::optional<T>`.
*
* When the element of an iterator contextually converted to bool, the conversion returns true
* if the object contains a value and false if it does not contain a value.
*
* optional_begin with mode `NO` means that the column has no null values,
* therefore the optional will always contain a value.
*
* Example:
*
* \code{.cpp}
* @code{.cpp}
* template<typename T, bool has_nulls>
* void some_function(cudf::column_view<T> const& col_view){
* auto d_col = cudf::column_device_view::create(col_view);
* if constexpr(has_nulls) {
* auto optional_iterator = d_col->optional_begin<T>(cudf::contains_nulls::YES{});
* auto optional_iterator = d_col->optional_begin<T>(cudf::nullate::YES{});
* //use optional_iterator
* } else {
* auto optional_iterator = d_col->optional_begin<T>(cudf::contains_nulls::NO{});
* auto optional_iterator = d_col->optional_begin<T>(cudf::nullate::NO{});
* //use optional_iterator
* }
* }
* \endcode
* @endcode
*
* This function does not participate in overload resolution if
* `column_device_view::has_element_accessor<T>()` is false.
*
* @throws cudf::logic_error if the column is not nullable and `has_nulls` evaluates to true.
* @throws cudf::logic_error if column datatype and Element type mismatch.
*/
template <typename T, CUDF_ENABLE_IF(column_device_view::has_element_accessor<T>())>
auto optional_begin(contains_nulls::NO) const
template <typename T,
typename Nullate,
CUDF_ENABLE_IF(column_device_view::has_element_accessor<T>())>
auto optional_begin(Nullate has_nulls) const
{
return const_optional_iterator<T, contains_nulls::NO>{
count_it{0}, detail::optional_accessor<T, contains_nulls::NO>{*this}};
return const_optional_iterator<T, Nullate>{
count_it{0}, detail::optional_accessor<T, Nullate>{*this, has_nulls}};
}

/**
Expand Down Expand Up @@ -695,57 +634,21 @@ class alignas(16) column_device_view : public detail::column_device_view_base {
* @brief Return an optional iterator to the element following the last element of
* the column.
*
* Dereferencing the returned iterator returns a `thrust::optional<T>`.
* The returned iterator represents a `thrust::optional<T>` element.
*
* This function does not participate in overload resolution if
* `column_device_view::has_element_accessor<T>()` is false.
*
* @throws cudf::logic_error if the column is not nullable, and `DYNAMIC` mode used and
* the user has stated nulls exist
* @throws cudf::logic_error if the column is not nullable and `has_nulls` is true
* @throws cudf::logic_error if column datatype and Element type mismatch.
*/
template <typename T, CUDF_ENABLE_IF(column_device_view::has_element_accessor<T>())>
auto optional_end(contains_nulls::DYNAMIC, bool has_nulls) const
{
return const_optional_iterator<T, contains_nulls::DYNAMIC>{
count_it{size()}, detail::optional_accessor<T, contains_nulls::DYNAMIC>{*this, has_nulls}};
}

/**
* @brief Return an optional iterator to the element following the last element of
* the column.
*
* Dereferencing the returned iterator returns a `thrust::optional<T>`.
*
* This function does not participate in overload resolution if
* `column_device_view::has_element_accessor<T>()` is false.
*
* @throws cudf::logic_error if the column is not nullable, and `YES` mode used
* @throws cudf::logic_error if column datatype and Element type mismatch.
*/
template <typename T, CUDF_ENABLE_IF(column_device_view::has_element_accessor<T>())>
auto optional_end(contains_nulls::YES) const
{
return const_optional_iterator<T, contains_nulls::YES>{
count_it{size()}, detail::optional_accessor<T, contains_nulls::YES>{*this}};
}

/**
* @brief Return an optional iterator to the element following the last element of
* the column.
*
* Dereferencing the returned iterator returns a `thrust::optional<T>`.
*
* This function does not participate in overload resolution if
* `column_device_view::has_element_accessor<T>()` is false.
*
* @throws cudf::logic_error if column datatype and Element type mismatch.
*/
template <typename T, CUDF_ENABLE_IF(column_device_view::has_element_accessor<T>())>
auto optional_end(contains_nulls::NO) const
template <typename T,
typename Nullate,
CUDF_ENABLE_IF(column_device_view::has_element_accessor<T>())>
auto optional_end(Nullate has_nulls) const
{
return const_optional_iterator<T, contains_nulls::NO>{
count_it{size()}, detail::optional_accessor<T, contains_nulls::NO>{*this}};
return const_optional_iterator<T, Nullate>{
count_it{size()}, detail::optional_accessor<T, Nullate>{*this, has_nulls}};
}

/**
Expand Down Expand Up @@ -1201,77 +1104,56 @@ struct value_accessor {
* @brief optional accessor of a column
*
*
* The optional_accessor always returns a thrust::optional of column[i]. The validity
* of the optional is determined by the contains_nulls_mode template parameter
* which has the following modes:
* The optional_accessor always returns a `thrust::optional` of `column[i]`. The validity
* of the optional is determined by the `Nullate` parameter which may be one of the following:
*
* - `YES` means that the column supports nulls and has null values, therefore
* the optional might be valid or invalid
* - `nullate::YES` means that the column supports nulls and the optional returned
* might be valid or invalid.
*
* - `NO` the user has attested that the column has no null values,
* - `nullate::NO` means the caller attests that the column has no null values,
* no checks will occur and `thrust::optional{column[i]}` will be
* returned for each `i`.
*
* - `DYNAMIC` defers the assumption of nullability to runtime with the users stating
* on construction of the iterator if column has nulls.
* When `with_nulls=true` the return value validity will be determined if column[i]
* is not null.
* When `with_nulls=false` the return value will always be valid
* - `nullate::DYNAMIC` defers the assumption of nullability to runtime and the caller
* specifies if the column has nulls at runtime.
* For `DYNAMIC{true}` the return value will be `thrust::optional{column[i]}` if
* element `i` is not null and `thrust::optional{}` if element `i` is null.
* For `DYNAMIC{false}` the return value will always be `thrust::optional{column[i]}`.
*
* @throws cudf::logic_error if column datatype and template T type mismatch.
* @throws cudf::logic_error if the column is not nullable, and `with_nulls=true`
*
* @throws cudf::logic_error if the column is not nullable and `with_nulls` evaluates to true
*
* @tparam T The type of elements in the column
* @tparam contains_nulls_mode Specifies if nulls are checked at runtime or compile time.
* @tparam Nullate A `nullate` type (`YES`, `NO` or `DYNAMIC`) indicating if and how nulls are checked.
*/
template <typename T, typename contains_nulls_mode>
template <typename T, typename Nullate>
struct optional_accessor {
column_device_view const col; ///< column view of column in device

/**
* @brief constructor
* @param[in] _col column device view of cudf column
* @brief Constructor
*
* @param _col Column on which to iterate over its elements.
* @param with_nulls Indicates if the `col` should be checked for nulls.
*/
optional_accessor(column_device_view const& _col) : col{_col}
optional_accessor(column_device_view const& _col, Nullate with_nulls)
: col{_col}, has_nulls{with_nulls}
{
CUDF_EXPECTS(type_id_matches_device_storage_type<T>(col.type().id()), "the data type mismatch");
if (with_nulls) { CUDF_EXPECTS(_col.nullable(), "Unexpected non-nullable column."); }
}

CUDA_DEVICE_CALLABLE
thrust::optional<T> operator()(cudf::size_type i) const
{
if constexpr (std::is_same_v<contains_nulls_mode, contains_nulls::YES>) {
if (has_nulls) {
return (col.is_valid_nocheck(i)) ? thrust::optional<T>{col.element<T>(i)}
: thrust::optional<T>{thrust::nullopt};
}
return thrust::optional<T>{col.element<T>(i)};
}
};

template <typename T>
struct optional_accessor<T, contains_nulls::DYNAMIC> {
column_device_view const col; ///< column view of column in device
bool has_nulls;

/**
* @brief constructor
* @param[in] _col column device view of cudf column
* @param[in] with_nulls Indicates if @p _col has nulls
*/
optional_accessor(column_device_view const& _col, bool with_nulls)
: col{_col}, has_nulls{with_nulls}
{
CUDF_EXPECTS(type_id_matches_device_storage_type<T>(col.type().id()), "the data type mismatch");
if (with_nulls) { CUDF_EXPECTS(_col.nullable(), "Unexpected non-nullable column."); }
}

CUDA_DEVICE_CALLABLE
thrust::optional<T> operator()(cudf::size_type i) const
{
return (has_nulls and col.is_null_nocheck(i)) ? thrust::optional<T>{thrust::nullopt}
: thrust::optional<T>{col.element<T>(i)};
}
Nullate has_nulls{};
};

/**
Expand Down
Loading