Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[REVIEW] Introduces make_optional_iterator for nullable column and scalars #7772

Merged
Merged
Show file tree
Hide file tree
Changes from 5 commits
Commits
Show all changes
17 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
249 changes: 248 additions & 1 deletion cpp/include/cudf/column/column_device_view.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@

#include <thrust/iterator/counting_iterator.h>
#include <thrust/iterator/transform_iterator.h>
#include <thrust/optional.h>

#include <algorithm>

Expand All @@ -40,6 +41,28 @@
*/

namespace cudf {

/**
* @brief Policy for what assumptions the optional iterator has about null values
*
* - `YES` means that the column supports nulls and has null values, therefore
* the optional might not contain a value
*
* - `NO` means that the column has no null values, therefore the optional will
* always have a value
*
* - `DYNAMIC` defers the assumption of nullability to runtime, with the user stating
* on construction of the iterator whether the column has nulls.
*/
namespace contains_nulls {
/// Tag type: the column supports nulls and has null values, so the optional might be empty.
struct YES {};
/// Tag type: the column has no null values, so the optional always has a value.
struct NO {};
/// Tag type: nullability is stated by the user at runtime, on construction of the iterator.
struct DYNAMIC {};
}  // namespace contains_nulls

namespace detail {
/**
* @brief An immutable, non-owning view of device data as a column of elements
Expand Down Expand Up @@ -255,10 +278,11 @@ class alignas(16) column_device_view_base {
: std::true_type {
};
};

// Forward declarations of the accessor functors referenced by the iterator
// aliases of column_device_view; they are declared here so the aliases can name
// them before their definitions.

// Functor whose call operator returns `col.element<T>(i)`.
template <typename T>
struct value_accessor;
// Functor returning a `thrust::optional<T>` for element `i`; `contains_nulls_mode`
// is one of the `cudf::contains_nulls` tag types (YES / NO / DYNAMIC).
template <typename T, typename contains_nulls_mode>
struct optional_accessor;
// Functor returning a pair of the element value and its validity.
template <typename T, bool has_nulls>
struct pair_accessor;
template <typename T, bool has_nulls>
Expand Down Expand Up @@ -484,6 +508,13 @@ class alignas(16) column_device_view : public detail::column_device_view_base {
return const_iterator<T>{count_it{size()}, detail::value_accessor<T>{*this}};
}

/**
 * @brief Optional iterator for navigating this column
 *
 * A `thrust::transform_iterator` that applies
 * `detail::optional_accessor<T, contains_nulls_mode>` to a `count_it`, so that
 * dereferencing it yields the accessor's result (a `thrust::optional<T>`).
 *
 * @tparam T The type of elements in the column
 * @tparam contains_nulls_mode One of the `cudf::contains_nulls` tag types
 * (`YES`, `NO` or `DYNAMIC`) selecting how nullability is handled
 */
template <typename T, typename contains_nulls_mode>
using const_optional_iterator =
thrust::transform_iterator<detail::optional_accessor<T, contains_nulls_mode>, count_it>;

/**
* @brief Pair iterator for navigating this column
*/
Expand All @@ -500,6 +531,89 @@ class alignas(16) column_device_view : public detail::column_device_view_base {
using const_pair_rep_iterator =
thrust::transform_iterator<detail::pair_rep_accessor<T, has_nulls>, count_it>;

/**
* @brief Return an optional iterator to the first element of the column.
*
* Dereferencing the returned iterator returns a `thrust::optional<T>`.
*
* When an element of the iterator is contextually converted to bool, the conversion returns true
* if the object contains a value and false if it does not contain a value.
*
* optional_begin with mode `DYNAMIC` defers the assumption of nullability to
* runtime, with the user stating on construction of the iterator if column has nulls.
*
* Example:
*
* \code{.cpp}
* template<typename T>
* void some_function( cudf::column_view<T> const& col_view, bool has_nulls){
* auto d_col = cudf::column_device_view::create(col_view);
* // Create a `DYNAMIC` optional iterator
* auto optional_iterator = d_col->optional_begin<T>(cudf::contains_nulls::DYNAMIC{}, has_nulls);
* }
* \endcode
*
* This function does not participate in overload resolution if
* `column_device_view::has_element_accessor<T>()` is false.
*
* @throws cudf::logic_error if the column is not nullable, and `DYNAMIC` mode used and
* the user has stated nulls exist
* @throws cudf::logic_error if column datatype and Element type mismatch.
*/
template <typename T, CUDF_ENABLE_IF(column_device_view::has_element_accessor<T>())>
auto optional_begin(contains_nulls::DYNAMIC, bool has_nulls) const
{
  // The accessor carries the caller-supplied nullability flag; the counting
  // iterator starts at row 0.
  using mode_t     = contains_nulls::DYNAMIC;
  using accessor_t = detail::optional_accessor<T, mode_t>;
  return const_optional_iterator<T, mode_t>{count_it{0}, accessor_t{*this, has_nulls}};
}

/**
* @brief Return an optional iterator to the first element of the column.
*
* Dereferencing the returned iterator returns a `thrust::optional<T>`.
*
* When an element of the iterator is contextually converted to bool, the conversion returns true
* if the object contains a value and false if it does not contain a value.
*
* optional_begin with mode `YES` means that the column supports nulls and
* potentially has null values, therefore the optional might not contain a value
*
* This function does not participate in overload resolution if
* `column_device_view::has_element_accessor<T>()` is false.
*
* @throws cudf::logic_error if the column is not nullable, and `YES` mode used
* @throws cudf::logic_error if column datatype and Element type mismatch.
*/
template <typename T, CUDF_ENABLE_IF(column_device_view::has_element_accessor<T>())>
auto optional_begin(contains_nulls::YES) const
{
  // Counting iterator starts at row 0; the accessor wraps this column view.
  using mode_t     = contains_nulls::YES;
  using accessor_t = detail::optional_accessor<T, mode_t>;
  return const_optional_iterator<T, mode_t>{count_it{0}, accessor_t{*this}};
}

/**
* @brief Return an optional iterator to the first element of the column.
*
* Dereferencing the returned iterator returns a `thrust::optional<T>`.
*
* When an element of the iterator is contextually converted to bool, the conversion returns true
* if the object contains a value and false if it does not contain a value.
*
* optional_begin with mode `NO` means that the column has no null values,
* therefore the optional will always contain a value.
*
* This function does not participate in overload resolution if
* `column_device_view::has_element_accessor<T>()` is false.
*
* @throws cudf::logic_error if column datatype and Element type mismatch.
*/
template <typename T, CUDF_ENABLE_IF(column_device_view::has_element_accessor<T>())>
auto optional_begin(contains_nulls::NO) const
{
  // Counting iterator starts at row 0; the accessor wraps this column view.
  using mode_t     = contains_nulls::NO;
  using accessor_t = detail::optional_accessor<T, mode_t>;
  return const_optional_iterator<T, mode_t>{count_it{0}, accessor_t{*this}};
}

/**
* @brief Return a pair iterator to the first element of the column.
*
Expand Down Expand Up @@ -558,6 +672,63 @@ class alignas(16) column_device_view : public detail::column_device_view_base {
detail::pair_rep_accessor<T, has_nulls>{*this}};
}

/**
* @brief Return an optional iterator to the element following the last element of
* the column.
*
* Dereferencing the returned iterator returns a `thrust::optional<T>`.
*
* This function does not participate in overload resolution if
* `column_device_view::has_element_accessor<T>()` is false.
*
* @throws cudf::logic_error if the column is not nullable, and `DYNAMIC` mode used and
* the user has stated nulls exist
* @throws cudf::logic_error if column datatype and Element type mismatch.
*/
template <typename T, CUDF_ENABLE_IF(column_device_view::has_element_accessor<T>())>
auto optional_end(contains_nulls::DYNAMIC, bool has_nulls) const
{
  // Past-the-end position: the counting iterator is placed at size().
  using mode_t     = contains_nulls::DYNAMIC;
  using accessor_t = detail::optional_accessor<T, mode_t>;
  return const_optional_iterator<T, mode_t>{count_it{size()}, accessor_t{*this, has_nulls}};
}

/**
* @brief Return an optional iterator to the element following the last element of
* the column.
*
* Dereferencing the returned iterator returns a `thrust::optional<T>`.
*
* This function does not participate in overload resolution if
* `column_device_view::has_element_accessor<T>()` is false.
*
* @throws cudf::logic_error if the column is not nullable, and `YES` mode used
* @throws cudf::logic_error if column datatype and Element type mismatch.
*/
template <typename T, CUDF_ENABLE_IF(column_device_view::has_element_accessor<T>())>
auto optional_end(contains_nulls::YES) const
{
  // Past-the-end position: the counting iterator is placed at size().
  using mode_t     = contains_nulls::YES;
  using accessor_t = detail::optional_accessor<T, mode_t>;
  return const_optional_iterator<T, mode_t>{count_it{size()}, accessor_t{*this}};
}

/**
* @brief Return an optional iterator to the element following the last element of
* the column.
*
* Dereferencing the returned iterator returns a `thrust::optional<T>`.
*
* This function does not participate in overload resolution if
* `column_device_view::has_element_accessor<T>()` is false.
*
* @throws cudf::logic_error if column datatype and Element type mismatch.
*/
template <typename T, CUDF_ENABLE_IF(column_device_view::has_element_accessor<T>())>
auto optional_end(contains_nulls::NO) const
{
  // Past-the-end position: the counting iterator is placed at size().
  using mode_t     = contains_nulls::NO;
  using accessor_t = detail::optional_accessor<T, mode_t>;
  return const_optional_iterator<T, mode_t>{count_it{size()}, accessor_t{*this}};
}

/**
* @brief Return a pair iterator to the element following the last element of
* the column.
Expand Down Expand Up @@ -999,6 +1170,82 @@ struct value_accessor {
__device__ T operator()(cudf::size_type i) const { return col.element<T>(i); }
};

/**
 * @brief Optional accessor of a column
 *
 * Unary functor that always returns a `thrust::optional<T>` of `column[i]`. Whether the
 * returned optional can be empty is determined by the `contains_nulls_mode` template
 * parameter, which has the following modes:
 *
 * - `YES` means that the column supports nulls and has null values, therefore
 *   the optional might be valid or invalid. The validity of element `i` is read with
 *   `is_valid_nocheck(i)`.
 *
 * - `NO` the user has attested that the column has no null values,
 *   no checks will occur and `thrust::optional<T>{column[i]}` will be
 *   returned for each `i`.
 *
 * - `DYNAMIC` defers the assumption of nullability to runtime, with the user stating
 *   on construction of the accessor whether the column has nulls (see the partial
 *   specialization for `contains_nulls::DYNAMIC`).
 *   When `with_nulls=true` the validity of the return value is determined by whether
 *   `column[i]` is not null.
 *   When `with_nulls=false` the return value will always be valid.
 *
 * @throws cudf::logic_error if column datatype and template T type mismatch.
 * @throws cudf::logic_error if the column is not nullable, and `YES` mode is used
 * (or `DYNAMIC` mode with `with_nulls=true`).
 *
 * @tparam T The type of elements in the column
 * @tparam contains_nulls_mode Tag type from `cudf::contains_nulls` selecting the mode
 */
template <typename T, typename contains_nulls_mode>
struct optional_accessor {
  column_device_view const col;  ///< column view of column in device

  /**
   * @brief constructor
   *
   * @throws cudf::logic_error if the column's type id does not match the device storage
   * type of `T`
   * @throws cudf::logic_error if `contains_nulls_mode` is `contains_nulls::YES` and the
   * column is not nullable
   *
   * @param[in] _col column device view of cudf column
   */
  optional_accessor(column_device_view const& _col) : col{_col}
  {
    CUDF_EXPECTS(type_id_matches_device_storage_type<T>(col.type().id()), "the data type mismatch");
    // In `YES` mode operator() queries validity through `is_valid_nocheck`, which (as the
    // name suggests) presumably does not verify that a null mask exists. Reject
    // non-nullable columns here, as the optional_begin/optional_end documentation promises.
    if constexpr (std::is_same_v<contains_nulls_mode, contains_nulls::YES>) {
      CUDF_EXPECTS(_col.nullable(), "Unexpected non-nullable column.");
    }
  }

  /**
   * @brief Returns the element at index `i` wrapped in a `thrust::optional<T>`
   *
   * @param[in] i index of the element to access
   * @return In `YES` mode, an empty optional when element `i` is not valid; otherwise an
   * optional holding `col.element<T>(i)`
   */
  CUDA_DEVICE_CALLABLE
  thrust::optional<T> operator()(cudf::size_type i) const
  {
    if constexpr (std::is_same_v<contains_nulls_mode, contains_nulls::YES>) {
      return (col.is_valid_nocheck(i)) ? thrust::optional<T>{col.element<T>(i)}
                                       : thrust::optional<T>{thrust::nullopt};
    } else {
      // No validity lookup: the caller attested that there are no nulls.
      return thrust::optional<T>{col.element<T>(i)};
    }
  }
};

/**
 * @brief Optional accessor specialization for `contains_nulls::DYNAMIC`
 *
 * Whether null checks are performed is decided at runtime by the `with_nulls`
 * flag given to the constructor.
 *
 * @tparam T The type of elements in the column
 */
template <typename T>
struct optional_accessor<T, contains_nulls::DYNAMIC> {
  column_device_view const col;  ///< column view of column in device
  bool has_nulls;                ///< runtime flag: whether null checks are performed

  /**
   * @brief constructor
   *
   * @throws cudf::logic_error if the column's type id does not match the device storage
   * type of `T`
   * @throws cudf::logic_error if `with_nulls` is true and the column is not nullable
   *
   * @param[in] _col column device view of cudf column
   * @param[in] with_nulls true if the caller states that the column has nulls
   */
  optional_accessor(column_device_view const& _col, bool with_nulls)
    : col{_col}, has_nulls{with_nulls}
  {
    CUDF_EXPECTS(type_id_matches_device_storage_type<T>(col.type().id()), "the data type mismatch");
    // Nullability is only required when the caller claims nulls are present.
    CUDF_EXPECTS((not with_nulls) or _col.nullable(), "Unexpected non-nullable column.");
  }

  /**
   * @brief Returns the element at index `i` wrapped in a `thrust::optional<T>`
   *
   * @param[in] i index of the element to access
   * @return an empty optional when `has_nulls` is set and element `i` is null; otherwise
   * an optional holding `col.element<T>(i)`
   */
  CUDA_DEVICE_CALLABLE
  thrust::optional<T> operator()(cudf::size_type i) const
  {
    // The null lookup is short-circuited away when has_nulls is false.
    if (has_nulls and col.is_null_nocheck(i)) { return thrust::optional<T>{thrust::nullopt}; }
    return thrust::optional<T>{col.element<T>(i)};
  }
};

/**
* @brief pair accessor of column with/without null bitmask
* A unary functor returns pair with scalar value at `id` and boolean validity
Expand Down
Loading