Skip to content

Commit

Permalink
Merge branch 'branch-0.20' of https://github.com/rapidsai/cudf into d…
Browse files Browse the repository at this point in the history
…ecimalquantile
  • Loading branch information
Chris Jarrett committed Apr 21, 2021
2 parents 415da8c + d501d2c commit 8f0b933
Show file tree
Hide file tree
Showing 42 changed files with 2,371 additions and 860 deletions.
2 changes: 1 addition & 1 deletion conda/environments/cudf_dev_cuda11.0.yml
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ dependencies:
- python>=3.7,<3.9
- numba>=0.49,!=0.51.0
- numpy
- pandas>=1.0,<=1.2.4
- pandas>=1.0,<1.3.0dev0
- pyarrow=1.0.1
- fastavro>=0.22.9
- notebook>=0.5.0
Expand Down
2 changes: 1 addition & 1 deletion conda/environments/cudf_dev_cuda11.1.yml
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ dependencies:
- python>=3.7,<3.9
- numba>=0.49,!=0.51.0
- numpy
- pandas>=1.0,<=1.2.4
- pandas>=1.0,<1.3.0dev0
- pyarrow=1.0.1
- fastavro>=0.22.9
- notebook>=0.5.0
Expand Down
2 changes: 1 addition & 1 deletion conda/environments/cudf_dev_cuda11.2.yml
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ dependencies:
- python>=3.7,<3.9
- numba>=0.49,!=0.51.0
- numpy
- pandas>=1.0,<=1.2.4
- pandas>=1.0,<1.3.0dev0
- pyarrow=1.0.1
- fastavro>=0.22.9
- notebook>=0.5.0
Expand Down
2 changes: 1 addition & 1 deletion conda/recipes/cudf/meta.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ requirements:
- protobuf
- python
- typing_extensions
- pandas >=1.0,<=1.2.4
- pandas >=1.0,<1.3.0dev0
- cupy >7.1.0,<9.0.0a0
- numba >=0.49.0
- numpy
Expand Down
3 changes: 2 additions & 1 deletion cpp/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -321,7 +321,8 @@ add_library(cudf
src/strings/case.cu
src/strings/char_types/char_cases.cu
src/strings/char_types/char_types.cu
src/strings/combine.cu
src/strings/combine/concatenate.cu
src/strings/combine/join.cu
src/strings/contains.cu
src/strings/convert/convert_booleans.cu
src/strings/convert/convert_datetime.cu
Expand Down
284 changes: 283 additions & 1 deletion cpp/include/cudf/column/column_device_view.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@

#include <thrust/iterator/counting_iterator.h>
#include <thrust/iterator/transform_iterator.h>
#include <thrust/optional.h>

#include <algorithm>

Expand All @@ -40,6 +41,28 @@
*/

namespace cudf {

/**
* @brief Policy for what assumptions the optional iterator has about null values
*
* - `YES` means that the column supports nulls and has null values, therefore
* the optional might not contain a value
*
* - `NO` means that the column has no null values, therefore the optional will
* always have a value
*
* - `DYNAMIC` defers the assumption of nullability to runtime with the users stating
* on construction of the iterator if column has nulls.
*/
namespace contains_nulls {
/// Tag: the column may hold nulls, so a dereferenced optional might be empty.
struct YES {};
/// Tag: the column holds no nulls, so a dereferenced optional always has a value.
struct NO {};
/// Tag: whether the column holds nulls is stated at runtime when the iterator is built.
struct DYNAMIC {};
}  // namespace contains_nulls

namespace detail {
/**
* @brief An immutable, non-owning view of device data as a column of elements
Expand Down Expand Up @@ -255,10 +278,11 @@ class alignas(16) column_device_view_base {
: std::true_type {
};
};

// Forward declarations of the accessor functors. They are named by the
// iterator aliases of column_device_view before their definitions appear
// further down in this header.
template <typename T>
struct value_accessor;
// Second parameter is one of the cudf::contains_nulls tag types.
template <typename T, typename contains_nulls_mode>
struct optional_accessor;
template <typename T, bool has_nulls>
struct pair_accessor;
template <typename T, bool has_nulls>
Expand Down Expand Up @@ -484,6 +508,13 @@ class alignas(16) column_device_view : public detail::column_device_view_base {
return const_iterator<T>{count_it{size()}, detail::value_accessor<T>{*this}};
}

/**
* @brief optional iterator for navigating this column
*/
// Built as a transform iterator: each value produced by `count_it`
// (presumably a counting iterator over row indices - confirm against its
// declaration) is passed through detail::optional_accessor<T, contains_nulls_mode>.
template <typename T, typename contains_nulls_mode>
using const_optional_iterator =
thrust::transform_iterator<detail::optional_accessor<T, contains_nulls_mode>, count_it>;

/**
* @brief Pair iterator for navigating this column
*/
Expand All @@ -500,6 +531,124 @@ class alignas(16) column_device_view : public detail::column_device_view_base {
using const_pair_rep_iterator =
thrust::transform_iterator<detail::pair_rep_accessor<T, has_nulls>, count_it>;

/**
* @brief Return an optional iterator to the first element of the column.
*
* Dereferencing the returned iterator returns a `thrust::optional<T>`.
*
* When an element of the iterator is contextually converted to bool, the conversion returns true
* if the object contains a value and false if it does not contain a value.
*
* optional_begin with mode `DYNAMIC` defers the assumption of nullability to
* runtime, with the user stating on construction of the iterator if column has nulls.
* `DYNAMIC` mode is nice when an algorithm is going to execute on multiple
* iterators and you don't want to compile all the combinations of iterator types
*
* Example:
*
* \code{.cpp}
* template<typename T>
* void some_function(cudf::column_view<T> const& col_view){
* auto d_col = cudf::column_device_view::create(col_view);
* // Create a `DYNAMIC` optional iterator
* auto optional_iterator = d_col->optional_begin<T>(cudf::contains_nulls::DYNAMIC{},
* col_view.has_nulls());
* }
* \endcode
*
* This function does not participate in overload resolution if
* `column_device_view::has_element_accessor<T>()` is false.
*
* @throws cudf::logic_error if the column is not nullable, and `DYNAMIC` mode used and
* the user has stated nulls exist
* @throws cudf::logic_error if column datatype and Element type mismatch.
*/
template <typename T, CUDF_ENABLE_IF(column_device_view::has_element_accessor<T>())>
auto optional_begin(contains_nulls::DYNAMIC, bool has_nulls) const
{
  using accessor_type = detail::optional_accessor<T, contains_nulls::DYNAMIC>;
  using iterator_type = const_optional_iterator<T, contains_nulls::DYNAMIC>;
  // Start at row 0; the accessor records the caller-supplied `has_nulls` flag.
  return iterator_type{count_it{0}, accessor_type{*this, has_nulls}};
}

/**
* @brief Return an optional iterator to the first element of the column.
*
* Dereferencing the returned iterator returns a `thrust::optional<T>`.
*
* When an element of the iterator is contextually converted to bool, the conversion returns true
* if the object contains a value and false if it does not contain a value.
*
* optional_begin with mode `YES` means that the column supports nulls and
* potentially has null values, therefore the optional might not contain a value
*
* Example:
*
* \code{.cpp}
* template<typename T, bool has_nulls>
* void some_function(cudf::column_view<T> const& col_view){
* auto d_col = cudf::column_device_view::create(col_view);
* if constexpr(has_nulls) {
* auto optional_iterator = d_col->optional_begin<T>(cudf::contains_nulls::YES{});
* //use optional_iterator
* } else {
* auto optional_iterator = d_col->optional_begin<T>(cudf::contains_nulls::NO{});
* //use optional_iterator
* }
* }
* \endcode
*
* This function does not participate in overload resolution if
* `column_device_view::has_element_accessor<T>()` is false.
*
* @throws cudf::logic_error if the column is not nullable, and `YES` mode used
* @throws cudf::logic_error if column datatype and Element type mismatch.
*/
template <typename T, CUDF_ENABLE_IF(column_device_view::has_element_accessor<T>())>
auto optional_begin(contains_nulls::YES) const
{
  using accessor_type = detail::optional_accessor<T, contains_nulls::YES>;
  using iterator_type = const_optional_iterator<T, contains_nulls::YES>;
  // Start at row 0; validity of each element is checked on dereference.
  return iterator_type{count_it{0}, accessor_type{*this}};
}

/**
* @brief Return an optional iterator to the first element of the column.
*
* Dereferencing the returned iterator returns a `thrust::optional<T>`.
*
* When an element of the iterator is contextually converted to bool, the conversion returns true
* if the object contains a value and false if it does not contain a value.
*
* optional_begin with mode `NO` means that the column has no null values,
* therefore the optional will always contain a value.
*
* Example:
*
* \code{.cpp}
* template<typename T, bool has_nulls>
* void some_function(cudf::column_view<T> const& col_view){
* auto d_col = cudf::column_device_view::create(col_view);
* if constexpr(has_nulls) {
* auto optional_iterator = d_col->optional_begin<T>(cudf::contains_nulls::YES{});
* //use optional_iterator
* } else {
* auto optional_iterator = d_col->optional_begin<T>(cudf::contains_nulls::NO{});
* //use optional_iterator
* }
* }
* \endcode
*
* This function does not participate in overload resolution if
* `column_device_view::has_element_accessor<T>()` is false.
*
* @throws cudf::logic_error if column datatype and Element type mismatch.
*/
template <typename T, CUDF_ENABLE_IF(column_device_view::has_element_accessor<T>())>
auto optional_begin(contains_nulls::NO) const
{
  using accessor_type = detail::optional_accessor<T, contains_nulls::NO>;
  using iterator_type = const_optional_iterator<T, contains_nulls::NO>;
  // Start at row 0; no validity check is made, every optional holds a value.
  return iterator_type{count_it{0}, accessor_type{*this}};
}

/**
* @brief Return a pair iterator to the first element of the column.
*
Expand Down Expand Up @@ -558,6 +707,63 @@ class alignas(16) column_device_view : public detail::column_device_view_base {
detail::pair_rep_accessor<T, has_nulls>{*this}};
}

/**
* @brief Return an optional iterator to the element following the last element of
* the column.
*
* Dereferencing the returned iterator returns a `thrust::optional<T>`.
*
* This function does not participate in overload resolution if
* `column_device_view::has_element_accessor<T>()` is false.
*
* @throws cudf::logic_error if the column is not nullable, and `DYNAMIC` mode used and
* the user has stated nulls exist
* @throws cudf::logic_error if column datatype and Element type mismatch.
*/
template <typename T, CUDF_ENABLE_IF(column_device_view::has_element_accessor<T>())>
auto optional_end(contains_nulls::DYNAMIC, bool has_nulls) const
{
  using accessor_type = detail::optional_accessor<T, contains_nulls::DYNAMIC>;
  using iterator_type = const_optional_iterator<T, contains_nulls::DYNAMIC>;
  // One past the last row; the accessor records the caller-supplied `has_nulls` flag.
  return iterator_type{count_it{size()}, accessor_type{*this, has_nulls}};
}

/**
* @brief Return an optional iterator to the element following the last element of
* the column.
*
* Dereferencing the returned iterator returns a `thrust::optional<T>`.
*
* This function does not participate in overload resolution if
* `column_device_view::has_element_accessor<T>()` is false.
*
* @throws cudf::logic_error if the column is not nullable, and `YES` mode used
* @throws cudf::logic_error if column datatype and Element type mismatch.
*/
template <typename T, CUDF_ENABLE_IF(column_device_view::has_element_accessor<T>())>
auto optional_end(contains_nulls::YES) const
{
  using accessor_type = detail::optional_accessor<T, contains_nulls::YES>;
  using iterator_type = const_optional_iterator<T, contains_nulls::YES>;
  // One past the last row of the column.
  return iterator_type{count_it{size()}, accessor_type{*this}};
}

/**
* @brief Return an optional iterator to the element following the last element of
* the column.
*
* Dereferencing the returned iterator returns a `thrust::optional<T>`.
*
* This function does not participate in overload resolution if
* `column_device_view::has_element_accessor<T>()` is false.
*
* @throws cudf::logic_error if column datatype and Element type mismatch.
*/
template <typename T, CUDF_ENABLE_IF(column_device_view::has_element_accessor<T>())>
auto optional_end(contains_nulls::NO) const
{
  using accessor_type = detail::optional_accessor<T, contains_nulls::NO>;
  using iterator_type = const_optional_iterator<T, contains_nulls::NO>;
  // One past the last row of the column.
  return iterator_type{count_it{size()}, accessor_type{*this}};
}

/**
* @brief Return a pair iterator to the element following the last element of
* the column.
Expand Down Expand Up @@ -999,6 +1205,82 @@ struct value_accessor {
__device__ T operator()(cudf::size_type i) const { return col.element<T>(i); }
};

/**
* @brief optional accessor of a column
*
*
* The optional_accessor always returns a thrust::optional of column[i]. The validity
* of the optional is determined by the contains_nulls_mode template parameter
* which has the following modes:
*
* - `YES` means that the column supports nulls and has null values, therefore
* the optional might be valid or invalid
*
* - `NO` the user has attested that the column has no null values,
* no checks will occur and `thrust::optional{column[i]}` will be
* returned for each `i`.
*
* - `DYNAMIC` defers the assumption of nullability to runtime with the users stating
* on construction of the iterator if column has nulls.
* When `with_nulls=true` the return value validity will be determined if column[i]
* is not null.
* When `with_nulls=false` the return value will always be valid
*
* @throws cudf::logic_error if column datatype and template T type mismatch.
* @throws cudf::logic_error if the column is not nullable, and `with_nulls=true`
*
*
* @tparam T The type of elements in the column
* @tparam contains_nulls_mode Specifies if nulls are checked at runtime or compile time.
*/
template <typename T, typename contains_nulls_mode>
struct optional_accessor {
  column_device_view const col;  ///< column view of column in device

  /**
   * @brief Constructs the accessor and validates the column against the requested mode.
   *
   * @param[in] _col column device view of cudf column
   *
   * @throws cudf::logic_error if the column datatype and `T` mismatch.
   * @throws cudf::logic_error if `contains_nulls_mode` is `contains_nulls::YES` and the
   * column is not nullable.
   */
  optional_accessor(column_device_view const& _col) : col{_col}
  {
    CUDF_EXPECTS(type_id_matches_device_storage_type<T>(col.type().id()), "the data type mismatch");
    // `YES` mode calls is_valid_nocheck() for every element, so reject a
    // non-nullable column here instead of failing later on the device.
    if constexpr (std::is_same_v<contains_nulls_mode, contains_nulls::YES>) {
      CUDF_EXPECTS(col.nullable(), "Unexpected non-nullable column.");
    }
  }

  /**
   * @brief Returns the element at index `i` wrapped in a `thrust::optional`.
   *
   * In `YES` mode the optional is empty when `col.is_valid_nocheck(i)` is false.
   * In any other mode handled by this primary template no validity check is made
   * and the optional always contains `col.element<T>(i)`.
   *
   * @param[in] i index of the element
   * @return optional holding the element, or `thrust::nullopt`
   */
  CUDA_DEVICE_CALLABLE
  thrust::optional<T> operator()(cudf::size_type i) const
  {
    if constexpr (std::is_same_v<contains_nulls_mode, contains_nulls::YES>) {
      return (col.is_valid_nocheck(i)) ? thrust::optional<T>{col.element<T>(i)}
                                       : thrust::optional<T>{thrust::nullopt};
    } else {
      return thrust::optional<T>{col.element<T>(i)};
    }
  }
};

template <typename T>
struct optional_accessor<T, contains_nulls::DYNAMIC> {
  column_device_view const col;  ///< column view of column in device
  bool has_nulls;                ///< runtime flag: treat the column as possibly holding nulls

  /**
   * @brief Constructs the accessor with a runtime nullability flag.
   *
   * @param[in] _col column device view of cudf column
   * @param[in] with_nulls true if the caller states that the column has nulls
   *
   * @throws cudf::logic_error if the column datatype and `T` mismatch.
   * @throws cudf::logic_error if `with_nulls` is true and the column is not nullable.
   */
  optional_accessor(column_device_view const& _col, bool with_nulls)
    : col{_col}, has_nulls{with_nulls}
  {
    CUDF_EXPECTS(type_id_matches_device_storage_type<T>(col.type().id()), "the data type mismatch");
    // Nullability is only required when the caller states that nulls exist.
    CUDF_EXPECTS(not with_nulls or _col.nullable(), "Unexpected non-nullable column.");
  }

  /**
   * @brief Returns the element at index `i` wrapped in a `thrust::optional`.
   *
   * The null check is made only when `has_nulls` is true; otherwise the
   * optional always contains `col.element<T>(i)`.
   *
   * @param[in] i index of the element
   * @return optional holding the element, or `thrust::nullopt`
   */
  CUDA_DEVICE_CALLABLE
  thrust::optional<T> operator()(cudf::size_type i) const
  {
    if (has_nulls and col.is_null_nocheck(i)) { return thrust::optional<T>{thrust::nullopt}; }
    return thrust::optional<T>{col.element<T>(i)};
  }
};

/**
* @brief pair accessor of column with/without null bitmask
* A unary functor returns pair with scalar value at `id` and boolean validity
Expand Down
Loading

0 comments on commit 8f0b933

Please sign in to comment.