Refactor host device macros (#9797)
This PR is a follow-up to #9530 that standardizes the names of the macros used for the `__host__ __device__` attributes. Aliases for `__device__`, and combinations with inlining, have been removed; the only remaining macro is `CUDF_HOST_DEVICE`, which expands to `__host__ __device__` in device code and to nothing in host code. See #9530 (comment) for more discussion.
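
As a rough illustration of the pattern described above, a single macro of this kind typically looks like the following. This is a minimal sketch rather than the verbatim cudf definition, and the `__CUDACC__` guard is an assumption about how the expansion is gated:

```cpp
// Expands to __host__ __device__ when a CUDA compiler is processing the
// translation unit, and to nothing in host-only builds, so the same header
// can be included from both .cpp and .cu files.
#ifdef __CUDACC__
#define CUDF_HOST_DEVICE __host__ __device__
#else
#define CUDF_HOST_DEVICE
#endif
```

In the diff below, device-only helpers that previously used the removed aliases now spell out `__device__ inline` (or `__device__ __forceinline__`) directly, leaving `CUDF_HOST_DEVICE` for functions that must be callable from both host and device.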

Authors:
  - Vyas Ramasubramani (https://github.com/vyasr)

Approvers:
  - Vukasin Milovanovic (https://github.com/vuule)
  - Jake Hemstad (https://github.com/jrhemstad)
  - Bradley Dice (https://github.com/bdice)
  - Mark Harris (https://github.com/harrism)

URL: #9797
vyasr authored Jan 10, 2022
1 parent cee55fd commit 496aa47
Showing 41 changed files with 610 additions and 647 deletions.
93 changes: 45 additions & 48 deletions cpp/include/cudf/ast/detail/expression_evaluator.cuh
@@ -57,11 +57,8 @@ struct expression_result {
/**
* Helper function to get the subclass type to dispatch methods to.
*/
CUDA_DEVICE_CALLABLE Subclass& subclass() { return static_cast<Subclass&>(*this); }
CUDA_DEVICE_CALLABLE Subclass const& subclass() const
{
return static_cast<Subclass const&>(*this);
}
__device__ inline Subclass& subclass() { return static_cast<Subclass&>(*this); }
__device__ inline Subclass const& subclass() const { return static_cast<Subclass const&>(*this); }

// TODO: The index is ignored by the value subclass, but is included in this
// signature because it is required by the implementation in the template
@@ -73,15 +70,15 @@ struct expression_result {
// used, whereas passing it as a parameter keeps it in registers for fast
// access at the point where indexing occurs.
template <typename Element>
CUDA_DEVICE_CALLABLE void set_value(cudf::size_type index,
possibly_null_value_t<Element, has_nulls> const& result)
__device__ inline void set_value(cudf::size_type index,
possibly_null_value_t<Element, has_nulls> const& result)
{
subclass().template set_value<Element>(index, result);
}

CUDA_DEVICE_CALLABLE bool is_valid() const { return subclass().is_valid(); }
__device__ inline bool is_valid() const { return subclass().is_valid(); }

CUDA_DEVICE_CALLABLE T value() const { return subclass().value(); }
__device__ inline T value() const { return subclass().value(); }
};

/**
@@ -97,11 +94,11 @@ struct expression_result {
template <typename T, bool has_nulls>
struct value_expression_result
: public expression_result<value_expression_result<T, has_nulls>, T, has_nulls> {
CUDA_DEVICE_CALLABLE value_expression_result() {}
__device__ inline value_expression_result() {}

template <typename Element>
CUDA_DEVICE_CALLABLE void set_value(cudf::size_type index,
possibly_null_value_t<Element, has_nulls> const& result)
__device__ inline void set_value(cudf::size_type index,
possibly_null_value_t<Element, has_nulls> const& result)
{
if constexpr (std::is_same_v<Element, T>) {
_obj = result;
@@ -113,7 +110,7 @@ struct value_expression_result
/**
* @brief Returns true if the underlying data is valid and false otherwise.
*/
CUDA_DEVICE_CALLABLE bool is_valid() const
__device__ inline bool is_valid() const
{
if constexpr (has_nulls) { return _obj.has_value(); }
return true;
@@ -125,7 +122,7 @@ struct value_expression_result
* If the underlying data is not valid, behavior is undefined. Callers should
* use is_valid to check for validity before accessing the value.
*/
CUDA_DEVICE_CALLABLE T value() const
__device__ inline T value() const
{
// Using two separate constexprs silences compiler warnings, whereas an
// if/else does not. An unconditional return is not ignored by the compiler
@@ -156,13 +153,11 @@ struct mutable_column_expression_result
: public expression_result<mutable_column_expression_result<has_nulls>,
mutable_column_device_view,
has_nulls> {
CUDA_DEVICE_CALLABLE mutable_column_expression_result(mutable_column_device_view& obj) : _obj(obj)
{
}
__device__ inline mutable_column_expression_result(mutable_column_device_view& obj) : _obj(obj) {}

template <typename Element>
CUDA_DEVICE_CALLABLE void set_value(cudf::size_type index,
possibly_null_value_t<Element, has_nulls> const& result)
__device__ inline void set_value(cudf::size_type index,
possibly_null_value_t<Element, has_nulls> const& result)
{
if constexpr (has_nulls) {
if (result.has_value()) {
@@ -179,7 +174,7 @@ struct mutable_column_expression_result
/**
* @brief Not implemented for this specialization.
*/
CUDA_DEVICE_CALLABLE bool is_valid() const
__device__ inline bool is_valid() const
{
// Not implemented since it would require modifying the API in the parent class to accept an
// index.
@@ -191,7 +186,7 @@ struct mutable_column_expression_result
/**
* @brief Not implemented for this specialization.
*/
CUDA_DEVICE_CALLABLE mutable_column_device_view value() const
__device__ inline mutable_column_device_view value() const
{
// Not implemented since it would require modifying the API in the parent class to accept an
// index.
@@ -222,7 +217,7 @@ struct single_dispatch_binary_operator {
* @param args Forwarded arguments to `operator()` of `f`.
*/
template <typename LHS, typename F, typename... Ts>
CUDA_DEVICE_CALLABLE auto operator()(F&& f, Ts&&... args)
__device__ inline auto operator()(F&& f, Ts&&... args)
{
f.template operator()<LHS, LHS>(std::forward<Ts>(args)...);
}
@@ -247,9 +242,9 @@ struct expression_evaluator {
* storing intermediates.
*/
CUDA_DEVICE_CALLABLE expression_evaluator(table_device_view const& left,
table_device_view const& right,
expression_device_view const& plan)
__device__ inline expression_evaluator(table_device_view const& left,
table_device_view const& right,
expression_device_view const& plan)
: left(left), right(right), plan(plan)
{
}
@@ -262,8 +257,8 @@ struct expression_evaluator {
* @param thread_intermediate_storage Pointer to this thread's portion of shared memory for
* storing intermediates.
*/
CUDA_DEVICE_CALLABLE expression_evaluator(table_device_view const& table,
expression_device_view const& plan)
__device__ inline expression_evaluator(table_device_view const& table,
expression_device_view const& plan)
: expression_evaluator(table, table, plan)
{
}
@@ -282,7 +277,7 @@ struct expression_evaluator {
* @return Element The type- and null-resolved data.
*/
template <typename Element, CUDF_ENABLE_IF(column_device_view::has_element_accessor<Element>())>
CUDA_DEVICE_CALLABLE possibly_null_value_t<Element, has_nulls> resolve_input(
__device__ inline possibly_null_value_t<Element, has_nulls> resolve_input(
detail::device_data_reference const& input_reference,
IntermediateDataType<has_nulls>* thread_intermediate_storage,
cudf::size_type left_row_index,
@@ -333,7 +328,7 @@ struct expression_evaluator {

template <typename Element,
CUDF_ENABLE_IF(not column_device_view::has_element_accessor<Element>())>
CUDA_DEVICE_CALLABLE possibly_null_value_t<Element, has_nulls> resolve_input(
__device__ inline possibly_null_value_t<Element, has_nulls> resolve_input(
detail::device_data_reference const& device_data_reference,
IntermediateDataType<has_nulls>* thread_intermediate_storage,
cudf::size_type left_row_index,
@@ -358,7 +353,7 @@ struct expression_evaluator {
* @param op The operator to act with.
*/
template <typename Input, typename ResultSubclass, typename T, bool result_has_nulls>
CUDA_DEVICE_CALLABLE void operator()(
__device__ inline void operator()(
expression_result<ResultSubclass, T, result_has_nulls>& output_object,
cudf::size_type const input_row_index,
detail::device_data_reference const& input,
@@ -395,7 +390,7 @@ struct expression_evaluator {
* @param op The operator to act with.
*/
template <typename LHS, typename RHS, typename ResultSubclass, typename T, bool result_has_nulls>
CUDA_DEVICE_CALLABLE void operator()(
__device__ inline void operator()(
expression_result<ResultSubclass, T, result_has_nulls>& output_object,
cudf::size_type const left_row_index,
cudf::size_type const right_row_index,
@@ -431,9 +426,10 @@ struct expression_evaluator {
* @param row_index Row index of all input and output data column(s).
*/
template <typename ResultSubclass, typename T, bool result_has_nulls>
CUDF_DFI void evaluate(expression_result<ResultSubclass, T, result_has_nulls>& output_object,
cudf::size_type const row_index,
IntermediateDataType<has_nulls>* thread_intermediate_storage)
__device__ __forceinline__ void evaluate(
expression_result<ResultSubclass, T, result_has_nulls>& output_object,
cudf::size_type const row_index,
IntermediateDataType<has_nulls>* thread_intermediate_storage)
{
evaluate(output_object, row_index, row_index, row_index, thread_intermediate_storage);
}
@@ -451,11 +447,12 @@ struct expression_evaluator {
* @param output_row_index The row in the output to insert the result.
*/
template <typename ResultSubclass, typename T, bool result_has_nulls>
CUDF_DFI void evaluate(expression_result<ResultSubclass, T, result_has_nulls>& output_object,
cudf::size_type const left_row_index,
cudf::size_type const right_row_index,
cudf::size_type const output_row_index,
IntermediateDataType<has_nulls>* thread_intermediate_storage)
__device__ __forceinline__ void evaluate(
expression_result<ResultSubclass, T, result_has_nulls>& output_object,
cudf::size_type const left_row_index,
cudf::size_type const right_row_index,
cudf::size_type const output_row_index,
IntermediateDataType<has_nulls>* thread_intermediate_storage)
{
cudf::size_type operator_source_index{0};
for (cudf::size_type operator_index = 0; operator_index < plan.operators.size();
@@ -517,7 +514,7 @@ struct expression_evaluator {
*/
struct expression_output_handler {
public:
CUDA_DEVICE_CALLABLE expression_output_handler() {}
__device__ inline expression_output_handler() {}

/**
* @brief Resolves an output data reference and assigns result value.
@@ -539,7 +536,7 @@ struct expression_evaluator {
typename T,
bool result_has_nulls,
CUDF_ENABLE_IF(is_rep_layout_compatible<Element>())>
CUDA_DEVICE_CALLABLE void resolve_output(
__device__ inline void resolve_output(
expression_result<ResultSubclass, T, result_has_nulls>& output_object,
detail::device_data_reference const& device_data_reference,
cudf::size_type const row_index,
@@ -563,7 +560,7 @@ struct expression_evaluator {
typename T,
bool result_has_nulls,
CUDF_ENABLE_IF(!is_rep_layout_compatible<Element>())>
CUDA_DEVICE_CALLABLE void resolve_output(
__device__ inline void resolve_output(
expression_result<ResultSubclass, T, result_has_nulls>& output_object,
detail::device_data_reference const& device_data_reference,
cudf::size_type const row_index,
@@ -582,7 +579,7 @@ struct expression_evaluator {
*/
template <typename Input>
struct unary_expression_output_handler : public expression_output_handler {
CUDA_DEVICE_CALLABLE unary_expression_output_handler() {}
__device__ inline unary_expression_output_handler() {}

/**
* @brief Callable to perform a unary operation.
@@ -602,7 +599,7 @@ struct expression_evaluator {
std::enable_if_t<
detail::is_valid_unary_op<detail::operator_functor<op, has_nulls>,
possibly_null_value_t<Input, has_nulls>>>* = nullptr>
CUDA_DEVICE_CALLABLE void operator()(
__device__ inline void operator()(
expression_result<ResultSubclass, T, result_has_nulls>& output_object,
cudf::size_type const output_row_index,
possibly_null_value_t<Input, has_nulls> const& input,
@@ -626,7 +623,7 @@ struct expression_evaluator {
std::enable_if_t<
!detail::is_valid_unary_op<detail::operator_functor<op, has_nulls>,
possibly_null_value_t<Input, has_nulls>>>* = nullptr>
CUDA_DEVICE_CALLABLE void operator()(
__device__ inline void operator()(
expression_result<ResultSubclass, T, result_has_nulls>& output_object,
cudf::size_type const output_row_index,
possibly_null_value_t<Input, has_nulls> const& input,
@@ -645,7 +642,7 @@ struct expression_evaluator {
*/
template <typename LHS, typename RHS>
struct binary_expression_output_handler : public expression_output_handler {
CUDA_DEVICE_CALLABLE binary_expression_output_handler() {}
__device__ inline binary_expression_output_handler() {}

/**
* @brief Callable to perform a binary operation.
@@ -667,7 +664,7 @@ struct expression_evaluator {
possibly_null_value_t<LHS, has_nulls>,
possibly_null_value_t<RHS, has_nulls>>>* =
nullptr>
CUDA_DEVICE_CALLABLE void operator()(
__device__ inline void operator()(
expression_result<ResultSubclass, T, result_has_nulls>& output_object,
cudf::size_type const output_row_index,
possibly_null_value_t<LHS, has_nulls> const& lhs,
@@ -693,7 +690,7 @@ struct expression_evaluator {
!detail::is_valid_binary_op<detail::operator_functor<op, has_nulls>,
possibly_null_value_t<LHS, has_nulls>,
possibly_null_value_t<RHS, has_nulls>>>* = nullptr>
CUDA_DEVICE_CALLABLE void operator()(
__device__ inline void operator()(
expression_result<ResultSubclass, T, result_has_nulls>& output_object,
cudf::size_type const output_row_index,
possibly_null_value_t<LHS, has_nulls> const& lhs,