From 496aa47973aa89daf5a3f0b0b76405a8c7f2b92e Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Mon, 10 Jan 2022 16:33:15 -0600 Subject: [PATCH] Refactor host device macros (#9797) This PR is a follow-up to #9530 to standardize the names of the macros used for the `__host__ __device__` attributes. Aliases for `__device__` and combinations with inlining have been removed, and the only remaining macro is `CUDF_HOST_DEVICE` which is `__host__ __device__` in device code and empty in host code. See https://github.com/rapidsai/cudf/pull/9530#discussion_r739597422 for more discussion. Authors: - Vyas Ramasubramani (https://github.com/vyasr) Approvers: - Vukasin Milovanovic (https://github.com/vuule) - Jake Hemstad (https://github.com/jrhemstad) - Bradley Dice (https://github.com/bdice) - Mark Harris (https://github.com/harrism) URL: https://github.com/rapidsai/cudf/pull/9797 --- .../cudf/ast/detail/expression_evaluator.cuh | 93 ++++++----- cpp/include/cudf/ast/detail/operators.hpp | 152 +++++++++--------- .../cudf/column/column_device_view.cuh | 13 +- .../cudf/detail/aggregation/aggregation.hpp | 18 +-- cpp/include/cudf/detail/indexalator.cuh | 44 ++--- cpp/include/cudf/detail/iterator.cuh | 22 +-- cpp/include/cudf/detail/null_mask.cuh | 6 +- .../cudf/detail/reduction_operators.cuh | 41 +++-- .../detail/utilities/device_operators.cuh | 28 ++-- .../cudf/detail/utilities/hash_functions.cuh | 100 ++++++------ .../utilities/transform_unary_functions.cuh | 25 ++- cpp/include/cudf/fixed_point/fixed_point.hpp | 126 +++++++-------- cpp/include/cudf/fixed_point/temporary.hpp | 4 +- .../cudf/lists/detail/scatter_helper.cuh | 24 +-- cpp/include/cudf/lists/list_device_view.cuh | 43 +++-- .../cudf/lists/lists_column_device_view.cuh | 12 +- cpp/include/cudf/strings/json.hpp | 4 +- cpp/include/cudf/strings/string_view.cuh | 4 +- cpp/include/cudf/strings/string_view.hpp | 124 +++++++------- cpp/include/cudf/types.hpp | 14 +- 
cpp/include/cudf/utilities/bit.hpp | 22 +-- .../cudf/utilities/type_dispatcher.hpp | 16 +- cpp/include/cudf/wrappers/dictionary.hpp | 36 ++--- cpp/src/binaryop/compiled/binary_ops.cu | 18 +-- cpp/src/binaryop/compiled/binary_ops.cuh | 12 +- cpp/src/binaryop/compiled/operation.cuh | 74 ++++----- cpp/src/datetime/datetime_ops.cu | 16 +- cpp/src/groupby/sort/group_correlation.cu | 2 +- .../sort/group_single_pass_reduction_util.cuh | 2 +- cpp/src/groupby/sort/sort_helper.cu | 3 +- cpp/src/hash/md5_hash.cu | 29 ++-- cpp/src/io/orc/timezone.cuh | 8 +- cpp/src/quantiles/quantiles_util.hpp | 33 ++-- cpp/src/reductions/scan/scan_inclusive.cu | 3 +- cpp/src/rolling/rolling_detail.hpp | 8 +- cpp/src/strings/json/json_path.cu | 36 ++--- cpp/src/strings/regex/regex.cuh | 2 +- cpp/src/strings/regex/regex.inl | 10 +- cpp/src/unary/cast_ops.cu | 18 +-- .../optional_iterator_test_numeric.cu | 5 +- .../iterator/pair_iterator_test_numeric.cu | 7 +- 41 files changed, 610 insertions(+), 647 deletions(-) diff --git a/cpp/include/cudf/ast/detail/expression_evaluator.cuh b/cpp/include/cudf/ast/detail/expression_evaluator.cuh index f974088c8e7..0b739482c4d 100644 --- a/cpp/include/cudf/ast/detail/expression_evaluator.cuh +++ b/cpp/include/cudf/ast/detail/expression_evaluator.cuh @@ -57,11 +57,8 @@ struct expression_result { /** * Helper function to get the subclass type to dispatch methods to. 
*/ - CUDA_DEVICE_CALLABLE Subclass& subclass() { return static_cast(*this); } - CUDA_DEVICE_CALLABLE Subclass const& subclass() const - { - return static_cast(*this); - } + __device__ inline Subclass& subclass() { return static_cast(*this); } + __device__ inline Subclass const& subclass() const { return static_cast(*this); } // TODO: The index is ignored by the value subclass, but is included in this // signature because it is required by the implementation in the template @@ -73,15 +70,15 @@ struct expression_result { // used, whereas passing it as a parameter keeps it in registers for fast // access at the point where indexing occurs. template - CUDA_DEVICE_CALLABLE void set_value(cudf::size_type index, - possibly_null_value_t const& result) + __device__ inline void set_value(cudf::size_type index, + possibly_null_value_t const& result) { subclass().template set_value(index, result); } - CUDA_DEVICE_CALLABLE bool is_valid() const { return subclass().is_valid(); } + __device__ inline bool is_valid() const { return subclass().is_valid(); } - CUDA_DEVICE_CALLABLE T value() const { return subclass().value(); } + __device__ inline T value() const { return subclass().value(); } }; /** @@ -97,11 +94,11 @@ struct expression_result { template struct value_expression_result : public expression_result, T, has_nulls> { - CUDA_DEVICE_CALLABLE value_expression_result() {} + __device__ inline value_expression_result() {} template - CUDA_DEVICE_CALLABLE void set_value(cudf::size_type index, - possibly_null_value_t const& result) + __device__ inline void set_value(cudf::size_type index, + possibly_null_value_t const& result) { if constexpr (std::is_same_v) { _obj = result; @@ -113,7 +110,7 @@ struct value_expression_result /** * @brief Returns true if the underlying data is valid and false otherwise. 
*/ - CUDA_DEVICE_CALLABLE bool is_valid() const + __device__ inline bool is_valid() const { if constexpr (has_nulls) { return _obj.has_value(); } return true; @@ -125,7 +122,7 @@ struct value_expression_result * If the underlying data is not valid, behavior is undefined. Callers should * use is_valid to check for validity before accessing the value. */ - CUDA_DEVICE_CALLABLE T value() const + __device__ inline T value() const { // Using two separate constexprs silences compiler warnings, whereas an // if/else does not. An unconditional return is not ignored by the compiler @@ -156,13 +153,11 @@ struct mutable_column_expression_result : public expression_result, mutable_column_device_view, has_nulls> { - CUDA_DEVICE_CALLABLE mutable_column_expression_result(mutable_column_device_view& obj) : _obj(obj) - { - } + __device__ inline mutable_column_expression_result(mutable_column_device_view& obj) : _obj(obj) {} template - CUDA_DEVICE_CALLABLE void set_value(cudf::size_type index, - possibly_null_value_t const& result) + __device__ inline void set_value(cudf::size_type index, + possibly_null_value_t const& result) { if constexpr (has_nulls) { if (result.has_value()) { @@ -179,7 +174,7 @@ struct mutable_column_expression_result /** * @brief Not implemented for this specialization. */ - CUDA_DEVICE_CALLABLE bool is_valid() const + __device__ inline bool is_valid() const { // Not implemented since it would require modifying the API in the parent class to accept an // index. @@ -191,7 +186,7 @@ struct mutable_column_expression_result /** * @brief Not implemented for this specialization. */ - CUDA_DEVICE_CALLABLE mutable_column_device_view value() const + __device__ inline mutable_column_device_view value() const { // Not implemented since it would require modifying the API in the parent class to accept an // index. @@ -222,7 +217,7 @@ struct single_dispatch_binary_operator { * @param args Forwarded arguments to `operator()` of `f`. 
*/ template - CUDA_DEVICE_CALLABLE auto operator()(F&& f, Ts&&... args) + __device__ inline auto operator()(F&& f, Ts&&... args) { f.template operator()(std::forward(args)...); } @@ -247,9 +242,9 @@ struct expression_evaluator { * storing intermediates. */ - CUDA_DEVICE_CALLABLE expression_evaluator(table_device_view const& left, - table_device_view const& right, - expression_device_view const& plan) + __device__ inline expression_evaluator(table_device_view const& left, + table_device_view const& right, + expression_device_view const& plan) : left(left), right(right), plan(plan) { } @@ -262,8 +257,8 @@ struct expression_evaluator { * @param thread_intermediate_storage Pointer to this thread's portion of shared memory for * storing intermediates. */ - CUDA_DEVICE_CALLABLE expression_evaluator(table_device_view const& table, - expression_device_view const& plan) + __device__ inline expression_evaluator(table_device_view const& table, + expression_device_view const& plan) : expression_evaluator(table, table, plan) { } @@ -282,7 +277,7 @@ struct expression_evaluator { * @return Element The type- and null-resolved data. */ template ())> - CUDA_DEVICE_CALLABLE possibly_null_value_t resolve_input( + __device__ inline possibly_null_value_t resolve_input( detail::device_data_reference const& input_reference, IntermediateDataType* thread_intermediate_storage, cudf::size_type left_row_index, @@ -333,7 +328,7 @@ struct expression_evaluator { template ())> - CUDA_DEVICE_CALLABLE possibly_null_value_t resolve_input( + __device__ inline possibly_null_value_t resolve_input( detail::device_data_reference const& device_data_reference, IntermediateDataType* thread_intermediate_storage, cudf::size_type left_row_index, @@ -358,7 +353,7 @@ struct expression_evaluator { * @param op The operator to act with. 
*/ template - CUDA_DEVICE_CALLABLE void operator()( + __device__ inline void operator()( expression_result& output_object, cudf::size_type const input_row_index, detail::device_data_reference const& input, @@ -395,7 +390,7 @@ struct expression_evaluator { * @param op The operator to act with. */ template - CUDA_DEVICE_CALLABLE void operator()( + __device__ inline void operator()( expression_result& output_object, cudf::size_type const left_row_index, cudf::size_type const right_row_index, @@ -431,9 +426,10 @@ struct expression_evaluator { * @param row_index Row index of all input and output data column(s). */ template - CUDF_DFI void evaluate(expression_result& output_object, - cudf::size_type const row_index, - IntermediateDataType* thread_intermediate_storage) + __device__ __forceinline__ void evaluate( + expression_result& output_object, + cudf::size_type const row_index, + IntermediateDataType* thread_intermediate_storage) { evaluate(output_object, row_index, row_index, row_index, thread_intermediate_storage); } @@ -451,11 +447,12 @@ struct expression_evaluator { * @param output_row_index The row in the output to insert the result. 
*/ template - CUDF_DFI void evaluate(expression_result& output_object, - cudf::size_type const left_row_index, - cudf::size_type const right_row_index, - cudf::size_type const output_row_index, - IntermediateDataType* thread_intermediate_storage) + __device__ __forceinline__ void evaluate( + expression_result& output_object, + cudf::size_type const left_row_index, + cudf::size_type const right_row_index, + cudf::size_type const output_row_index, + IntermediateDataType* thread_intermediate_storage) { cudf::size_type operator_source_index{0}; for (cudf::size_type operator_index = 0; operator_index < plan.operators.size(); @@ -517,7 +514,7 @@ struct expression_evaluator { */ struct expression_output_handler { public: - CUDA_DEVICE_CALLABLE expression_output_handler() {} + __device__ inline expression_output_handler() {} /** * @brief Resolves an output data reference and assigns result value. @@ -539,7 +536,7 @@ struct expression_evaluator { typename T, bool result_has_nulls, CUDF_ENABLE_IF(is_rep_layout_compatible())> - CUDA_DEVICE_CALLABLE void resolve_output( + __device__ inline void resolve_output( expression_result& output_object, detail::device_data_reference const& device_data_reference, cudf::size_type const row_index, @@ -563,7 +560,7 @@ struct expression_evaluator { typename T, bool result_has_nulls, CUDF_ENABLE_IF(!is_rep_layout_compatible())> - CUDA_DEVICE_CALLABLE void resolve_output( + __device__ inline void resolve_output( expression_result& output_object, detail::device_data_reference const& device_data_reference, cudf::size_type const row_index, @@ -582,7 +579,7 @@ struct expression_evaluator { */ template struct unary_expression_output_handler : public expression_output_handler { - CUDA_DEVICE_CALLABLE unary_expression_output_handler() {} + __device__ inline unary_expression_output_handler() {} /** * @brief Callable to perform a unary operation. 
@@ -602,7 +599,7 @@ struct expression_evaluator { std::enable_if_t< detail::is_valid_unary_op, possibly_null_value_t>>* = nullptr> - CUDA_DEVICE_CALLABLE void operator()( + __device__ inline void operator()( expression_result& output_object, cudf::size_type const output_row_index, possibly_null_value_t const& input, @@ -626,7 +623,7 @@ struct expression_evaluator { std::enable_if_t< !detail::is_valid_unary_op, possibly_null_value_t>>* = nullptr> - CUDA_DEVICE_CALLABLE void operator()( + __device__ inline void operator()( expression_result& output_object, cudf::size_type const output_row_index, possibly_null_value_t const& input, @@ -645,7 +642,7 @@ struct expression_evaluator { */ template struct binary_expression_output_handler : public expression_output_handler { - CUDA_DEVICE_CALLABLE binary_expression_output_handler() {} + __device__ inline binary_expression_output_handler() {} /** * @brief Callable to perform a binary operation. @@ -667,7 +664,7 @@ struct expression_evaluator { possibly_null_value_t, possibly_null_value_t>>* = nullptr> - CUDA_DEVICE_CALLABLE void operator()( + __device__ inline void operator()( expression_result& output_object, cudf::size_type const output_row_index, possibly_null_value_t const& lhs, @@ -693,7 +690,7 @@ struct expression_evaluator { !detail::is_valid_binary_op, possibly_null_value_t, possibly_null_value_t>>* = nullptr> - CUDA_DEVICE_CALLABLE void operator()( + __device__ inline void operator()( expression_result& output_object, cudf::size_type const output_row_index, possibly_null_value_t const& lhs, diff --git a/cpp/include/cudf/ast/detail/operators.hpp b/cpp/include/cudf/ast/detail/operators.hpp index cffefcaf9cd..d7fd109f12a 100644 --- a/cpp/include/cudf/ast/detail/operators.hpp +++ b/cpp/include/cudf/ast/detail/operators.hpp @@ -49,9 +49,7 @@ constexpr bool is_valid_unary_op = cuda::std::is_invocable::value; * @param args Forwarded arguments to `operator()` of `f`. 
*/ template -CUDA_HOST_DEVICE_CALLABLE constexpr void ast_operator_dispatcher(ast_operator op, - F&& f, - Ts&&... args) +CUDF_HOST_DEVICE inline constexpr void ast_operator_dispatcher(ast_operator op, F&& f, Ts&&... args) { switch (op) { case ast_operator::ADD: @@ -234,7 +232,7 @@ struct operator_functor { static constexpr auto arity{2}; template - CUDA_DEVICE_CALLABLE auto operator()(LHS lhs, RHS rhs) -> decltype(lhs + rhs) + __device__ inline auto operator()(LHS lhs, RHS rhs) -> decltype(lhs + rhs) { return lhs + rhs; } @@ -245,7 +243,7 @@ struct operator_functor { static constexpr auto arity{2}; template - CUDA_DEVICE_CALLABLE auto operator()(LHS lhs, RHS rhs) -> decltype(lhs - rhs) + __device__ inline auto operator()(LHS lhs, RHS rhs) -> decltype(lhs - rhs) { return lhs - rhs; } @@ -256,7 +254,7 @@ struct operator_functor { static constexpr auto arity{2}; template - CUDA_DEVICE_CALLABLE auto operator()(LHS lhs, RHS rhs) -> decltype(lhs * rhs) + __device__ inline auto operator()(LHS lhs, RHS rhs) -> decltype(lhs * rhs) { return lhs * rhs; } @@ -267,7 +265,7 @@ struct operator_functor { static constexpr auto arity{2}; template - CUDA_DEVICE_CALLABLE auto operator()(LHS lhs, RHS rhs) -> decltype(lhs / rhs) + __device__ inline auto operator()(LHS lhs, RHS rhs) -> decltype(lhs / rhs) { return lhs / rhs; } @@ -278,7 +276,7 @@ struct operator_functor { static constexpr auto arity{2}; template - CUDA_DEVICE_CALLABLE auto operator()(LHS lhs, RHS rhs) + __device__ inline auto operator()(LHS lhs, RHS rhs) -> decltype(static_cast(lhs) / static_cast(rhs)) { return static_cast(lhs) / static_cast(rhs); @@ -290,7 +288,7 @@ struct operator_functor { static constexpr auto arity{2}; template - CUDA_DEVICE_CALLABLE auto operator()(LHS lhs, RHS rhs) + __device__ inline auto operator()(LHS lhs, RHS rhs) -> decltype(floor(static_cast(lhs) / static_cast(rhs))) { return floor(static_cast(lhs) / static_cast(rhs)); @@ -305,7 +303,7 @@ struct operator_functor { typename RHS, typename 
CommonType = std::common_type_t, std::enable_if_t::value>* = nullptr> - CUDA_DEVICE_CALLABLE auto operator()(LHS lhs, RHS rhs) + __device__ inline auto operator()(LHS lhs, RHS rhs) -> decltype(static_cast(lhs) % static_cast(rhs)) { return static_cast(lhs) % static_cast(rhs); @@ -315,7 +313,7 @@ struct operator_functor { typename RHS, typename CommonType = std::common_type_t, std::enable_if_t>* = nullptr> - CUDA_DEVICE_CALLABLE auto operator()(LHS lhs, RHS rhs) + __device__ inline auto operator()(LHS lhs, RHS rhs) -> decltype(fmodf(static_cast(lhs), static_cast(rhs))) { return fmodf(static_cast(lhs), static_cast(rhs)); @@ -325,7 +323,7 @@ struct operator_functor { typename RHS, typename CommonType = std::common_type_t, std::enable_if_t>* = nullptr> - CUDA_DEVICE_CALLABLE auto operator()(LHS lhs, RHS rhs) + __device__ inline auto operator()(LHS lhs, RHS rhs) -> decltype(fmod(static_cast(lhs), static_cast(rhs))) { return fmod(static_cast(lhs), static_cast(rhs)); @@ -340,7 +338,7 @@ struct operator_functor { typename RHS, typename CommonType = std::common_type_t, std::enable_if_t::value>* = nullptr> - CUDA_DEVICE_CALLABLE auto operator()(LHS lhs, RHS rhs) + __device__ inline auto operator()(LHS lhs, RHS rhs) -> decltype(((static_cast(lhs) % static_cast(rhs)) + static_cast(rhs)) % static_cast(rhs)) @@ -354,7 +352,7 @@ struct operator_functor { typename RHS, typename CommonType = std::common_type_t, std::enable_if_t>* = nullptr> - CUDA_DEVICE_CALLABLE auto operator()(LHS lhs, RHS rhs) + __device__ inline auto operator()(LHS lhs, RHS rhs) -> decltype(fmodf(fmodf(static_cast(lhs), static_cast(rhs)) + static_cast(rhs), static_cast(rhs))) @@ -368,7 +366,7 @@ struct operator_functor { typename RHS, typename CommonType = std::common_type_t, std::enable_if_t>* = nullptr> - CUDA_DEVICE_CALLABLE auto operator()(LHS lhs, RHS rhs) + __device__ inline auto operator()(LHS lhs, RHS rhs) -> decltype(fmod(fmod(static_cast(lhs), static_cast(rhs)) + static_cast(rhs), static_cast(rhs))) @@ 
-384,7 +382,7 @@ struct operator_functor { static constexpr auto arity{2}; template - CUDA_DEVICE_CALLABLE auto operator()(LHS lhs, RHS rhs) -> decltype(std::pow(lhs, rhs)) + __device__ inline auto operator()(LHS lhs, RHS rhs) -> decltype(std::pow(lhs, rhs)) { return std::pow(lhs, rhs); } @@ -395,7 +393,7 @@ struct operator_functor { static constexpr auto arity{2}; template - CUDA_DEVICE_CALLABLE auto operator()(LHS lhs, RHS rhs) -> decltype(lhs == rhs) + __device__ inline auto operator()(LHS lhs, RHS rhs) -> decltype(lhs == rhs) { return lhs == rhs; } @@ -412,7 +410,7 @@ struct operator_functor { static constexpr auto arity{2}; template - CUDA_DEVICE_CALLABLE auto operator()(LHS lhs, RHS rhs) -> decltype(lhs != rhs) + __device__ inline auto operator()(LHS lhs, RHS rhs) -> decltype(lhs != rhs) { return lhs != rhs; } @@ -423,7 +421,7 @@ struct operator_functor { static constexpr auto arity{2}; template - CUDA_DEVICE_CALLABLE auto operator()(LHS lhs, RHS rhs) -> decltype(lhs < rhs) + __device__ inline auto operator()(LHS lhs, RHS rhs) -> decltype(lhs < rhs) { return lhs < rhs; } @@ -434,7 +432,7 @@ struct operator_functor { static constexpr auto arity{2}; template - CUDA_DEVICE_CALLABLE auto operator()(LHS lhs, RHS rhs) -> decltype(lhs > rhs) + __device__ inline auto operator()(LHS lhs, RHS rhs) -> decltype(lhs > rhs) { return lhs > rhs; } @@ -445,7 +443,7 @@ struct operator_functor { static constexpr auto arity{2}; template - CUDA_DEVICE_CALLABLE auto operator()(LHS lhs, RHS rhs) -> decltype(lhs <= rhs) + __device__ inline auto operator()(LHS lhs, RHS rhs) -> decltype(lhs <= rhs) { return lhs <= rhs; } @@ -456,7 +454,7 @@ struct operator_functor { static constexpr auto arity{2}; template - CUDA_DEVICE_CALLABLE auto operator()(LHS lhs, RHS rhs) -> decltype(lhs >= rhs) + __device__ inline auto operator()(LHS lhs, RHS rhs) -> decltype(lhs >= rhs) { return lhs >= rhs; } @@ -467,7 +465,7 @@ struct operator_functor { static constexpr auto arity{2}; template - 
CUDA_DEVICE_CALLABLE auto operator()(LHS lhs, RHS rhs) -> decltype(lhs & rhs) + __device__ inline auto operator()(LHS lhs, RHS rhs) -> decltype(lhs & rhs) { return lhs & rhs; } @@ -478,7 +476,7 @@ struct operator_functor { static constexpr auto arity{2}; template - CUDA_DEVICE_CALLABLE auto operator()(LHS lhs, RHS rhs) -> decltype(lhs | rhs) + __device__ inline auto operator()(LHS lhs, RHS rhs) -> decltype(lhs | rhs) { return lhs | rhs; } @@ -489,7 +487,7 @@ struct operator_functor { static constexpr auto arity{2}; template - CUDA_DEVICE_CALLABLE auto operator()(LHS lhs, RHS rhs) -> decltype(lhs ^ rhs) + __device__ inline auto operator()(LHS lhs, RHS rhs) -> decltype(lhs ^ rhs) { return lhs ^ rhs; } @@ -500,7 +498,7 @@ struct operator_functor { static constexpr auto arity{2}; template - CUDA_DEVICE_CALLABLE auto operator()(LHS lhs, RHS rhs) -> decltype(lhs && rhs) + __device__ inline auto operator()(LHS lhs, RHS rhs) -> decltype(lhs && rhs) { return lhs && rhs; } @@ -517,7 +515,7 @@ struct operator_functor { static constexpr auto arity{2}; template - CUDA_DEVICE_CALLABLE auto operator()(LHS lhs, RHS rhs) -> decltype(lhs || rhs) + __device__ inline auto operator()(LHS lhs, RHS rhs) -> decltype(lhs || rhs) { return lhs || rhs; } @@ -534,7 +532,7 @@ struct operator_functor { static constexpr auto arity{1}; template - CUDA_DEVICE_CALLABLE auto operator()(InputT input) -> decltype(input) + __device__ inline auto operator()(InputT input) -> decltype(input) { return input; } @@ -545,7 +543,7 @@ struct operator_functor { static constexpr auto arity{1}; template ::value>* = nullptr> - CUDA_DEVICE_CALLABLE auto operator()(InputT input) -> decltype(std::sin(input)) + __device__ inline auto operator()(InputT input) -> decltype(std::sin(input)) { return std::sin(input); } @@ -556,7 +554,7 @@ struct operator_functor { static constexpr auto arity{1}; template ::value>* = nullptr> - CUDA_DEVICE_CALLABLE auto operator()(InputT input) -> decltype(std::cos(input)) + __device__ inline 
auto operator()(InputT input) -> decltype(std::cos(input)) { return std::cos(input); } @@ -567,7 +565,7 @@ struct operator_functor { static constexpr auto arity{1}; template ::value>* = nullptr> - CUDA_DEVICE_CALLABLE auto operator()(InputT input) -> decltype(std::tan(input)) + __device__ inline auto operator()(InputT input) -> decltype(std::tan(input)) { return std::tan(input); } @@ -578,7 +576,7 @@ struct operator_functor { static constexpr auto arity{1}; template ::value>* = nullptr> - CUDA_DEVICE_CALLABLE auto operator()(InputT input) -> decltype(std::asin(input)) + __device__ inline auto operator()(InputT input) -> decltype(std::asin(input)) { return std::asin(input); } @@ -589,7 +587,7 @@ struct operator_functor { static constexpr auto arity{1}; template ::value>* = nullptr> - CUDA_DEVICE_CALLABLE auto operator()(InputT input) -> decltype(std::acos(input)) + __device__ inline auto operator()(InputT input) -> decltype(std::acos(input)) { return std::acos(input); } @@ -600,7 +598,7 @@ struct operator_functor { static constexpr auto arity{1}; template ::value>* = nullptr> - CUDA_DEVICE_CALLABLE auto operator()(InputT input) -> decltype(std::atan(input)) + __device__ inline auto operator()(InputT input) -> decltype(std::atan(input)) { return std::atan(input); } @@ -611,7 +609,7 @@ struct operator_functor { static constexpr auto arity{1}; template ::value>* = nullptr> - CUDA_DEVICE_CALLABLE auto operator()(InputT input) -> decltype(std::sinh(input)) + __device__ inline auto operator()(InputT input) -> decltype(std::sinh(input)) { return std::sinh(input); } @@ -622,7 +620,7 @@ struct operator_functor { static constexpr auto arity{1}; template ::value>* = nullptr> - CUDA_DEVICE_CALLABLE auto operator()(InputT input) -> decltype(std::cosh(input)) + __device__ inline auto operator()(InputT input) -> decltype(std::cosh(input)) { return std::cosh(input); } @@ -633,7 +631,7 @@ struct operator_functor { static constexpr auto arity{1}; template ::value>* = nullptr> - 
CUDA_DEVICE_CALLABLE auto operator()(InputT input) -> decltype(std::tanh(input)) + __device__ inline auto operator()(InputT input) -> decltype(std::tanh(input)) { return std::tanh(input); } @@ -644,7 +642,7 @@ struct operator_functor { static constexpr auto arity{1}; template ::value>* = nullptr> - CUDA_DEVICE_CALLABLE auto operator()(InputT input) -> decltype(std::asinh(input)) + __device__ inline auto operator()(InputT input) -> decltype(std::asinh(input)) { return std::asinh(input); } @@ -655,7 +653,7 @@ struct operator_functor { static constexpr auto arity{1}; template ::value>* = nullptr> - CUDA_DEVICE_CALLABLE auto operator()(InputT input) -> decltype(std::acosh(input)) + __device__ inline auto operator()(InputT input) -> decltype(std::acosh(input)) { return std::acosh(input); } @@ -666,7 +664,7 @@ struct operator_functor { static constexpr auto arity{1}; template ::value>* = nullptr> - CUDA_DEVICE_CALLABLE auto operator()(InputT input) -> decltype(std::atanh(input)) + __device__ inline auto operator()(InputT input) -> decltype(std::atanh(input)) { return std::atanh(input); } @@ -677,7 +675,7 @@ struct operator_functor { static constexpr auto arity{1}; template - CUDA_DEVICE_CALLABLE auto operator()(InputT input) -> decltype(std::exp(input)) + __device__ inline auto operator()(InputT input) -> decltype(std::exp(input)) { return std::exp(input); } @@ -688,7 +686,7 @@ struct operator_functor { static constexpr auto arity{1}; template - CUDA_DEVICE_CALLABLE auto operator()(InputT input) -> decltype(std::log(input)) + __device__ inline auto operator()(InputT input) -> decltype(std::log(input)) { return std::log(input); } @@ -699,7 +697,7 @@ struct operator_functor { static constexpr auto arity{1}; template - CUDA_DEVICE_CALLABLE auto operator()(InputT input) -> decltype(std::sqrt(input)) + __device__ inline auto operator()(InputT input) -> decltype(std::sqrt(input)) { return std::sqrt(input); } @@ -710,7 +708,7 @@ struct operator_functor { static constexpr auto 
arity{1}; template - CUDA_DEVICE_CALLABLE auto operator()(InputT input) -> decltype(std::cbrt(input)) + __device__ inline auto operator()(InputT input) -> decltype(std::cbrt(input)) { return std::cbrt(input); } @@ -721,7 +719,7 @@ struct operator_functor { static constexpr auto arity{1}; template - CUDA_DEVICE_CALLABLE auto operator()(InputT input) -> decltype(std::ceil(input)) + __device__ inline auto operator()(InputT input) -> decltype(std::ceil(input)) { return std::ceil(input); } @@ -732,7 +730,7 @@ struct operator_functor { static constexpr auto arity{1}; template - CUDA_DEVICE_CALLABLE auto operator()(InputT input) -> decltype(std::floor(input)) + __device__ inline auto operator()(InputT input) -> decltype(std::floor(input)) { return std::floor(input); } @@ -744,13 +742,13 @@ struct operator_functor { // Only accept signed or unsigned types (both require is_arithmetic to be true) template ::value>* = nullptr> - CUDA_DEVICE_CALLABLE auto operator()(InputT input) -> decltype(std::abs(input)) + __device__ inline auto operator()(InputT input) -> decltype(std::abs(input)) { return std::abs(input); } template ::value>* = nullptr> - CUDA_DEVICE_CALLABLE auto operator()(InputT input) -> decltype(input) + __device__ inline auto operator()(InputT input) -> decltype(input) { return input; } @@ -761,7 +759,7 @@ struct operator_functor { static constexpr auto arity{1}; template - CUDA_DEVICE_CALLABLE auto operator()(InputT input) -> decltype(std::rint(input)) + __device__ inline auto operator()(InputT input) -> decltype(std::rint(input)) { return std::rint(input); } @@ -772,7 +770,7 @@ struct operator_functor { static constexpr auto arity{1}; template - CUDA_DEVICE_CALLABLE auto operator()(InputT input) -> decltype(~input) + __device__ inline auto operator()(InputT input) -> decltype(~input) { return ~input; } @@ -783,7 +781,7 @@ struct operator_functor { static constexpr auto arity{1}; template - CUDA_DEVICE_CALLABLE auto operator()(InputT input) -> decltype(!input) + 
__device__ inline auto operator()(InputT input) -> decltype(!input) { return !input; } @@ -793,7 +791,7 @@ template struct cast { static constexpr auto arity{1}; template - CUDA_DEVICE_CALLABLE auto operator()(From f) -> decltype(static_cast(f)) + __device__ inline auto operator()(From f) -> decltype(static_cast(f)) { return static_cast(f); } @@ -822,7 +820,7 @@ struct operator_functor { typename RHS, std::size_t arity_placeholder = arity, std::enable_if_t* = nullptr> - CUDA_DEVICE_CALLABLE auto operator()(LHS const lhs, RHS const rhs) + __device__ inline auto operator()(LHS const lhs, RHS const rhs) -> possibly_null_value_t { using Out = possibly_null_value_t; @@ -832,7 +830,7 @@ struct operator_functor { template * = nullptr> - CUDA_DEVICE_CALLABLE auto operator()(Input const input) + __device__ inline auto operator()(Input const input) -> possibly_null_value_t { using Out = possibly_null_value_t; @@ -848,7 +846,7 @@ struct operator_functor { static constexpr auto arity = NonNullOperator::arity; template - CUDA_DEVICE_CALLABLE auto operator()(LHS const lhs, RHS const rhs) + __device__ inline auto operator()(LHS const lhs, RHS const rhs) -> possibly_null_value_t { // Case 1: Neither is null, so the output is given by the operation. @@ -869,7 +867,7 @@ struct operator_functor { static constexpr auto arity = NonNullOperator::arity; template - CUDA_DEVICE_CALLABLE auto operator()(LHS const lhs, RHS const rhs) + __device__ inline auto operator()(LHS const lhs, RHS const rhs) -> possibly_null_value_t { // Case 1: Neither is null, so the output is given by the operation. @@ -892,7 +890,7 @@ struct operator_functor { static constexpr auto arity = NonNullOperator::arity; template - CUDA_DEVICE_CALLABLE auto operator()(LHS const lhs, RHS const rhs) + __device__ inline auto operator()(LHS const lhs, RHS const rhs) -> possibly_null_value_t { // Case 1: Neither is null, so the output is given by the operation. 
@@ -922,7 +920,7 @@ struct single_dispatch_binary_operator_types { typename F, typename... Ts, std::enable_if_t>* = nullptr> - CUDA_HOST_DEVICE_CALLABLE void operator()(F&& f, Ts&&... args) + CUDF_HOST_DEVICE inline void operator()(F&& f, Ts&&... args) { f.template operator()(std::forward(args)...); } @@ -931,7 +929,7 @@ struct single_dispatch_binary_operator_types { typename F, typename... Ts, std::enable_if_t>* = nullptr> - CUDA_HOST_DEVICE_CALLABLE void operator()(F&& f, Ts&&... args) + CUDF_HOST_DEVICE inline void operator()(F&& f, Ts&&... args) { #ifndef __CUDA_ARCH__ CUDF_FAIL("Invalid binary operation."); @@ -960,10 +958,10 @@ struct type_dispatch_binary_op { * @param args Forwarded arguments to `operator()` of `f`. */ template - CUDA_HOST_DEVICE_CALLABLE void operator()(cudf::data_type lhs_type, - cudf::data_type rhs_type, - F&& f, - Ts&&... args) + CUDF_HOST_DEVICE inline void operator()(cudf::data_type lhs_type, + cudf::data_type rhs_type, + F&& f, + Ts&&... args) { // Single dispatch (assume lhs_type == rhs_type) type_dispatcher( @@ -986,7 +984,7 @@ struct type_dispatch_binary_op { * @param args Forwarded arguments to `operator()` of `f`. */ template -CUDA_HOST_DEVICE_CALLABLE constexpr void binary_operator_dispatcher( +CUDF_HOST_DEVICE inline constexpr void binary_operator_dispatcher( ast_operator op, cudf::data_type lhs_type, cudf::data_type rhs_type, F&& f, Ts&&... args) { ast_operator_dispatcher(op, @@ -1011,7 +1009,7 @@ struct dispatch_unary_operator_types { typename F, typename... Ts, std::enable_if_t>* = nullptr> - CUDA_HOST_DEVICE_CALLABLE void operator()(F&& f, Ts&&... args) + CUDF_HOST_DEVICE inline void operator()(F&& f, Ts&&... args) { f.template operator()(std::forward(args)...); } @@ -1020,7 +1018,7 @@ struct dispatch_unary_operator_types { typename F, typename... Ts, std::enable_if_t>* = nullptr> - CUDA_HOST_DEVICE_CALLABLE void operator()(F&& f, Ts&&... args) + CUDF_HOST_DEVICE inline void operator()(F&& f, Ts&&... 
args) { #ifndef __CUDA_ARCH__ CUDF_FAIL("Invalid unary operation."); @@ -1035,7 +1033,7 @@ struct dispatch_unary_operator_types { */ struct type_dispatch_unary_op { template - CUDA_HOST_DEVICE_CALLABLE void operator()(cudf::data_type input_type, F&& f, Ts&&... args) + CUDF_HOST_DEVICE inline void operator()(cudf::data_type input_type, F&& f, Ts&&... args) { type_dispatcher( input_type, @@ -1056,10 +1054,10 @@ struct type_dispatch_unary_op { * @param args Forwarded arguments to `operator()` of `f`. */ template -CUDA_HOST_DEVICE_CALLABLE constexpr void unary_operator_dispatcher(ast_operator op, - cudf::data_type input_type, - F&& f, - Ts&&... args) +CUDF_HOST_DEVICE inline constexpr void unary_operator_dispatcher(ast_operator op, + cudf::data_type input_type, + F&& f, + Ts&&... args) { ast_operator_dispatcher(op, detail::type_dispatch_unary_op{}, @@ -1084,7 +1082,7 @@ struct return_type_functor { typename LHS, typename RHS, std::enable_if_t>* = nullptr> - CUDA_HOST_DEVICE_CALLABLE void operator()(cudf::data_type& result) + CUDF_HOST_DEVICE inline void operator()(cudf::data_type& result) { using Out = cuda::std::invoke_result_t; result = cudf::data_type(cudf::type_to_id()); @@ -1094,7 +1092,7 @@ struct return_type_functor { typename LHS, typename RHS, std::enable_if_t>* = nullptr> - CUDA_HOST_DEVICE_CALLABLE void operator()(cudf::data_type& result) + CUDF_HOST_DEVICE inline void operator()(cudf::data_type& result) { #ifndef __CUDA_ARCH__ CUDF_FAIL("Invalid binary operation. 
Return type cannot be determined."); @@ -1113,7 +1111,7 @@ struct return_type_functor { template >* = nullptr> - CUDA_HOST_DEVICE_CALLABLE void operator()(cudf::data_type& result) + CUDF_HOST_DEVICE inline void operator()(cudf::data_type& result) { using Out = cuda::std::invoke_result_t; result = cudf::data_type(cudf::type_to_id()); @@ -1122,7 +1120,7 @@ struct return_type_functor { template >* = nullptr> - CUDA_HOST_DEVICE_CALLABLE void operator()(cudf::data_type& result) + CUDF_HOST_DEVICE inline void operator()(cudf::data_type& result) { #ifndef __CUDA_ARCH__ CUDF_FAIL("Invalid unary operation. Return type cannot be determined."); @@ -1161,7 +1159,7 @@ inline cudf::data_type ast_operator_return_type(ast_operator op, */ struct arity_functor { template - CUDA_HOST_DEVICE_CALLABLE void operator()(cudf::size_type& result) + CUDF_HOST_DEVICE inline void operator()(cudf::size_type& result) { // Arity is not dependent on null handling, so just use the false implementation here. result = operator_functor::arity; @@ -1174,7 +1172,7 @@ struct arity_functor { * @param op Operator used to determine arity. * @return Arity of the operator. */ -CUDA_HOST_DEVICE_CALLABLE cudf::size_type ast_operator_arity(ast_operator op) +CUDF_HOST_DEVICE inline cudf::size_type ast_operator_arity(ast_operator op) { auto result = cudf::size_type(0); ast_operator_dispatcher(op, detail::arity_functor{}, result); diff --git a/cpp/include/cudf/column/column_device_view.cuh b/cpp/include/cudf/column/column_device_view.cuh index a15f20ef52d..b29df1852b2 100644 --- a/cpp/include/cudf/column/column_device_view.cuh +++ b/cpp/include/cudf/column/column_device_view.cuh @@ -1151,8 +1151,7 @@ struct optional_accessor { if (with_nulls) { CUDF_EXPECTS(_col.nullable(), "Unexpected non-nullable column."); } } - CUDA_DEVICE_CALLABLE - thrust::optional operator()(cudf::size_type i) const + __device__ inline thrust::optional operator()(cudf::size_type i) const { if (has_nulls) { return (col.is_valid_nocheck(i)) ? 
thrust::optional{col.element(i)} @@ -1196,8 +1195,7 @@ struct pair_accessor { if (has_nulls) { CUDF_EXPECTS(_col.nullable(), "Unexpected non-nullable column."); } } - CUDA_DEVICE_CALLABLE - thrust::pair operator()(cudf::size_type i) const + __device__ inline thrust::pair operator()(cudf::size_type i) const { return {col.element(i), (has_nulls ? col.is_valid_nocheck(i) : true)}; } @@ -1237,21 +1235,20 @@ struct pair_rep_accessor { if (has_nulls) { CUDF_EXPECTS(_col.nullable(), "Unexpected non-nullable column."); } } - CUDA_DEVICE_CALLABLE - thrust::pair operator()(cudf::size_type i) const + __device__ inline thrust::pair operator()(cudf::size_type i) const { return {get_rep(i), (has_nulls ? col.is_valid_nocheck(i) : true)}; } private: template , void>* = nullptr> - CUDA_DEVICE_CALLABLE auto get_rep(cudf::size_type i) const + __device__ inline auto get_rep(cudf::size_type i) const { return col.element(i); } template , void>* = nullptr> - CUDA_DEVICE_CALLABLE auto get_rep(cudf::size_type i) const + __device__ inline auto get_rep(cudf::size_type i) const { return col.element(i).value(); } diff --git a/cpp/include/cudf/detail/aggregation/aggregation.hpp b/cpp/include/cudf/detail/aggregation/aggregation.hpp index c2bd7a4893c..3674efbcc7b 100644 --- a/cpp/include/cudf/detail/aggregation/aggregation.hpp +++ b/cpp/include/cudf/detail/aggregation/aggregation.hpp @@ -1324,9 +1324,9 @@ AGG_KIND_MAPPING(aggregation::VARIANCE, var_aggregation); */ #pragma nv_exec_check_disable template -CUDA_HOST_DEVICE_CALLABLE decltype(auto) aggregation_dispatcher(aggregation::Kind k, - F&& f, - Ts&&... args) +CUDF_HOST_DEVICE inline decltype(auto) aggregation_dispatcher(aggregation::Kind k, + F&& f, + Ts&&... args) { switch (k) { case aggregation::SUM: @@ -1418,7 +1418,7 @@ template struct dispatch_aggregation { #pragma nv_exec_check_disable template - CUDA_HOST_DEVICE_CALLABLE decltype(auto) operator()(F&& f, Ts&&... 
args) const + CUDF_HOST_DEVICE inline decltype(auto) operator()(F&& f, Ts&&... args) const { return f.template operator()(std::forward(args)...); } @@ -1427,9 +1427,7 @@ struct dispatch_aggregation { struct dispatch_source { #pragma nv_exec_check_disable template - CUDA_HOST_DEVICE_CALLABLE decltype(auto) operator()(aggregation::Kind k, - F&& f, - Ts&&... args) const + CUDF_HOST_DEVICE inline decltype(auto) operator()(aggregation::Kind k, F&& f, Ts&&... args) const { return aggregation_dispatcher( k, dispatch_aggregation{}, std::forward(f), std::forward(args)...); @@ -1453,8 +1451,10 @@ struct dispatch_source { */ #pragma nv_exec_check_disable template -CUDA_HOST_DEVICE_CALLABLE constexpr decltype(auto) dispatch_type_and_aggregation( - data_type type, aggregation::Kind k, F&& f, Ts&&... args) +CUDF_HOST_DEVICE inline constexpr decltype(auto) dispatch_type_and_aggregation(data_type type, + aggregation::Kind k, + F&& f, + Ts&&... args) { return type_dispatcher(type, dispatch_source{}, k, std::forward(f), std::forward(args)...); } diff --git a/cpp/include/cudf/detail/indexalator.cuh b/cpp/include/cudf/detail/indexalator.cuh index d0fa4e02440..11c82da8097 100644 --- a/cpp/include/cudf/detail/indexalator.cuh +++ b/cpp/include/cudf/detail/indexalator.cuh @@ -55,7 +55,7 @@ struct base_indexalator { /** * @brief Prefix increment operator. */ - CUDA_HOST_DEVICE_CALLABLE T& operator++() + CUDF_HOST_DEVICE inline T& operator++() { T& derived = static_cast(*this); derived.p_ += width_; @@ -65,7 +65,7 @@ struct base_indexalator { /** * @brief Postfix increment operator. */ - CUDA_HOST_DEVICE_CALLABLE T operator++(int) + CUDF_HOST_DEVICE inline T operator++(int) { T tmp{static_cast(*this)}; operator++(); @@ -75,7 +75,7 @@ struct base_indexalator { /** * @brief Prefix decrement operator. 
*/ - CUDA_HOST_DEVICE_CALLABLE T& operator--() + CUDF_HOST_DEVICE inline T& operator--() { T& derived = static_cast(*this); derived.p_ -= width_; @@ -85,7 +85,7 @@ struct base_indexalator { /** * @brief Postfix decrement operator. */ - CUDA_HOST_DEVICE_CALLABLE T operator--(int) + CUDF_HOST_DEVICE inline T operator--(int) { T tmp{static_cast(*this)}; operator--(); @@ -95,7 +95,7 @@ struct base_indexalator { /** * @brief Compound assignment by sum operator. */ - CUDA_HOST_DEVICE_CALLABLE T& operator+=(difference_type offset) + CUDF_HOST_DEVICE inline T& operator+=(difference_type offset) { T& derived = static_cast(*this); derived.p_ += offset * width_; @@ -105,7 +105,7 @@ struct base_indexalator { /** * @brief Increment by offset operator. */ - CUDA_HOST_DEVICE_CALLABLE T operator+(difference_type offset) const + CUDF_HOST_DEVICE inline T operator+(difference_type offset) const { auto tmp = T{static_cast(*this)}; tmp.p_ += (offset * width_); @@ -115,7 +115,7 @@ struct base_indexalator { /** * @brief Addition assignment operator. */ - CUDA_HOST_DEVICE_CALLABLE friend T operator+(difference_type offset, T const& rhs) + CUDF_HOST_DEVICE inline friend T operator+(difference_type offset, T const& rhs) { T tmp{rhs}; tmp.p_ += (offset * rhs.width_); @@ -125,7 +125,7 @@ struct base_indexalator { /** * @brief Compound assignment by difference operator. */ - CUDA_HOST_DEVICE_CALLABLE T& operator-=(difference_type offset) + CUDF_HOST_DEVICE inline T& operator-=(difference_type offset) { T& derived = static_cast(*this); derived.p_ -= offset * width_; @@ -135,7 +135,7 @@ struct base_indexalator { /** * @brief Decrement by offset operator. */ - CUDA_HOST_DEVICE_CALLABLE T operator-(difference_type offset) const + CUDF_HOST_DEVICE inline T operator-(difference_type offset) const { auto tmp = T{static_cast(*this)}; tmp.p_ -= (offset * width_); @@ -145,7 +145,7 @@ struct base_indexalator { /** * @brief Subtraction assignment operator. 
*/ - CUDA_HOST_DEVICE_CALLABLE friend T operator-(difference_type offset, T const& rhs) + CUDF_HOST_DEVICE inline friend T operator-(difference_type offset, T const& rhs) { T tmp{rhs}; tmp.p_ -= (offset * rhs.width_); @@ -155,7 +155,7 @@ struct base_indexalator { /** * @brief Compute offset from iterator difference operator. */ - CUDA_HOST_DEVICE_CALLABLE difference_type operator-(T const& rhs) const + CUDF_HOST_DEVICE inline difference_type operator-(T const& rhs) const { return (static_cast(*this).p_ - rhs.p_) / width_; } @@ -163,42 +163,42 @@ struct base_indexalator { /** * @brief Equals to operator. */ - CUDA_HOST_DEVICE_CALLABLE bool operator==(T const& rhs) const + CUDF_HOST_DEVICE inline bool operator==(T const& rhs) const { return rhs.p_ == static_cast(*this).p_; } /** * @brief Not equals to operator. */ - CUDA_HOST_DEVICE_CALLABLE bool operator!=(T const& rhs) const + CUDF_HOST_DEVICE inline bool operator!=(T const& rhs) const { return rhs.p_ != static_cast(*this).p_; } /** * @brief Less than operator. */ - CUDA_HOST_DEVICE_CALLABLE bool operator<(T const& rhs) const + CUDF_HOST_DEVICE inline bool operator<(T const& rhs) const { return static_cast(*this).p_ < rhs.p_; } /** * @brief Greater than operator. */ - CUDA_HOST_DEVICE_CALLABLE bool operator>(T const& rhs) const + CUDF_HOST_DEVICE inline bool operator>(T const& rhs) const { return static_cast(*this).p_ > rhs.p_; } /** * @brief Less than or equals to operator. */ - CUDA_HOST_DEVICE_CALLABLE bool operator<=(T const& rhs) const + CUDF_HOST_DEVICE inline bool operator<=(T const& rhs) const { return static_cast(*this).p_ <= rhs.p_; } /** * @brief Greater than or equals to operator. */ - CUDA_HOST_DEVICE_CALLABLE bool operator>=(T const& rhs) const + CUDF_HOST_DEVICE inline bool operator>=(T const& rhs) const { return static_cast(*this).p_ >= rhs.p_; } @@ -253,7 +253,7 @@ struct input_indexalator : base_indexalator { /** * @brief Indirection operator returns the value at the current iterator position. 
*/ - CUDA_DEVICE_CALLABLE size_type operator*() const { return operator[](0); } + __device__ inline size_type operator*() const { return operator[](0); } /** * @brief Dispatch functor for resolving a size_type value from any index type. @@ -275,7 +275,7 @@ struct input_indexalator : base_indexalator { * @brief Array subscript operator returns a value at the input * `idx` position as a `size_type` value. */ - CUDA_DEVICE_CALLABLE size_type operator[](size_type idx) const + __device__ inline size_type operator[](size_type idx) const { void const* tp = p_ + (idx * width_); return type_dispatcher(dtype_, index_as_size_type{}, tp); @@ -339,14 +339,14 @@ struct output_indexalator : base_indexalator { * @brief Indirection operator returns this iterator instance in order * to capture the `operator=(size_type)` calls. */ - CUDA_DEVICE_CALLABLE output_indexalator const& operator*() const { return *this; } + __device__ inline output_indexalator const& operator*() const { return *this; } /** * @brief Array subscript operator returns an iterator instance at the specified `idx` position. * * This allows capturing the subsequent `operator=(size_type)` call in this class. */ - CUDA_DEVICE_CALLABLE output_indexalator const operator[](size_type idx) const + __device__ inline output_indexalator const operator[](size_type idx) const { output_indexalator tmp{*this}; tmp.p_ += (idx * width_); @@ -372,7 +372,7 @@ struct output_indexalator : base_indexalator { /** * @brief Assign a size_type value to the current iterator position. 
*/ - CUDA_DEVICE_CALLABLE output_indexalator const& operator=(size_type const value) const + __device__ inline output_indexalator const& operator=(size_type const value) const { void* tp = p_; type_dispatcher(dtype_, size_type_to_index{}, tp, value); diff --git a/cpp/include/cudf/detail/iterator.cuh b/cpp/include/cudf/detail/iterator.cuh index 01742384972..10d9cda55dd 100644 --- a/cpp/include/cudf/detail/iterator.cuh +++ b/cpp/include/cudf/detail/iterator.cuh @@ -107,8 +107,7 @@ struct null_replaced_value_accessor { if (has_nulls) CUDF_EXPECTS(col.nullable(), "column with nulls must have a validity bitmask"); } - CUDA_DEVICE_CALLABLE - Element operator()(cudf::size_type i) const + __device__ inline Element operator()(cudf::size_type i) const { return has_nulls && col.is_null_nocheck(i) ? null_replacement : col.element(i); } @@ -135,8 +134,7 @@ struct validity_accessor { CUDF_EXPECTS(_col.nullable(), "Unexpected non-nullable column."); } - CUDA_DEVICE_CALLABLE - bool operator()(cudf::size_type i) const { return col.is_valid_nocheck(i); } + __device__ inline bool operator()(cudf::size_type i) const { return col.is_valid_nocheck(i); } }; /** @@ -344,8 +342,7 @@ struct scalar_value_accessor { * * @return value of the scalar. */ - CUDA_DEVICE_CALLABLE - const Element operator()(size_type) const + __device__ inline const Element operator()(size_type) const { #if defined(__CUDA_ARCH__) return dscalar.value(); @@ -423,8 +420,7 @@ struct scalar_optional_accessor : public scalar_value_accessor { * * @return a thrust::optional for the scalar value. */ - CUDA_HOST_DEVICE_CALLABLE - const value_type operator()(size_type) const + CUDF_HOST_DEVICE inline const value_type operator()(size_type) const { if (has_nulls) { return (super_t::dscalar.is_valid()) ? Element{super_t::dscalar.value()} @@ -457,8 +453,7 @@ struct scalar_pair_accessor : public scalar_value_accessor { * * @return a pair with value and validity of the scalar. 
*/ - CUDA_HOST_DEVICE_CALLABLE - const value_type operator()(size_type) const + CUDF_HOST_DEVICE inline const value_type operator()(size_type) const { #if defined(__CUDA_ARCH__) return {Element(super_t::dscalar.value()), super_t::dscalar.is_valid()}; @@ -509,8 +504,7 @@ struct scalar_representation_pair_accessor : public scalar_value_accessor::value, void>* = nullptr> - CUDA_DEVICE_CALLABLE rep_type get_rep(DeviceScalar const& dscalar) const + __device__ inline rep_type get_rep(DeviceScalar const& dscalar) const { return dscalar.value(); } template ::value, void>* = nullptr> - CUDA_DEVICE_CALLABLE rep_type get_rep(DeviceScalar const& dscalar) const + __device__ inline rep_type get_rep(DeviceScalar const& dscalar) const { return dscalar.rep(); } diff --git a/cpp/include/cudf/detail/null_mask.cuh b/cpp/include/cudf/detail/null_mask.cuh index 6090477c28d..df06ad9e4f3 100644 --- a/cpp/include/cudf/detail/null_mask.cuh +++ b/cpp/include/cudf/detail/null_mask.cuh @@ -261,7 +261,7 @@ __global__ void subtract_set_bits_range_boundaries_kernel(bitmask_type const* bi */ struct bit_to_word_index { bit_to_word_index(bool inclusive) : inclusive(inclusive) {} - CUDA_DEVICE_CALLABLE size_type operator()(const size_type& bit_index) const + __device__ inline size_type operator()(const size_type& bit_index) const { return word_index(bit_index) + ((inclusive || intra_word_index(bit_index) == 0) ? 0 : 1); } @@ -269,7 +269,7 @@ struct bit_to_word_index { }; struct popc { - CUDA_DEVICE_CALLABLE size_type operator()(bitmask_type word) const { return __popc(word); } + __device__ inline size_type operator()(bitmask_type word) const { return __popc(word); } }; // Count set/unset bits in a segmented null mask, using offset iterators accessible by the device. 
@@ -377,7 +377,7 @@ size_type validate_segmented_indices(IndexIterator indices_begin, IndexIterator } struct index_alternator { - CUDA_DEVICE_CALLABLE size_type operator()(const size_type& i) const + __device__ inline size_type operator()(const size_type& i) const { return *(d_indices + 2 * i + (is_end ? 1 : 0)); } diff --git a/cpp/include/cudf/detail/reduction_operators.cuh b/cpp/include/cudf/detail/reduction_operators.cuh index 866e26cd655..5a0cb4c1714 100644 --- a/cpp/include/cudf/detail/reduction_operators.cuh +++ b/cpp/include/cudf/detail/reduction_operators.cuh @@ -19,7 +19,7 @@ #include #include #include -#include //for CUDA_HOST_DEVICE_CALLABLE +#include //for CUDF_HOST_DEVICE #include #include @@ -32,14 +32,12 @@ struct var_std { ResultType value; /// the value ResultType value_squared; /// the value of squared - CUDA_HOST_DEVICE_CALLABLE - var_std(ResultType _value = 0, ResultType _value_squared = 0) + CUDF_HOST_DEVICE inline var_std(ResultType _value = 0, ResultType _value_squared = 0) : value(_value), value_squared(_value_squared){}; using this_t = var_std; - CUDA_HOST_DEVICE_CALLABLE - this_t operator+(this_t const& rhs) const + CUDF_HOST_DEVICE inline this_t operator+(this_t const& rhs) const { return this_t((this->value + rhs.value), (this->value_squared + rhs.value_squared)); }; @@ -50,8 +48,10 @@ template struct transformer_var_std { using OutputType = var_std; - CUDA_HOST_DEVICE_CALLABLE - OutputType operator()(ResultType const& value) { return OutputType(value, value * value); }; + CUDF_HOST_DEVICE inline OutputType operator()(ResultType const& value) + { + return OutputType(value, value * value); + }; }; // ------------------------------------------------------------------------ @@ -201,9 +201,9 @@ struct compound_op : public simple_op { * @return transformed output result of compound operator */ template - CUDA_HOST_DEVICE_CALLABLE static ResultType compute_result(const IntermediateType& input, - const cudf::size_type& count, - const 
cudf::size_type& ddof) + CUDF_HOST_DEVICE inline static ResultType compute_result(const IntermediateType& input, + const cudf::size_type& count, + const cudf::size_type& ddof) { // Enforced interface return Derived::template intermediate::compute_result(input, count, ddof); @@ -230,10 +230,9 @@ struct mean : public compound_op { using IntermediateType = ResultType; // sum value // compute `mean` from intermediate type `IntermediateType` - CUDA_HOST_DEVICE_CALLABLE - static ResultType compute_result(const IntermediateType& input, - const cudf::size_type& count, - const cudf::size_type& ddof) + CUDF_HOST_DEVICE inline static ResultType compute_result(const IntermediateType& input, + const cudf::size_type& count, + const cudf::size_type& ddof) { return (input / count); }; @@ -252,10 +251,9 @@ struct variance : public compound_op { using IntermediateType = var_std; // with sum of value, and sum of squared value // compute `variance` from intermediate type `IntermediateType` - CUDA_HOST_DEVICE_CALLABLE - static ResultType compute_result(const IntermediateType& input, - const cudf::size_type& count, - const cudf::size_type& ddof) + CUDF_HOST_DEVICE inline static ResultType compute_result(const IntermediateType& input, + const cudf::size_type& count, + const cudf::size_type& ddof) { ResultType mean = input.value / count; ResultType asum = input.value_squared; @@ -279,10 +277,9 @@ struct standard_deviation : public compound_op { using IntermediateType = var_std; // with sum of value, and sum of squared value // compute `standard deviation` from intermediate type `IntermediateType` - CUDA_HOST_DEVICE_CALLABLE - static ResultType compute_result(const IntermediateType& input, - const cudf::size_type& count, - const cudf::size_type& ddof) + CUDF_HOST_DEVICE inline static ResultType compute_result(const IntermediateType& input, + const cudf::size_type& count, + const cudf::size_type& ddof) { using intermediateOp = variance::template intermediate; ResultType var = 
intermediateOp::compute_result(input, count, ddof); diff --git a/cpp/include/cudf/detail/utilities/device_operators.cuh b/cpp/include/cudf/detail/utilities/device_operators.cuh index 95605dc8a71..a59ad4c42ee 100644 --- a/cpp/include/cudf/detail/utilities/device_operators.cuh +++ b/cpp/include/cudf/detail/utilities/device_operators.cuh @@ -40,7 +40,7 @@ namespace detail { template ()>* = nullptr> -CUDA_HOST_DEVICE_CALLABLE auto min(LHS const& lhs, RHS const& rhs) +CUDF_HOST_DEVICE inline auto min(LHS const& lhs, RHS const& rhs) { return std::min(lhs, rhs); } @@ -51,7 +51,7 @@ CUDA_HOST_DEVICE_CALLABLE auto min(LHS const& lhs, RHS const& rhs) template ()>* = nullptr> -CUDA_HOST_DEVICE_CALLABLE auto max(LHS const& lhs, RHS const& rhs) +CUDF_HOST_DEVICE inline auto max(LHS const& lhs, RHS const& rhs) { return std::max(lhs, rhs); } @@ -62,7 +62,7 @@ CUDA_HOST_DEVICE_CALLABLE auto max(LHS const& lhs, RHS const& rhs) */ struct DeviceSum { template ()>* = nullptr> - CUDA_HOST_DEVICE_CALLABLE auto operator()(const T& lhs, const T& rhs) -> decltype(lhs + rhs) + CUDF_HOST_DEVICE inline auto operator()(const T& lhs, const T& rhs) -> decltype(lhs + rhs) { return lhs + rhs; } @@ -94,13 +94,13 @@ struct DeviceSum { */ struct DeviceCount { template ()>* = nullptr> - CUDA_HOST_DEVICE_CALLABLE T operator()(const T& lhs, const T& rhs) + CUDF_HOST_DEVICE inline T operator()(const T& lhs, const T& rhs) { return T{DeviceCount{}(lhs.time_since_epoch(), rhs.time_since_epoch())}; } template ()>* = nullptr> - CUDA_HOST_DEVICE_CALLABLE T operator()(const T&, const T& rhs) + CUDF_HOST_DEVICE inline T operator()(const T&, const T& rhs) { return rhs + T{1}; } @@ -117,7 +117,7 @@ struct DeviceCount { */ struct DeviceMin { template - CUDA_HOST_DEVICE_CALLABLE auto operator()(const T& lhs, const T& rhs) + CUDF_HOST_DEVICE inline auto operator()(const T& lhs, const T& rhs) -> decltype(cudf::detail::min(lhs, rhs)) { return numeric::detail::min(lhs, rhs); @@ -142,7 +142,7 @@ struct DeviceMin { // 
@brief identity specialized for string_view template >* = nullptr> - CUDA_HOST_DEVICE_CALLABLE static constexpr T identity() + CUDF_HOST_DEVICE inline static constexpr T identity() { return string_view::max(); } @@ -159,7 +159,7 @@ struct DeviceMin { */ struct DeviceMax { template - CUDA_HOST_DEVICE_CALLABLE auto operator()(const T& lhs, const T& rhs) + CUDF_HOST_DEVICE inline auto operator()(const T& lhs, const T& rhs) -> decltype(cudf::detail::max(lhs, rhs)) { return numeric::detail::max(lhs, rhs); @@ -183,7 +183,7 @@ struct DeviceMax { } template >* = nullptr> - CUDA_HOST_DEVICE_CALLABLE static constexpr T identity() + CUDF_HOST_DEVICE inline static constexpr T identity() { return string_view::min(); } @@ -200,7 +200,7 @@ struct DeviceMax { */ struct DeviceProduct { template ()>* = nullptr> - CUDA_HOST_DEVICE_CALLABLE auto operator()(const T& lhs, const T& rhs) -> decltype(lhs * rhs) + CUDF_HOST_DEVICE inline auto operator()(const T& lhs, const T& rhs) -> decltype(lhs * rhs) { return lhs * rhs; } @@ -224,7 +224,7 @@ struct DeviceProduct { */ struct DeviceAnd { template ::value>* = nullptr> - CUDA_HOST_DEVICE_CALLABLE auto operator()(const T& lhs, const T& rhs) -> decltype(lhs & rhs) + CUDF_HOST_DEVICE inline auto operator()(const T& lhs, const T& rhs) -> decltype(lhs & rhs) { return (lhs & rhs); } @@ -235,7 +235,7 @@ struct DeviceAnd { */ struct DeviceOr { template ::value>* = nullptr> - CUDA_HOST_DEVICE_CALLABLE auto operator()(const T& lhs, const T& rhs) -> decltype(lhs | rhs) + CUDF_HOST_DEVICE inline auto operator()(const T& lhs, const T& rhs) -> decltype(lhs | rhs) { return (lhs | rhs); } @@ -246,7 +246,7 @@ struct DeviceOr { */ struct DeviceXor { template ::value>* = nullptr> - CUDA_HOST_DEVICE_CALLABLE auto operator()(const T& lhs, const T& rhs) -> decltype(lhs ^ rhs) + CUDF_HOST_DEVICE inline auto operator()(const T& lhs, const T& rhs) -> decltype(lhs ^ rhs) { return (lhs ^ rhs); } @@ -258,7 +258,7 @@ struct DeviceXor { struct DeviceLeadLag { const 
size_type row_offset; - explicit CUDA_HOST_DEVICE_CALLABLE DeviceLeadLag(size_type offset_) : row_offset(offset_) {} + explicit CUDF_HOST_DEVICE inline DeviceLeadLag(size_type offset_) : row_offset(offset_) {} }; } // namespace cudf diff --git a/cpp/include/cudf/detail/utilities/hash_functions.cuh b/cpp/include/cudf/detail/utilities/hash_functions.cuh index f3390d9387b..c35d24ddeac 100644 --- a/cpp/include/cudf/detail/utilities/hash_functions.cuh +++ b/cpp/include/cudf/detail/utilities/hash_functions.cuh @@ -31,7 +31,7 @@ namespace detail { * Normalization of floating point NaNs and zeros, passthrough for all other values. */ template -T CUDA_DEVICE_CALLABLE normalize_nans_and_zeros(T const& key) +T __device__ inline normalize_nans_and_zeros(T const& key) { if constexpr (is_floating_point()) { if (isnan(key)) { @@ -50,7 +50,7 @@ T CUDA_DEVICE_CALLABLE normalize_nans_and_zeros(T const& key) * Licensed under the MIT license. * See file LICENSE for detail or copy at https://opensource.org/licenses/MIT */ -void CUDA_DEVICE_CALLABLE uint32ToLowercaseHexString(uint32_t num, char* destination) +void __device__ inline uint32ToLowercaseHexString(uint32_t num, char* destination) { // Transform 0xABCD1234 => 0x0000ABCD00001234 => 0x0B0A0D0C02010403 uint64_t x = num; @@ -86,12 +86,12 @@ struct MurmurHash3_32 { MurmurHash3_32() = default; constexpr MurmurHash3_32(uint32_t seed) : m_seed(seed) {} - CUDA_DEVICE_CALLABLE uint32_t rotl32(uint32_t x, int8_t r) const + __device__ inline uint32_t rotl32(uint32_t x, int8_t r) const { return (x << r) | (x >> (32 - r)); } - CUDA_DEVICE_CALLABLE uint32_t fmix32(uint32_t h) const + __device__ inline uint32_t fmix32(uint32_t h) const { h ^= h >> 16; h *= 0x85ebca6b; @@ -118,7 +118,7 @@ struct MurmurHash3_32 { * * @returns A hash value that intelligently combines the lhs and rhs hash values */ - CUDA_DEVICE_CALLABLE result_type hash_combine(result_type lhs, result_type rhs) + __device__ inline result_type hash_combine(result_type lhs, 
result_type rhs) { result_type combined{lhs}; @@ -127,11 +127,11 @@ struct MurmurHash3_32 { return combined; } - result_type CUDA_DEVICE_CALLABLE operator()(Key const& key) const { return compute(key); } + result_type __device__ inline operator()(Key const& key) const { return compute(key); } // compute wrapper for floating point types template ::value>* = nullptr> - hash_value_type CUDA_DEVICE_CALLABLE compute_floating_point(T const& key) const + hash_value_type __device__ inline compute_floating_point(T const& key) const { if (key == T{0.0}) { return compute(T{0.0}); @@ -144,7 +144,7 @@ struct MurmurHash3_32 { } template - result_type CUDA_DEVICE_CALLABLE compute(TKey const& key) const + result_type __device__ inline compute(TKey const& key) const { constexpr int len = sizeof(argument_type); uint8_t const* const data = reinterpret_cast(&key); @@ -191,7 +191,7 @@ struct MurmurHash3_32 { }; template <> -hash_value_type CUDA_DEVICE_CALLABLE MurmurHash3_32::operator()(bool const& key) const +hash_value_type __device__ inline MurmurHash3_32::operator()(bool const& key) const { return this->compute(static_cast(key)); } @@ -200,8 +200,8 @@ hash_value_type CUDA_DEVICE_CALLABLE MurmurHash3_32::operator()(bool const * @brief Specialization of MurmurHash3_32 operator for strings. 
*/ template <> -hash_value_type CUDA_DEVICE_CALLABLE -MurmurHash3_32::operator()(cudf::string_view const& key) const +hash_value_type __device__ inline MurmurHash3_32::operator()( + cudf::string_view const& key) const { auto const len = key.size_bytes(); uint8_t const* data = reinterpret_cast(key.data()); @@ -249,49 +249,49 @@ MurmurHash3_32::operator()(cudf::string_view const& key) cons } template <> -hash_value_type CUDA_DEVICE_CALLABLE MurmurHash3_32::operator()(float const& key) const +hash_value_type __device__ inline MurmurHash3_32::operator()(float const& key) const { return this->compute_floating_point(key); } template <> -hash_value_type CUDA_DEVICE_CALLABLE MurmurHash3_32::operator()(double const& key) const +hash_value_type __device__ inline MurmurHash3_32::operator()(double const& key) const { return this->compute_floating_point(key); } template <> -hash_value_type CUDA_DEVICE_CALLABLE -MurmurHash3_32::operator()(numeric::decimal32 const& key) const +hash_value_type __device__ inline MurmurHash3_32::operator()( + numeric::decimal32 const& key) const { return this->compute(key.value()); } template <> -hash_value_type CUDA_DEVICE_CALLABLE -MurmurHash3_32::operator()(numeric::decimal64 const& key) const +hash_value_type __device__ inline MurmurHash3_32::operator()( + numeric::decimal64 const& key) const { return this->compute(key.value()); } template <> -hash_value_type CUDA_DEVICE_CALLABLE -MurmurHash3_32::operator()(numeric::decimal128 const& key) const +hash_value_type __device__ inline MurmurHash3_32::operator()( + numeric::decimal128 const& key) const { return this->compute(key.value()); } template <> -hash_value_type CUDA_DEVICE_CALLABLE -MurmurHash3_32::operator()(cudf::list_view const& key) const +hash_value_type __device__ inline MurmurHash3_32::operator()( + cudf::list_view const& key) const { cudf_assert(false && "List column hashing is not supported"); return 0; } template <> -hash_value_type CUDA_DEVICE_CALLABLE 
-MurmurHash3_32::operator()(cudf::struct_view const& key) const +hash_value_type __device__ inline MurmurHash3_32::operator()( + cudf::struct_view const& key) const { cudf_assert(false && "Direct hashing of struct_view is not supported"); return 0; @@ -305,12 +305,12 @@ struct SparkMurmurHash3_32 { SparkMurmurHash3_32() = default; constexpr SparkMurmurHash3_32(uint32_t seed) : m_seed(seed) {} - CUDA_DEVICE_CALLABLE uint32_t rotl32(uint32_t x, int8_t r) const + __device__ inline uint32_t rotl32(uint32_t x, int8_t r) const { return (x << r) | (x >> (32 - r)); } - CUDA_DEVICE_CALLABLE uint32_t fmix32(uint32_t h) const + __device__ inline uint32_t fmix32(uint32_t h) const { h ^= h >> 16; h *= 0x85ebca6b; @@ -320,11 +320,11 @@ struct SparkMurmurHash3_32 { return h; } - result_type CUDA_DEVICE_CALLABLE operator()(Key const& key) const { return compute(key); } + result_type __device__ inline operator()(Key const& key) const { return compute(key); } // compute wrapper for floating point types template ::value>* = nullptr> - hash_value_type CUDA_DEVICE_CALLABLE compute_floating_point(T const& key) const + hash_value_type __device__ inline compute_floating_point(T const& key) const { if (isnan(key)) { T nan = std::numeric_limits::quiet_NaN(); @@ -335,7 +335,7 @@ struct SparkMurmurHash3_32 { } template - result_type CUDA_DEVICE_CALLABLE compute(TKey const& key) const + result_type __device__ inline compute(TKey const& key) const { constexpr int len = sizeof(TKey); int8_t const* const data = reinterpret_cast(&key); @@ -379,71 +379,68 @@ struct SparkMurmurHash3_32 { }; template <> -hash_value_type CUDA_DEVICE_CALLABLE SparkMurmurHash3_32::operator()(bool const& key) const +hash_value_type __device__ inline SparkMurmurHash3_32::operator()(bool const& key) const { return this->compute(key); } template <> -hash_value_type CUDA_DEVICE_CALLABLE -SparkMurmurHash3_32::operator()(int8_t const& key) const +hash_value_type __device__ inline SparkMurmurHash3_32::operator()(int8_t const& 
key) const { return this->compute(key); } template <> -hash_value_type CUDA_DEVICE_CALLABLE -SparkMurmurHash3_32::operator()(uint8_t const& key) const +hash_value_type __device__ inline SparkMurmurHash3_32::operator()(uint8_t const& key) const { return this->compute(key); } template <> -hash_value_type CUDA_DEVICE_CALLABLE -SparkMurmurHash3_32::operator()(int16_t const& key) const +hash_value_type __device__ inline SparkMurmurHash3_32::operator()(int16_t const& key) const { return this->compute(key); } template <> -hash_value_type CUDA_DEVICE_CALLABLE -SparkMurmurHash3_32::operator()(uint16_t const& key) const +hash_value_type __device__ inline SparkMurmurHash3_32::operator()( + uint16_t const& key) const { return this->compute(key); } template <> -hash_value_type CUDA_DEVICE_CALLABLE -SparkMurmurHash3_32::operator()(numeric::decimal32 const& key) const +hash_value_type __device__ inline SparkMurmurHash3_32::operator()( + numeric::decimal32 const& key) const { return this->compute(key.value()); } template <> -hash_value_type CUDA_DEVICE_CALLABLE -SparkMurmurHash3_32::operator()(numeric::decimal64 const& key) const +hash_value_type __device__ inline SparkMurmurHash3_32::operator()( + numeric::decimal64 const& key) const { return this->compute(key.value()); } template <> -hash_value_type CUDA_DEVICE_CALLABLE -SparkMurmurHash3_32::operator()(numeric::decimal128 const& key) const +hash_value_type __device__ inline SparkMurmurHash3_32::operator()( + numeric::decimal128 const& key) const { return this->compute<__int128_t>(key.value()); } template <> -hash_value_type CUDA_DEVICE_CALLABLE -SparkMurmurHash3_32::operator()(cudf::list_view const& key) const +hash_value_type __device__ inline SparkMurmurHash3_32::operator()( + cudf::list_view const& key) const { cudf_assert(false && "List column hashing is not supported"); return 0; } template <> -hash_value_type CUDA_DEVICE_CALLABLE -SparkMurmurHash3_32::operator()(cudf::struct_view const& key) const +hash_value_type 
__device__ inline SparkMurmurHash3_32::operator()( + cudf::struct_view const& key) const { cudf_assert(false && "Direct hashing of struct_view is not supported"); return 0; @@ -453,8 +450,8 @@ SparkMurmurHash3_32::operator()(cudf::struct_view const& key) * @brief Specialization of MurmurHash3_32 operator for strings. */ template <> -hash_value_type CUDA_DEVICE_CALLABLE -SparkMurmurHash3_32::operator()(cudf::string_view const& key) const +hash_value_type __device__ inline SparkMurmurHash3_32::operator()( + cudf::string_view const& key) const { auto const len = key.size_bytes(); int8_t const* data = reinterpret_cast(key.data()); @@ -499,14 +496,13 @@ SparkMurmurHash3_32::operator()(cudf::string_view const& key) } template <> -hash_value_type CUDA_DEVICE_CALLABLE SparkMurmurHash3_32::operator()(float const& key) const +hash_value_type __device__ inline SparkMurmurHash3_32::operator()(float const& key) const { return this->compute_floating_point(key); } template <> -hash_value_type CUDA_DEVICE_CALLABLE -SparkMurmurHash3_32::operator()(double const& key) const +hash_value_type __device__ inline SparkMurmurHash3_32::operator()(double const& key) const { return this->compute_floating_point(key); } diff --git a/cpp/include/cudf/detail/utilities/transform_unary_functions.cuh b/cpp/include/cudf/detail/utilities/transform_unary_functions.cuh index 05a788abd45..12774f57c6a 100644 --- a/cpp/include/cudf/detail/utilities/transform_unary_functions.cuh +++ b/cpp/include/cudf/detail/utilities/transform_unary_functions.cuh @@ -43,14 +43,13 @@ struct null_replacing_transformer { using type = ResultType; Functor f; type replacement; - CUDA_HOST_DEVICE_CALLABLE - null_replacing_transformer(type null_replacement, Functor transformer) + CUDF_HOST_DEVICE inline null_replacing_transformer(type null_replacement, Functor transformer) : f(transformer), replacement(null_replacement) { } template - CUDA_HOST_DEVICE_CALLABLE type operator()(thrust::pair const& pair_value) + CUDF_HOST_DEVICE 
inline type operator()(thrust::pair const& pair_value) { if (pair_value.second) return f(pair_value.first); @@ -76,22 +75,21 @@ struct meanvar { ElementType value_squared; /// the value of squared cudf::size_type count; /// the count - CUDA_HOST_DEVICE_CALLABLE - meanvar(ElementType _value = 0, ElementType _value_squared = 0, cudf::size_type _count = 0) + CUDF_HOST_DEVICE inline meanvar(ElementType _value = 0, + ElementType _value_squared = 0, + cudf::size_type _count = 0) : value(_value), value_squared(_value_squared), count(_count){}; using this_t = cudf::meanvar; - CUDA_HOST_DEVICE_CALLABLE - this_t operator+(this_t const& rhs) const + CUDF_HOST_DEVICE inline this_t operator+(this_t const& rhs) const { return this_t((this->value + rhs.value), (this->value_squared + rhs.value_squared), (this->count + rhs.count)); }; - CUDA_HOST_DEVICE_CALLABLE - bool operator==(this_t const& rhs) const + CUDF_HOST_DEVICE inline bool operator==(this_t const& rhs) const { return ((this->value == rhs.value) && (this->value_squared == rhs.value_squared) && (this->count == rhs.count)); @@ -113,8 +111,10 @@ struct meanvar { */ template struct transformer_squared { - CUDA_HOST_DEVICE_CALLABLE - ElementType operator()(ElementType const& value) { return (value * value); }; + CUDF_HOST_DEVICE inline ElementType operator()(ElementType const& value) + { + return (value * value); + }; }; /** @@ -130,8 +130,7 @@ template struct transformer_meanvar { using ResultType = meanvar; - CUDA_HOST_DEVICE_CALLABLE - ResultType operator()(thrust::pair const& pair) + CUDF_HOST_DEVICE inline ResultType operator()(thrust::pair const& pair) { ElementType v = pair.first; return meanvar(v, v * v, (pair.second) ? 
1 : 0); diff --git a/cpp/include/cudf/fixed_point/fixed_point.hpp b/cpp/include/cudf/fixed_point/fixed_point.hpp index e8223b53997..727dce0db9d 100644 --- a/cpp/include/cudf/fixed_point/fixed_point.hpp +++ b/cpp/include/cudf/fixed_point/fixed_point.hpp @@ -79,7 +79,7 @@ template && is_supported_representation_type())>* = nullptr> -CUDA_HOST_DEVICE_CALLABLE Rep ipow(T exponent) +CUDF_HOST_DEVICE inline Rep ipow(T exponent) { cudf_assert(exponent >= 0 && "integer exponentiation with negative exponent is not possible."); if (exponent == 0) return static_cast(1); @@ -108,7 +108,7 @@ CUDA_HOST_DEVICE_CALLABLE Rep ipow(T exponent) * @return Shifted value of type T */ template -CUDA_HOST_DEVICE_CALLABLE constexpr T right_shift(T const& val, scale_type const& scale) +CUDF_HOST_DEVICE inline constexpr T right_shift(T const& val, scale_type const& scale) { return val / ipow(static_cast(scale)); } @@ -125,7 +125,7 @@ CUDA_HOST_DEVICE_CALLABLE constexpr T right_shift(T const& val, scale_type const * @return Shifted value of type T */ template -CUDA_HOST_DEVICE_CALLABLE constexpr T left_shift(T const& val, scale_type const& scale) +CUDF_HOST_DEVICE inline constexpr T left_shift(T const& val, scale_type const& scale) { return val * ipow(static_cast(-scale)); } @@ -144,7 +144,7 @@ CUDA_HOST_DEVICE_CALLABLE constexpr T left_shift(T const& val, scale_type const& * @return Shifted value of type T */ template -CUDA_HOST_DEVICE_CALLABLE constexpr T shift(T const& val, scale_type const& scale) +CUDF_HOST_DEVICE inline constexpr T shift(T const& val, scale_type const& scale) { if (scale == 0) return val; @@ -179,7 +179,7 @@ template () && is_supported_representation_type()>* = nullptr> - CUDA_HOST_DEVICE_CALLABLE explicit fixed_point(T const& value, scale_type const& scale) + CUDF_HOST_DEVICE inline explicit fixed_point(T const& value, scale_type const& scale) : _value{static_cast(detail::shift(value, scale))}, _scale{scale} { } @@ -226,7 +226,7 @@ class fixed_point { template () && 
is_supported_representation_type()>* = nullptr> - CUDA_HOST_DEVICE_CALLABLE explicit fixed_point(T const& value, scale_type const& scale) + CUDF_HOST_DEVICE inline explicit fixed_point(T const& value, scale_type const& scale) // `value` is cast to `Rep` to avoid overflow in cases where // constructing to `Rep` that is wider than `T` : _value{detail::shift(static_cast(value), scale)}, _scale{scale} @@ -238,8 +238,10 @@ class fixed_point { * * @param s scaled_integer that contains scale and already shifted value */ - CUDA_HOST_DEVICE_CALLABLE - explicit fixed_point(scaled_integer s) : _value{s.value}, _scale{s.scale} {} + CUDF_HOST_DEVICE inline explicit fixed_point(scaled_integer s) + : _value{s.value}, _scale{s.scale} + { + } /** * @brief "Scale-less" constructor that constructs `fixed_point` number with a specified @@ -247,7 +249,7 @@ class fixed_point { */ template ()>* = nullptr> - CUDA_HOST_DEVICE_CALLABLE fixed_point(T const& value) + CUDF_HOST_DEVICE inline fixed_point(T const& value) : _value{static_cast(value)}, _scale{scale_type{0}} { } @@ -256,8 +258,7 @@ class fixed_point { * @brief Default constructor that constructs `fixed_point` number with a * value and scale of zero */ - CUDA_HOST_DEVICE_CALLABLE - fixed_point() : _value{0}, _scale{scale_type{0}} {} + CUDF_HOST_DEVICE inline fixed_point() : _value{0}, _scale{scale_type{0}} {} /** * @brief Explicit conversion operator for casting to floating point types @@ -289,7 +290,7 @@ class fixed_point { return static_cast(detail::shift(value, scale_type{-_scale})); } - CUDA_HOST_DEVICE_CALLABLE operator scaled_integer() const + CUDF_HOST_DEVICE inline operator scaled_integer() const { return scaled_integer{_value, _scale}; } @@ -299,21 +300,21 @@ class fixed_point { * * @return The underlying value of the `fixed_point` number */ - CUDA_HOST_DEVICE_CALLABLE rep value() const { return _value; } + CUDF_HOST_DEVICE inline rep value() const { return _value; } /** * @brief Method that returns the scale of the 
`fixed_point` number * * @return The scale of the `fixed_point` number */ - CUDA_HOST_DEVICE_CALLABLE scale_type scale() const { return _scale; } + CUDF_HOST_DEVICE inline scale_type scale() const { return _scale; } /** * @brief Explicit conversion operator to `bool` * * @return The `fixed_point` value as a boolean (zero is `false`, nonzero is `true`) */ - CUDA_HOST_DEVICE_CALLABLE explicit constexpr operator bool() const + CUDF_HOST_DEVICE inline explicit constexpr operator bool() const { return static_cast(_value); } @@ -326,7 +327,7 @@ class fixed_point { * @return The sum */ template - CUDA_HOST_DEVICE_CALLABLE fixed_point& operator+=(fixed_point const& rhs) + CUDF_HOST_DEVICE inline fixed_point& operator+=(fixed_point const& rhs) { *this = *this + rhs; return *this; @@ -340,7 +341,7 @@ class fixed_point { * @return The product */ template - CUDA_HOST_DEVICE_CALLABLE fixed_point& operator*=(fixed_point const& rhs) + CUDF_HOST_DEVICE inline fixed_point& operator*=(fixed_point const& rhs) { *this = *this * rhs; return *this; @@ -354,7 +355,7 @@ class fixed_point { * @return The difference */ template - CUDA_HOST_DEVICE_CALLABLE fixed_point& operator-=(fixed_point const& rhs) + CUDF_HOST_DEVICE inline fixed_point& operator-=(fixed_point const& rhs) { *this = *this - rhs; return *this; @@ -368,7 +369,7 @@ class fixed_point { * @return The quotient */ template - CUDA_HOST_DEVICE_CALLABLE fixed_point& operator/=(fixed_point const& rhs) + CUDF_HOST_DEVICE inline fixed_point& operator/=(fixed_point const& rhs) { *this = *this / rhs; return *this; @@ -379,8 +380,7 @@ class fixed_point { * * @return The incremented result */ - CUDA_HOST_DEVICE_CALLABLE - fixed_point& operator++() + CUDF_HOST_DEVICE inline fixed_point& operator++() { *this = *this + fixed_point{1, scale_type{_scale}}; return *this; @@ -398,7 +398,7 @@ class fixed_point { * @return The resulting `fixed_point` sum */ template - CUDA_HOST_DEVICE_CALLABLE friend fixed_point operator+( + CUDF_HOST_DEVICE 
inline friend fixed_point operator+( fixed_point const& lhs, fixed_point const& rhs); /** @@ -413,7 +413,7 @@ class fixed_point { * @return The resulting `fixed_point` difference */ template - CUDA_HOST_DEVICE_CALLABLE friend fixed_point operator-( + CUDF_HOST_DEVICE inline friend fixed_point operator-( fixed_point const& lhs, fixed_point const& rhs); /** @@ -426,7 +426,7 @@ class fixed_point { * @return The resulting `fixed_point` product */ template - CUDA_HOST_DEVICE_CALLABLE friend fixed_point operator*( + CUDF_HOST_DEVICE inline friend fixed_point operator*( fixed_point const& lhs, fixed_point const& rhs); /** @@ -439,7 +439,7 @@ class fixed_point { * @return The resulting `fixed_point` quotient */ template - CUDA_HOST_DEVICE_CALLABLE friend fixed_point operator/( + CUDF_HOST_DEVICE inline friend fixed_point operator/( fixed_point const& lhs, fixed_point const& rhs); /** @@ -454,8 +454,8 @@ class fixed_point { * @return true if `lhs` and `rhs` are equal, false if not */ template - CUDA_HOST_DEVICE_CALLABLE friend bool operator==(fixed_point const& lhs, - fixed_point const& rhs); + CUDF_HOST_DEVICE inline friend bool operator==(fixed_point const& lhs, + fixed_point const& rhs); /** * @brief operator != (for comparing two `fixed_point` numbers) @@ -469,8 +469,8 @@ class fixed_point { * @return true if `lhs` and `rhs` are not equal, false if not */ template - CUDA_HOST_DEVICE_CALLABLE friend bool operator!=(fixed_point const& lhs, - fixed_point const& rhs); + CUDF_HOST_DEVICE inline friend bool operator!=(fixed_point const& lhs, + fixed_point const& rhs); /** * @brief operator <= (for comparing two `fixed_point` numbers) @@ -484,8 +484,8 @@ class fixed_point { * @return true if `lhs` less than or equal to `rhs`, false if not */ template - CUDA_HOST_DEVICE_CALLABLE friend bool operator<=(fixed_point const& lhs, - fixed_point const& rhs); + CUDF_HOST_DEVICE inline friend bool operator<=(fixed_point const& lhs, + fixed_point const& rhs); /** * @brief operator >= 
(for comparing two `fixed_point` numbers) @@ -499,8 +499,8 @@ class fixed_point { * @return true if `lhs` greater than or equal to `rhs`, false if not */ template - CUDA_HOST_DEVICE_CALLABLE friend bool operator>=(fixed_point const& lhs, - fixed_point const& rhs); + CUDF_HOST_DEVICE inline friend bool operator>=(fixed_point const& lhs, + fixed_point const& rhs); /** * @brief operator < (for comparing two `fixed_point` numbers) @@ -514,8 +514,8 @@ class fixed_point { * @return true if `lhs` less than `rhs`, false if not */ template - CUDA_HOST_DEVICE_CALLABLE friend bool operator<(fixed_point const& lhs, - fixed_point const& rhs); + CUDF_HOST_DEVICE inline friend bool operator<(fixed_point const& lhs, + fixed_point const& rhs); /** * @brief operator > (for comparing two `fixed_point` numbers) @@ -529,8 +529,8 @@ class fixed_point { * @return true if `lhs` greater than `rhs`, false if not */ template - CUDA_HOST_DEVICE_CALLABLE friend bool operator>(fixed_point const& lhs, - fixed_point const& rhs); + CUDF_HOST_DEVICE inline friend bool operator>(fixed_point const& lhs, + fixed_point const& rhs); /** * @brief Method for creating a `fixed_point` number with a new `scale` @@ -541,7 +541,7 @@ class fixed_point { * @param scale The `scale` of the returned `fixed_point` number * @return `fixed_point` number with a new `scale` */ - CUDA_HOST_DEVICE_CALLABLE fixed_point rescaled(scale_type scale) const + CUDF_HOST_DEVICE inline fixed_point rescaled(scale_type scale) const { if (scale == _scale) return *this; Rep const value = detail::shift(_value, scale_type{scale - _scale}); @@ -580,7 +580,7 @@ class fixed_point { * @return true if addition causes overflow, false otherwise */ template -CUDA_HOST_DEVICE_CALLABLE auto addition_overflow(T lhs, T rhs) +CUDF_HOST_DEVICE inline auto addition_overflow(T lhs, T rhs) { return rhs > 0 ? 
lhs > cuda::std::numeric_limits::max() - rhs : lhs < cuda::std::numeric_limits::min() - rhs; @@ -595,7 +595,7 @@ CUDA_HOST_DEVICE_CALLABLE auto addition_overflow(T lhs, T rhs) * @return true if subtraction causes overflow, false otherwise */ template -CUDA_HOST_DEVICE_CALLABLE auto subtraction_overflow(T lhs, T rhs) +CUDF_HOST_DEVICE inline auto subtraction_overflow(T lhs, T rhs) { return rhs > 0 ? lhs < cuda::std::numeric_limits::min() + rhs : lhs > cuda::std::numeric_limits::max() + rhs; @@ -610,7 +610,7 @@ CUDA_HOST_DEVICE_CALLABLE auto subtraction_overflow(T lhs, T rhs) * @return true if division causes overflow, false otherwise */ template -CUDA_HOST_DEVICE_CALLABLE auto division_overflow(T lhs, T rhs) +CUDF_HOST_DEVICE inline auto division_overflow(T lhs, T rhs) { return lhs == cuda::std::numeric_limits::min() && rhs == -1; } @@ -624,7 +624,7 @@ CUDA_HOST_DEVICE_CALLABLE auto division_overflow(T lhs, T rhs) * @return true if multiplication causes overflow, false otherwise */ template -CUDA_HOST_DEVICE_CALLABLE auto multiplication_overflow(T lhs, T rhs) +CUDF_HOST_DEVICE inline auto multiplication_overflow(T lhs, T rhs) { auto const min = cuda::std::numeric_limits::min(); auto const max = cuda::std::numeric_limits::max(); @@ -638,8 +638,8 @@ CUDA_HOST_DEVICE_CALLABLE auto multiplication_overflow(T lhs, T rhs) // PLUS Operation template -CUDA_HOST_DEVICE_CALLABLE fixed_point operator+(fixed_point const& lhs, - fixed_point const& rhs) +CUDF_HOST_DEVICE inline fixed_point operator+(fixed_point const& lhs, + fixed_point const& rhs) { auto const scale = std::min(lhs._scale, rhs._scale); auto const sum = lhs.rescaled(scale)._value + rhs.rescaled(scale)._value; @@ -656,8 +656,8 @@ CUDA_HOST_DEVICE_CALLABLE fixed_point operator+(fixed_point -CUDA_HOST_DEVICE_CALLABLE fixed_point operator-(fixed_point const& lhs, - fixed_point const& rhs) +CUDF_HOST_DEVICE inline fixed_point operator-(fixed_point const& lhs, + fixed_point const& rhs) { auto const scale = 
std::min(lhs._scale, rhs._scale); auto const diff = lhs.rescaled(scale)._value - rhs.rescaled(scale)._value; @@ -674,8 +674,8 @@ CUDA_HOST_DEVICE_CALLABLE fixed_point operator-(fixed_point -CUDA_HOST_DEVICE_CALLABLE fixed_point operator*(fixed_point const& lhs, - fixed_point const& rhs) +CUDF_HOST_DEVICE inline fixed_point operator*(fixed_point const& lhs, + fixed_point const& rhs) { #if defined(__CUDACC_DEBUG__) @@ -689,8 +689,8 @@ CUDA_HOST_DEVICE_CALLABLE fixed_point operator*(fixed_point -CUDA_HOST_DEVICE_CALLABLE fixed_point operator/(fixed_point const& lhs, - fixed_point const& rhs) +CUDF_HOST_DEVICE inline fixed_point operator/(fixed_point const& lhs, + fixed_point const& rhs) { #if defined(__CUDACC_DEBUG__) @@ -704,8 +704,8 @@ CUDA_HOST_DEVICE_CALLABLE fixed_point operator/(fixed_point -CUDA_HOST_DEVICE_CALLABLE bool operator==(fixed_point const& lhs, - fixed_point const& rhs) +CUDF_HOST_DEVICE inline bool operator==(fixed_point const& lhs, + fixed_point const& rhs) { auto const scale = std::min(lhs._scale, rhs._scale); return lhs.rescaled(scale)._value == rhs.rescaled(scale)._value; @@ -713,8 +713,8 @@ CUDA_HOST_DEVICE_CALLABLE bool operator==(fixed_point const& lhs, // EQUALITY NOT COMPARISON Operation template -CUDA_HOST_DEVICE_CALLABLE bool operator!=(fixed_point const& lhs, - fixed_point const& rhs) +CUDF_HOST_DEVICE inline bool operator!=(fixed_point const& lhs, + fixed_point const& rhs) { auto const scale = std::min(lhs._scale, rhs._scale); return lhs.rescaled(scale)._value != rhs.rescaled(scale)._value; @@ -722,8 +722,8 @@ CUDA_HOST_DEVICE_CALLABLE bool operator!=(fixed_point const& lhs, // LESS THAN OR EQUAL TO Operation template -CUDA_HOST_DEVICE_CALLABLE bool operator<=(fixed_point const& lhs, - fixed_point const& rhs) +CUDF_HOST_DEVICE inline bool operator<=(fixed_point const& lhs, + fixed_point const& rhs) { auto const scale = std::min(lhs._scale, rhs._scale); return lhs.rescaled(scale)._value <= rhs.rescaled(scale)._value; @@ -731,8 +731,8 @@ 
CUDA_HOST_DEVICE_CALLABLE bool operator<=(fixed_point const& lhs, // GREATER THAN OR EQUAL TO Operation template -CUDA_HOST_DEVICE_CALLABLE bool operator>=(fixed_point const& lhs, - fixed_point const& rhs) +CUDF_HOST_DEVICE inline bool operator>=(fixed_point const& lhs, + fixed_point const& rhs) { auto const scale = std::min(lhs._scale, rhs._scale); return lhs.rescaled(scale)._value >= rhs.rescaled(scale)._value; @@ -740,8 +740,8 @@ CUDA_HOST_DEVICE_CALLABLE bool operator>=(fixed_point const& lhs, // LESS THAN Operation template -CUDA_HOST_DEVICE_CALLABLE bool operator<(fixed_point const& lhs, - fixed_point const& rhs) +CUDF_HOST_DEVICE inline bool operator<(fixed_point const& lhs, + fixed_point const& rhs) { auto const scale = std::min(lhs._scale, rhs._scale); return lhs.rescaled(scale)._value < rhs.rescaled(scale)._value; @@ -749,8 +749,8 @@ CUDA_HOST_DEVICE_CALLABLE bool operator<(fixed_point const& lhs, // GREATER THAN Operation template -CUDA_HOST_DEVICE_CALLABLE bool operator>(fixed_point const& lhs, - fixed_point const& rhs) +CUDF_HOST_DEVICE inline bool operator>(fixed_point const& lhs, + fixed_point const& rhs) { auto const scale = std::min(lhs._scale, rhs._scale); return lhs.rescaled(scale)._value > rhs.rescaled(scale)._value; diff --git a/cpp/include/cudf/fixed_point/temporary.hpp b/cpp/include/cudf/fixed_point/temporary.hpp index 2b50e273517..be900f252f6 100644 --- a/cpp/include/cudf/fixed_point/temporary.hpp +++ b/cpp/include/cudf/fixed_point/temporary.hpp @@ -62,13 +62,13 @@ constexpr auto abs(T value) } template -CUDA_HOST_DEVICE_CALLABLE auto min(T lhs, T rhs) +CUDF_HOST_DEVICE inline auto min(T lhs, T rhs) { return lhs < rhs ? lhs : rhs; } template -CUDA_HOST_DEVICE_CALLABLE auto max(T lhs, T rhs) +CUDF_HOST_DEVICE inline auto max(T lhs, T rhs) { return lhs > rhs ? 
lhs : rhs; } diff --git a/cpp/include/cudf/lists/detail/scatter_helper.cuh b/cpp/include/cudf/lists/detail/scatter_helper.cuh index 7d0586ed6a6..bdf68037944 100644 --- a/cpp/include/cudf/lists/detail/scatter_helper.cuh +++ b/cpp/include/cudf/lists/detail/scatter_helper.cuh @@ -65,9 +65,9 @@ struct unbound_list_view { * @param lists_column The actual source/target lists column * @param row_index Index of the row in lists_column that this instance represents */ - CUDA_DEVICE_CALLABLE unbound_list_view(label_type scatter_source_label, - cudf::detail::lists_column_device_view const& lists_column, - size_type const& row_index) + __device__ inline unbound_list_view(label_type scatter_source_label, + cudf::detail::lists_column_device_view const& lists_column, + size_type const& row_index) : _label{scatter_source_label}, _row_index{row_index} { _size = list_device_view{lists_column, row_index}.size(); @@ -81,9 +81,9 @@ struct unbound_list_view { * @param row_index Index of the row that this instance represents in the source/target column * @param size The number of elements in this list row */ - CUDA_DEVICE_CALLABLE unbound_list_view(label_type scatter_source_label, - size_type const& row_index, - size_type const& size) + __device__ inline unbound_list_view(label_type scatter_source_label, + size_type const& row_index, + size_type const& size) : _label{scatter_source_label}, _row_index{row_index}, _size{size} { } @@ -91,17 +91,17 @@ struct unbound_list_view { /** * @brief Returns number of elements in this list row. 
*/ - CUDA_DEVICE_CALLABLE size_type size() const { return _size; } + __device__ inline size_type size() const { return _size; } /** * @brief Returns whether this row came from the `scatter()` source or target */ - CUDA_DEVICE_CALLABLE label_type label() const { return _label; } + __device__ inline label_type label() const { return _label; } /** * @brief Returns the index in the source/target column */ - CUDA_DEVICE_CALLABLE size_type row_index() const { return _row_index; } + __device__ inline size_type row_index() const { return _row_index; } /** * @brief Binds to source/target column (depending on SOURCE/TARGET labels), @@ -111,9 +111,9 @@ struct unbound_list_view { * @param scatter_target Target column for the scatter operation * @return A (bound) list_view for the row that this object represents */ - CUDA_DEVICE_CALLABLE list_device_view - bind_to_column(lists_column_device_view const& scatter_source, - lists_column_device_view const& scatter_target) const + __device__ inline list_device_view bind_to_column( + lists_column_device_view const& scatter_source, + lists_column_device_view const& scatter_target) const { return list_device_view(_label == label_type::SOURCE ? 
scatter_source : scatter_target, _row_index); diff --git a/cpp/include/cudf/lists/list_device_view.cuh b/cpp/include/cudf/lists/list_device_view.cuh index 659fb1e6b2a..5071f046e0c 100644 --- a/cpp/include/cudf/lists/list_device_view.cuh +++ b/cpp/include/cudf/lists/list_device_view.cuh @@ -32,8 +32,8 @@ class list_device_view { public: list_device_view() = default; - CUDA_DEVICE_CALLABLE list_device_view(lists_column_device_view const& lists_column, - size_type const& row_index) + __device__ inline list_device_view(lists_column_device_view const& lists_column, + size_type const& row_index) : lists_column(lists_column), _row_index(row_index) { column_device_view const& offsets = lists_column.offsets(); @@ -69,7 +69,7 @@ class list_device_view { * The offset of this element as stored in the child column (i.e. 5) * may be fetched using this method. */ - CUDA_DEVICE_CALLABLE size_type element_offset(size_type idx) const + __device__ inline size_type element_offset(size_type idx) const { cudf_assert(idx >= 0 && idx < size() && "idx out of bounds"); return begin_offset + idx; @@ -83,7 +83,7 @@ class list_device_view { * @return The element at the specified index of the list row. */ template - CUDA_DEVICE_CALLABLE T element(size_type idx) const + __device__ inline T element(size_type idx) const { return lists_column.child().element(element_offset(idx)); } @@ -91,7 +91,7 @@ class list_device_view { /** * @brief Checks whether element is null at specified index in the list row. */ - CUDA_DEVICE_CALLABLE bool is_null(size_type idx) const + __device__ inline bool is_null(size_type idx) const { cudf_assert(idx >= 0 && idx < size() && "Index out of bounds."); auto element_offset = begin_offset + idx; @@ -101,17 +101,17 @@ class list_device_view { /** * @brief Checks whether this list row is null. 
*/ - CUDA_DEVICE_CALLABLE bool is_null() const { return lists_column.is_null(_row_index); } + __device__ inline bool is_null() const { return lists_column.is_null(_row_index); } /** * @brief Fetches the number of elements in this list row. */ - CUDA_DEVICE_CALLABLE size_type size() const { return _size; } + __device__ inline size_type size() const { return _size; } /** * @brief Fetches the lists_column_device_view that contains this list. */ - CUDA_DEVICE_CALLABLE lists_column_device_view const& get_column() const { return lists_column; } + __device__ inline lists_column_device_view const& get_column() const { return lists_column; } template struct pair_accessor; @@ -141,7 +141,7 @@ class list_device_view { * 2. `p.second == false` */ template - CUDA_DEVICE_CALLABLE const_pair_iterator pair_begin() const + __device__ inline const_pair_iterator pair_begin() const { return const_pair_iterator{thrust::counting_iterator(0), pair_accessor{*this}}; } @@ -151,7 +151,7 @@ class list_device_view { * list_device_view. */ template - CUDA_DEVICE_CALLABLE const_pair_iterator pair_end() const + __device__ inline const_pair_iterator pair_end() const { return const_pair_iterator{thrust::counting_iterator(size()), pair_accessor{*this}}; @@ -173,7 +173,7 @@ class list_device_view { * 2. `p.second == false` */ template - CUDA_DEVICE_CALLABLE const_pair_rep_iterator pair_rep_begin() const + __device__ inline const_pair_rep_iterator pair_rep_begin() const { return const_pair_rep_iterator{thrust::counting_iterator(0), pair_rep_accessor{*this}}; @@ -184,7 +184,7 @@ class list_device_view { * list_device_view. */ template - CUDA_DEVICE_CALLABLE const_pair_rep_iterator pair_rep_end() const + __device__ inline const_pair_rep_iterator pair_rep_end() const { return const_pair_rep_iterator{thrust::counting_iterator(size()), pair_rep_accessor{*this}}; @@ -215,7 +215,7 @@ class list_device_view { * * @param _list The `list_device_view` whose rows are being accessed. 
*/ - explicit CUDA_HOST_DEVICE_CALLABLE pair_accessor(list_device_view const& _list) : list{_list} {} + explicit CUDF_HOST_DEVICE inline pair_accessor(list_device_view const& _list) : list{_list} {} /** * @brief Accessor for the {data, validity} pair at the specified index @@ -223,8 +223,7 @@ class list_device_view { * @param i Index into the list_device_view * @return A pair of data element and its validity flag. */ - CUDA_DEVICE_CALLABLE - thrust::pair operator()(cudf::size_type i) const + __device__ inline thrust::pair operator()(cudf::size_type i) const { return {list.element(i), !list.is_null(i)}; } @@ -253,8 +252,7 @@ class list_device_view { * * @param _list The `list_device_view` whose rows are being accessed. */ - explicit CUDA_HOST_DEVICE_CALLABLE pair_rep_accessor(list_device_view const& _list) - : list{_list} + explicit CUDF_HOST_DEVICE inline pair_rep_accessor(list_device_view const& _list) : list{_list} { } @@ -264,21 +262,20 @@ class list_device_view { * @param i Index into the list_device_view * @return A pair of data element and its validity flag. 
*/ - CUDA_DEVICE_CALLABLE - thrust::pair operator()(cudf::size_type i) const + __device__ inline thrust::pair operator()(cudf::size_type i) const { return {get_rep(i), !list.is_null(i)}; } private: template , void>* = nullptr> - CUDA_DEVICE_CALLABLE rep_type get_rep(cudf::size_type i) const + __device__ inline rep_type get_rep(cudf::size_type i) const { return list.element(i); } template , void>* = nullptr> - CUDA_DEVICE_CALLABLE rep_type get_rep(cudf::size_type i) const + __device__ inline rep_type get_rep(cudf::size_type i) const { return list.element(i).value(); } @@ -291,7 +288,7 @@ class list_device_view { */ struct list_size_functor { column_device_view const d_column; - CUDA_HOST_DEVICE_CALLABLE list_size_functor(column_device_view const& d_col) : d_column(d_col) + CUDF_HOST_DEVICE inline list_size_functor(column_device_view const& d_col) : d_column(d_col) { #if defined(__CUDA_ARCH__) cudf_assert(d_col.type().id() == type_id::LIST && "Only list type column is supported"); @@ -299,7 +296,7 @@ struct list_size_functor { CUDF_EXPECTS(d_col.type().id() == type_id::LIST, "Only list type column is supported"); #endif } - CUDA_DEVICE_CALLABLE size_type operator()(size_type idx) + __device__ inline size_type operator()(size_type idx) { if (d_column.is_null(idx)) return size_type{0}; auto d_offsets = diff --git a/cpp/include/cudf/lists/lists_column_device_view.cuh b/cpp/include/cudf/lists/lists_column_device_view.cuh index d8f082c9a42..aff088a7f44 100644 --- a/cpp/include/cudf/lists/lists_column_device_view.cuh +++ b/cpp/include/cudf/lists/lists_column_device_view.cuh @@ -46,12 +46,12 @@ class lists_column_device_view { /** * @brief Fetches number of rows in the lists column */ - CUDA_HOST_DEVICE_CALLABLE cudf::size_type size() const { return underlying.size(); } + CUDF_HOST_DEVICE inline cudf::size_type size() const { return underlying.size(); } /** * @brief Fetches the offsets column of the underlying list column. 
*/ - CUDA_DEVICE_CALLABLE column_device_view offsets() const + __device__ inline column_device_view offsets() const { return underlying.child(lists_column_view::offsets_column_index); } @@ -59,7 +59,7 @@ class lists_column_device_view { /** * @brief Fetches the child column of the underlying list column. */ - CUDA_DEVICE_CALLABLE column_device_view child() const + __device__ inline column_device_view child() const { return underlying.child(lists_column_view::child_column_index); } @@ -67,19 +67,19 @@ class lists_column_device_view { /** * @brief Indicates whether the list column is nullable. */ - CUDA_DEVICE_CALLABLE bool nullable() const { return underlying.nullable(); } + __device__ inline bool nullable() const { return underlying.nullable(); } /** * @brief Indicates whether the row (i.e. list) at the specified * index is null. */ - CUDA_DEVICE_CALLABLE bool is_null(size_type idx) const { return underlying.is_null(idx); } + __device__ inline bool is_null(size_type idx) const { return underlying.is_null(idx); } /** * @brief Fetches the offset of the underlying column_device_view, * in case it is a sliced/offset column. */ - CUDA_DEVICE_CALLABLE size_type offset() const { return underlying.offset(); } + __device__ inline size_type offset() const { return underlying.offset(); } private: column_device_view underlying; diff --git a/cpp/include/cudf/strings/json.hpp b/cpp/include/cudf/strings/json.hpp index 9081fa23eec..8435c47eaf5 100644 --- a/cpp/include/cudf/strings/json.hpp +++ b/cpp/include/cudf/strings/json.hpp @@ -48,7 +48,7 @@ class get_json_object_options { * @brief Returns true/false depending on whether single-quotes for representing strings * are allowed. 
*/ - CUDA_HOST_DEVICE_CALLABLE bool get_allow_single_quotes() const { return allow_single_quotes; } + CUDF_HOST_DEVICE inline bool get_allow_single_quotes() const { return allow_single_quotes; } /** * @brief Returns true/false depending on whether individually returned string values have @@ -72,7 +72,7 @@ class get_json_object_options { * * @endcode */ - CUDA_HOST_DEVICE_CALLABLE bool get_strip_quotes_from_single_strings() const + CUDF_HOST_DEVICE inline bool get_strip_quotes_from_single_strings() const { return strip_quotes_from_single_strings; } diff --git a/cpp/include/cudf/strings/string_view.cuh b/cpp/include/cudf/strings/string_view.cuh index 238d55d580e..43a90997c86 100644 --- a/cpp/include/cudf/strings/string_view.cuh +++ b/cpp/include/cudf/strings/string_view.cuh @@ -80,7 +80,7 @@ static __constant__ char max_string_sentinel[5]{"\xF7\xBF\xBF\xBF"}; * * @return An empty string */ -CUDA_HOST_DEVICE_CALLABLE string_view string_view::min() { return string_view(); } +CUDF_HOST_DEVICE inline string_view string_view::min() { return string_view(); } /** * @brief Return maximum value associated with the string type @@ -91,7 +91,7 @@ CUDA_HOST_DEVICE_CALLABLE string_view string_view::min() { return string_view(); * @return A string value which represents the highest possible valid UTF-8 encoded * character. 
*/ -CUDA_HOST_DEVICE_CALLABLE string_view string_view::max() +CUDF_HOST_DEVICE inline string_view string_view::max() { const char* psentinel{nullptr}; #if defined(__CUDA_ARCH__) diff --git a/cpp/include/cudf/strings/string_view.hpp b/cpp/include/cudf/strings/string_view.hpp index be182cb0e9d..22409ab3dc7 100644 --- a/cpp/include/cudf/strings/string_view.hpp +++ b/cpp/include/cudf/strings/string_view.hpp @@ -51,20 +51,20 @@ class string_view { /** * @brief Return the number of bytes in this string */ - CUDA_HOST_DEVICE_CALLABLE size_type size_bytes() const { return _bytes; } + CUDF_HOST_DEVICE inline size_type size_bytes() const { return _bytes; } /** * @brief Return the number of characters in this string */ - CUDA_DEVICE_CALLABLE size_type length() const; + __device__ inline size_type length() const; /** * @brief Return a pointer to the internal device array */ - CUDA_HOST_DEVICE_CALLABLE const char* data() const { return _data; } + CUDF_HOST_DEVICE inline const char* data() const { return _data; } /** * @brief Return true if string has no characters */ - CUDA_HOST_DEVICE_CALLABLE bool empty() const { return size_bytes() == 0; } + CUDF_HOST_DEVICE inline bool empty() const { return size_bytes() == 0; } /** * @brief Handy iterator for navigating through encoded characters. 
@@ -76,28 +76,28 @@ class string_view { using reference = char_utf8&; using pointer = char_utf8*; using iterator_category = std::input_iterator_tag; - CUDA_DEVICE_CALLABLE const_iterator(const string_view& str, size_type pos); + __device__ inline const_iterator(const string_view& str, size_type pos); const_iterator(const const_iterator& mit) = default; const_iterator(const_iterator&& mit) = default; const_iterator& operator=(const const_iterator&) = default; const_iterator& operator=(const_iterator&&) = default; - CUDA_DEVICE_CALLABLE const_iterator& operator++(); - CUDA_DEVICE_CALLABLE const_iterator operator++(int); - CUDA_DEVICE_CALLABLE const_iterator& operator+=(difference_type); - CUDA_DEVICE_CALLABLE const_iterator operator+(difference_type); - CUDA_DEVICE_CALLABLE const_iterator& operator--(); - CUDA_DEVICE_CALLABLE const_iterator operator--(int); - CUDA_DEVICE_CALLABLE const_iterator& operator-=(difference_type); - CUDA_DEVICE_CALLABLE const_iterator operator-(difference_type); - CUDA_DEVICE_CALLABLE bool operator==(const const_iterator&) const; - CUDA_DEVICE_CALLABLE bool operator!=(const const_iterator&) const; - CUDA_DEVICE_CALLABLE bool operator<(const const_iterator&) const; - CUDA_DEVICE_CALLABLE bool operator<=(const const_iterator&) const; - CUDA_DEVICE_CALLABLE bool operator>(const const_iterator&) const; - CUDA_DEVICE_CALLABLE bool operator>=(const const_iterator&) const; - CUDA_DEVICE_CALLABLE char_utf8 operator*() const; - CUDA_DEVICE_CALLABLE size_type position() const; - CUDA_DEVICE_CALLABLE size_type byte_offset() const; + __device__ inline const_iterator& operator++(); + __device__ inline const_iterator operator++(int); + __device__ inline const_iterator& operator+=(difference_type); + __device__ inline const_iterator operator+(difference_type); + __device__ inline const_iterator& operator--(); + __device__ inline const_iterator operator--(int); + __device__ inline const_iterator& operator-=(difference_type); + __device__ inline 
const_iterator operator-(difference_type); + __device__ inline bool operator==(const const_iterator&) const; + __device__ inline bool operator!=(const const_iterator&) const; + __device__ inline bool operator<(const const_iterator&) const; + __device__ inline bool operator<=(const const_iterator&) const; + __device__ inline bool operator>(const const_iterator&) const; + __device__ inline bool operator>=(const const_iterator&) const; + __device__ inline char_utf8 operator*() const; + __device__ inline size_type position() const; + __device__ inline size_type byte_offset() const; private: const char* p{}; @@ -109,24 +109,24 @@ class string_view { /** * @brief Return new iterator pointing to the beginning of this string */ - CUDA_DEVICE_CALLABLE const_iterator begin() const; + __device__ inline const_iterator begin() const; /** * @brief Return new iterator pointing past the end of this string */ - CUDA_DEVICE_CALLABLE const_iterator end() const; + __device__ inline const_iterator end() const; /** * @brief Return single UTF-8 character at the given character position * * @param pos Character position */ - CUDA_DEVICE_CALLABLE char_utf8 operator[](size_type pos) const; + __device__ inline char_utf8 operator[](size_type pos) const; /** * @brief Return the byte offset from data() for a given character position * * @param pos Character position */ - CUDA_DEVICE_CALLABLE size_type byte_offset(size_type pos) const; + __device__ inline size_type byte_offset(size_type pos) const; /** * @brief Comparing target string with this string. Each character is compared @@ -141,7 +141,7 @@ class string_view { * not match is greater in the arg string, or all compared characters * match but the arg string is longer. */ - CUDA_DEVICE_CALLABLE int compare(const string_view& str) const; + __device__ inline int compare(const string_view& str) const; /** * @brief Comparing target string with this string. Each character is compared * as a UTF-8 code-point value. 
@@ -156,32 +156,32 @@ class string_view { * not match is greater in the arg string, or all compared characters * match but the arg string is longer. */ - CUDA_DEVICE_CALLABLE int compare(const char* str, size_type bytes) const; + __device__ inline int compare(const char* str, size_type bytes) const; /** * @brief Returns true if rhs matches this string exactly. */ - CUDA_DEVICE_CALLABLE bool operator==(const string_view& rhs) const; + __device__ inline bool operator==(const string_view& rhs) const; /** * @brief Returns true if rhs does not match this string. */ - CUDA_DEVICE_CALLABLE bool operator!=(const string_view& rhs) const; + __device__ inline bool operator!=(const string_view& rhs) const; /** * @brief Returns true if this string is ordered before rhs. */ - CUDA_DEVICE_CALLABLE bool operator<(const string_view& rhs) const; + __device__ inline bool operator<(const string_view& rhs) const; /** * @brief Returns true if rhs is ordered before this string. */ - CUDA_DEVICE_CALLABLE bool operator>(const string_view& rhs) const; + __device__ inline bool operator>(const string_view& rhs) const; /** * @brief Returns true if this string matches or is ordered before rhs. */ - CUDA_DEVICE_CALLABLE bool operator<=(const string_view& rhs) const; + __device__ inline bool operator<=(const string_view& rhs) const; /** * @brief Returns true if rhs matches or is ordered before this string. */ - CUDA_DEVICE_CALLABLE bool operator>=(const string_view& rhs) const; + __device__ inline bool operator>=(const string_view& rhs) const; /** * @brief Returns the character position of the first occurrence where the @@ -193,9 +193,9 @@ class string_view { * Specify -1 to indicate to the end of the string. * @return -1 if str is not found in this string. 
*/ - CUDA_DEVICE_CALLABLE size_type find(const string_view& str, - size_type pos = 0, - size_type count = -1) const; + __device__ inline size_type find(const string_view& str, + size_type pos = 0, + size_type count = -1) const; /** * @brief Returns the character position of the first occurrence where the * array str is found in this string within the character range [pos,pos+n). @@ -207,10 +207,10 @@ class string_view { * Specify -1 to indicate to the end of the string. * @return -1 if arg string is not found in this string. */ - CUDA_DEVICE_CALLABLE size_type find(const char* str, - size_type bytes, - size_type pos = 0, - size_type count = -1) const; + __device__ inline size_type find(const char* str, + size_type bytes, + size_type pos = 0, + size_type count = -1) const; /** * @brief Returns the character position of the first occurrence where * character is found in this string within the character range [pos,pos+n). @@ -221,9 +221,9 @@ class string_view { * Specify -1 to indicate to the end of the string. * @return -1 if arg string is not found in this string. */ - CUDA_DEVICE_CALLABLE size_type find(char_utf8 character, - size_type pos = 0, - size_type count = -1) const; + __device__ inline size_type find(char_utf8 character, + size_type pos = 0, + size_type count = -1) const; /** * @brief Returns the character position of the last occurrence where the * argument str is found in this string within the character range [pos,pos+n). @@ -234,9 +234,9 @@ class string_view { * Specify -1 to indicate to the end of the string. * @return -1 if arg string is not found in this string. */ - CUDA_DEVICE_CALLABLE size_type rfind(const string_view& str, - size_type pos = 0, - size_type count = -1) const; + __device__ inline size_type rfind(const string_view& str, + size_type pos = 0, + size_type count = -1) const; /** * @brief Returns the character position of the last occurrence where the * array str is found in this string within the character range [pos,pos+n). 
@@ -248,10 +248,10 @@ class string_view { * Specify -1 to indicate to the end of the string. * @return -1 if arg string is not found in this string. */ - CUDA_DEVICE_CALLABLE size_type rfind(const char* str, - size_type bytes, - size_type pos = 0, - size_type count = -1) const; + __device__ inline size_type rfind(const char* str, + size_type bytes, + size_type pos = 0, + size_type count = -1) const; /** * @brief Returns the character position of the last occurrence where * character is found in this string within the character range [pos,pos+n). @@ -262,9 +262,9 @@ class string_view { * Specify -1 to indicate to the end of the string. * @return -1 if arg string is not found in this string. */ - CUDA_DEVICE_CALLABLE size_type rfind(char_utf8 character, - size_type pos = 0, - size_type count = -1) const; + __device__ inline size_type rfind(char_utf8 character, + size_type pos = 0, + size_type count = -1) const; /** * @brief Return a sub-string of this string. The original string and device @@ -274,7 +274,7 @@ class string_view { * @param length Number of characters from start to include in the sub-string. * @return New instance pointing to a subset of the characters within this instance. */ - CUDA_DEVICE_CALLABLE string_view substr(size_type start, size_type length) const; + __device__ inline string_view substr(size_type start, size_type length) const; /** * @brief Return minimum value associated with the string type @@ -284,7 +284,7 @@ class string_view { * * @return An empty string */ - CUDA_HOST_DEVICE_CALLABLE static string_view min(); + CUDF_HOST_DEVICE inline static string_view min(); /** * @brief Return maximum value associated with the string type @@ -295,12 +295,12 @@ class string_view { * @return A string value which represents the highest possible valid UTF-8 encoded * character. */ - CUDA_HOST_DEVICE_CALLABLE static string_view max(); + CUDF_HOST_DEVICE inline static string_view max(); /** * @brief Default constructor represents an empty string. 
 */ - CUDA_HOST_DEVICE_CALLABLE string_view() : _data(""), _bytes(0), _length(0) {} + CUDF_HOST_DEVICE inline string_view() : _data(""), _bytes(0), _length(0) {} /** * @brief Create instance from existing device char array. @@ -308,7 +308,7 @@ class string_view { * @param data Device char array encoded in UTF8. * @param bytes Number of bytes in data array. */ - CUDA_HOST_DEVICE_CALLABLE string_view(const char* data, size_type bytes) + CUDF_HOST_DEVICE inline string_view(const char* data, size_type bytes) : _data(data), _bytes(bytes), _length(UNKNOWN_STRING_LENGTH) { } @@ -330,7 +330,7 @@ class string_view { * @param bytepos Byte position from start of _data. * @return The character position for the specified byte. */ - CUDA_DEVICE_CALLABLE size_type character_offset(size_type bytepos) const; + __device__ inline size_type character_offset(size_type bytepos) const; }; namespace strings { @@ -386,7 +386,7 @@ constexpr size_type bytes_in_utf8_byte(uint8_t byte) * @param[out] character Single char_utf8 value. * @return The number of bytes in the character */ -CUDA_HOST_DEVICE_CALLABLE size_type to_char_utf8(const char* str, char_utf8& character) +CUDF_HOST_DEVICE inline size_type to_char_utf8(const char* str, char_utf8& character) { size_type const chr_width = bytes_in_utf8_byte(static_cast<uint8_t>(*str)); @@ -413,7 +413,7 @@ CUDA_HOST_DEVICE_CALLABLE size_type to_char_utf8(const char* str, char_utf8& cha * @param[out] str Allocated char array with enough space to hold the encoded character.
 * @return The number of bytes in the character */ -CUDA_HOST_DEVICE_CALLABLE size_type from_char_utf8(char_utf8 character, char* str) +CUDF_HOST_DEVICE inline size_type from_char_utf8(char_utf8 character, char* str) { size_type const chr_width = bytes_in_char_utf8(character); for (size_type idx = 0; idx < chr_width; ++idx) { diff --git a/cpp/include/cudf/types.hpp b/cpp/include/cudf/types.hpp index 13d5f8e06bc..459a4182aa0 100644 --- a/cpp/include/cudf/types.hpp +++ b/cpp/include/cudf/types.hpp @@ -17,19 +17,9 @@ #pragma once #ifdef __CUDACC__ -#define CUDA_HOST_DEVICE_CALLABLE __host__ __device__ inline -#define CUDA_DEVICE_CALLABLE __device__ inline - -// This version of the macro maximizes the chances of inlining when applied to -// a callable that is called on the GPU. -#define CUDF_HDFI __host__ __device__ __forceinline__ -#define CUDF_DFI __device__ __forceinline__ +#define CUDF_HOST_DEVICE __host__ __device__ #else -#define CUDA_HOST_DEVICE_CALLABLE inline -#define CUDA_DEVICE_CALLABLE inline - -#define CUDF_HDFI inline -#define CUDF_DFI inline +#define CUDF_HOST_DEVICE #endif #include diff --git a/cpp/include/cudf/utilities/bit.hpp b/cpp/include/cudf/utilities/bit.hpp index cbd09fa7b0d..f4a70463de3 100644 --- a/cpp/include/cudf/utilities/bit.hpp +++ b/cpp/include/cudf/utilities/bit.hpp @@ -42,7 +42,7 @@ namespace detail { #endif template <typename T> -constexpr CUDA_HOST_DEVICE_CALLABLE std::size_t size_in_bits() +constexpr CUDF_HOST_DEVICE inline std::size_t size_in_bits() { static_assert(CHAR_BIT == 8, "Size of a byte must be 8 bits."); return sizeof(T) * CHAR_BIT; @@ -58,7 +58,7 @@ constexpr CUDA_HOST_DEVICE_CALLABLE std::size_t size_in_bits() /** * @brief Returns the index of the word containing the specified bit.
 */ -constexpr CUDA_HOST_DEVICE_CALLABLE size_type word_index(size_type bit_index) +constexpr CUDF_HOST_DEVICE inline size_type word_index(size_type bit_index) { return bit_index / detail::size_in_bits<bitmask_type>(); } @@ -66,7 +66,7 @@ constexpr CUDA_HOST_DEVICE_CALLABLE size_type word_index(size_type bit_index) /** * @brief Returns the position within a word of the specified bit. */ -constexpr CUDA_HOST_DEVICE_CALLABLE size_type intra_word_index(size_type bit_index) +constexpr CUDF_HOST_DEVICE inline size_type intra_word_index(size_type bit_index) { return bit_index % detail::size_in_bits<bitmask_type>(); } @@ -80,7 +80,7 @@ constexpr CUDA_HOST_DEVICE_CALLABLE size_type intra_word_index(size_type bit_ind * @param bitmask The bitmask containing the bit to set * @param bit_index Index of the bit to set */ -CUDA_HOST_DEVICE_CALLABLE void set_bit_unsafe(bitmask_type* bitmask, size_type bit_index) +CUDF_HOST_DEVICE inline void set_bit_unsafe(bitmask_type* bitmask, size_type bit_index) { assert(nullptr != bitmask); bitmask[word_index(bit_index)] |= (bitmask_type{1} << intra_word_index(bit_index)); @@ -95,7 +95,7 @@ CUDA_HOST_DEVICE_CALLABLE void set_bit_unsafe(bitmask_type* bitmask, size_type b * * @param bitmask The bitmask containing the bit to clear * @param bit_index The index of the bit to clear */ -CUDA_HOST_DEVICE_CALLABLE void clear_bit_unsafe(bitmask_type* bitmask, size_type bit_index) +CUDF_HOST_DEVICE inline void clear_bit_unsafe(bitmask_type* bitmask, size_type bit_index) { assert(nullptr != bitmask); bitmask[word_index(bit_index)] &= ~(bitmask_type{1} << intra_word_index(bit_index)); @@ -109,7 +109,7 @@ CUDA_HOST_DEVICE_CALLABLE void clear_bit_unsafe(bitmask_type* bitmask, size_type * * @return true The specified bit is `1` * @return false The specified bit is `0` */ -CUDA_HOST_DEVICE_CALLABLE bool bit_is_set(bitmask_type const* bitmask, size_type bit_index) +CUDF_HOST_DEVICE inline bool bit_is_set(bitmask_type const* bitmask, size_type bit_index) { assert(nullptr != bitmask); return
 bitmask[word_index(bit_index)] & (bitmask_type{1} << intra_word_index(bit_index)); @@ -125,9 +125,9 @@ CUDA_HOST_DEVICE_CALLABLE bool bit_is_set(bitmask_type const* bitmask, size_type * @return false The specified bit is `0` * @return `default_value` if `bitmask` is nullptr */ -CUDA_HOST_DEVICE_CALLABLE bool bit_value_or(bitmask_type const* bitmask, - size_type bit_index, - bool default_value) +CUDF_HOST_DEVICE inline bool bit_value_or(bitmask_type const* bitmask, + size_type bit_index, + bool default_value) { return bitmask != nullptr ? bit_is_set(bitmask, bit_index) : default_value; } @@ -140,7 +140,7 @@ CUDA_HOST_DEVICE_CALLABLE bool bit_value_or(bitmask_type const* bitmask, * * @param n The number of least significant bits to set * @return A bitmask word with `n` least significant bits set */ -constexpr CUDA_HOST_DEVICE_CALLABLE bitmask_type set_least_significant_bits(size_type n) +constexpr CUDF_HOST_DEVICE inline bitmask_type set_least_significant_bits(size_type n) { constexpr_assert(0 <= n && n < static_cast<size_type>(detail::size_in_bits<bitmask_type>())); return ((bitmask_type{1} << n) - 1); @@ -154,7 +154,7 @@ constexpr CUDA_HOST_DEVICE_CALLABLE bitmask_type set_least_significant_bits(size * * @param n The number of most significant bits to set * @return A bitmask word with `n` most significant bits set */ -constexpr CUDA_HOST_DEVICE_CALLABLE bitmask_type set_most_significant_bits(size_type n) +constexpr CUDF_HOST_DEVICE inline bitmask_type set_most_significant_bits(size_type n) { constexpr size_type word_size{detail::size_in_bits<bitmask_type>()}; constexpr_assert(0 <= n && n < word_size); diff --git a/cpp/include/cudf/utilities/type_dispatcher.hpp b/cpp/include/cudf/utilities/type_dispatcher.hpp index d7d38aba4f3..0c6a6ee244c 100644 --- a/cpp/include/cudf/utilities/type_dispatcher.hpp +++ b/cpp/include/cudf/utilities/type_dispatcher.hpp @@ -420,7 +420,9 @@ using scalar_device_type_t = typename type_to_scalar_type_impl<T>::ScalarDeviceT template