Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Normalizing offsets iterator #14234

Merged
merged 47 commits into from
Nov 13, 2023
Merged
Show file tree
Hide file tree
Changes from 38 commits
Commits
Show all changes
47 commits
Select commit Hold shift + click to select a range
32e1029
Enable indexalator for device code
davidwendt Sep 27, 2023
1548588
Merge branch 'branch-23.12' into indexalator-device-enable
davidwendt Sep 27, 2023
f6419b4
return ref experiment
davidwendt Sep 27, 2023
b5b4449
Merge branch 'branch-23.12' into indexalator-device-enable
davidwendt Sep 28, 2023
0e369dd
Normalizing offsets iterator
davidwendt Sep 28, 2023
e451116
Merge branch 'branch-23.12' into indexalator-device-enable
davidwendt Oct 5, 2023
7dcb134
23.12 baseline compile-time commit
davidwendt Oct 5, 2023
a248d75
Merge branch 'branch-23.12' into indexalator-device-enable
davidwendt Oct 5, 2023
88f6dff
undo temp change
davidwendt Oct 5, 2023
081cb84
use cudf::is_index_type
davidwendt Oct 6, 2023
a28a9ff
use cudf::is_index_type part 2
davidwendt Oct 6, 2023
df063b6
Merge branch 'branch-23.12' into indexalator-device-enable
davidwendt Oct 6, 2023
f5c898c
Merge branch 'indexalator-device-enable' into offsets-iterator
davidwendt Oct 6, 2023
ccc5bf5
add offsetalator factory
davidwendt Oct 6, 2023
73c04d8
Merge branch 'branch-23.12' into indexalator-device-enable
davidwendt Oct 10, 2023
eb586f4
use size_t for index_sizeof_fn
davidwendt Oct 10, 2023
4ba5a70
Merge branch 'indexalator-device-enable' into offsets-iterator
davidwendt Oct 10, 2023
e76ec97
Merge branch 'branch-23.12' into indexalator-device-enable
davidwendt Oct 12, 2023
1ae36f3
Merge branch 'branch-23.12' into indexalator-device-enable
davidwendt Oct 13, 2023
6fdeadf
Merge branch 'indexalator-device-enable' into offsets-iterator
davidwendt Oct 13, 2023
84e7510
Merge branch 'branch-23.12' into offsets-iterator
davidwendt Oct 17, 2023
2bfb3e1
Merge branch 'branch-23.12' into offsets-iterator
davidwendt Oct 20, 2023
8d45877
Merge branch 'branch-23.12' into offsets-iterator
davidwendt Oct 23, 2023
352f516
Merge branch 'branch-23.12' into offsets-iterator
davidwendt Oct 24, 2023
1add402
fix exception message
davidwendt Oct 24, 2023
e9cfafe
Merge branch 'branch-23.12' into offsets-iterator
davidwendt Oct 24, 2023
e5f5589
Merge branch 'branch-23.12' into offsets-iterator
davidwendt Oct 25, 2023
09afe63
Merge branch 'branch-23.12' into offsets-iterator
davidwendt Oct 28, 2023
704c853
rework offsetalator/indexalator dispatch logic
davidwendt Nov 1, 2023
f45ee6d
Merge branch 'branch-23.12' into offsets-iterator
davidwendt Nov 1, 2023
a132e33
Merge branch 'offsets-iterator' of github.com:davidwendt/cudf into of…
davidwendt Nov 1, 2023
8ee15a3
add alignas
davidwendt Nov 2, 2023
43c4984
Merge branch 'offsets-iterator' of github.com:davidwendt/cudf into of…
davidwendt Nov 2, 2023
a97d061
add more alignases
davidwendt Nov 2, 2023
1ed5e29
re-enable device test
davidwendt Nov 2, 2023
5839fcd
remove unneeded test
davidwendt Nov 2, 2023
e0d4e5f
change std::enable_if_t to CUDF_ENABLE_IF
davidwendt Nov 2, 2023
6ec6258
add anonymous namespace around internal functor
davidwendt Nov 3, 2023
8514a71
Merge branch 'branch-23.12' into offsets-iterator
davidwendt Nov 6, 2023
3a21fe1
remove type-dispatcher call from ctor
davidwendt Nov 6, 2023
ab0edb7
Merge branch 'branch-23.12' into offsets-iterator
davidwendt Nov 6, 2023
7b10cb7
Merge branch 'branch-23.12' into offsets-iterator
davidwendt Nov 7, 2023
f4634f9
removed unneeded dispatch in factory
davidwendt Nov 8, 2023
713d601
Merge branch 'branch-23.12' into offsets-iterator
davidwendt Nov 8, 2023
0d8caf1
Merge branch 'branch-23.12' into offsets-iterator
davidwendt Nov 8, 2023
56a73c5
fix doxygen comments
davidwendt Nov 8, 2023
23a8bf6
Merge branch 'branch-23.12' into offsets-iterator
davidwendt Nov 8, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 5 additions & 3 deletions cpp/include/cudf/column/column_device_view.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
#pragma once

#include <cudf/column/column_view.hpp>
#include <cudf/detail/offsets_iterator.cuh>
#include <cudf/detail/utilities/alignment.hpp>
#include <cudf/fixed_point/fixed_point.hpp>
#include <cudf/lists/list_view.hpp>
Expand Down Expand Up @@ -442,10 +443,11 @@ class alignas(16) column_device_view : public detail::column_device_view_base {
__device__ T element(size_type element_index) const noexcept
{
size_type index = element_index + offset(); // account for this view's _offset
auto const* d_offsets = d_children[strings_column_view::offsets_column_index].data<int32_t>();
char const* d_strings = d_children[strings_column_view::chars_column_index].data<char>();
size_type offset = d_offsets[index];
return string_view{d_strings + offset, d_offsets[index + 1] - offset};
auto const offsets = d_children[strings_column_view::offsets_column_index];
auto const itr = cudf::detail::input_offsetalator(offsets.head(), offsets.type());
auto const offset = itr[index];
return string_view{d_strings + offset, static_cast<cudf::size_type>(itr[index + 1] - offset)};
}

private:
Expand Down
128 changes: 126 additions & 2 deletions cpp/include/cudf/detail/indexalator.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,64 @@ namespace detail {
* auto result = thrust::find(thrust::device, begin, end, size_type{12} );
* @endcode
*/
struct alignas(16) input_indexalator : base_normalator<input_indexalator, cudf::size_type> {
  friend struct base_normalator<input_indexalator, cudf::size_type>;  // for CRTP

  using reference = cudf::size_type const;  // this keeps STL and thrust happy

  input_indexalator()                                    = default;
  input_indexalator(input_indexalator const&)            = default;
  input_indexalator(input_indexalator&&)                 = default;
  input_indexalator& operator=(input_indexalator const&) = default;
  input_indexalator& operator=(input_indexalator&&)      = default;

  /**
   * @brief Indirection operator returns the value at the current iterator position
   */
  __device__ inline cudf::size_type operator*() const { return operator[](0); }

  /**
   * @brief Dispatch functor for resolving a size_type value from any index type
   */
  struct normalize_type {
    // reads the element at tp as type T and normalizes it to size_type
    template <typename T, std::enable_if_t<cudf::is_index_type<T>()>* = nullptr>
    __device__ cudf::size_type operator()(void const* tp)
    {
      return static_cast<cudf::size_type>(*static_cast<T const*>(tp));
    }
    // non-index types are rejected when the iterator is created; this path is unreachable
    template <typename T, std::enable_if_t<not cudf::is_index_type<T>()>* = nullptr>
    __device__ cudf::size_type operator()(void const*)
    {
      CUDF_UNREACHABLE("only integral types are supported");
    }
  };

  /**
   * @brief Array subscript operator returns a value at the input
   * `idx` position as a `size_type` value.
   */
  __device__ inline cudf::size_type operator[](size_type idx) const
  {
    void const* tp = p_ + (idx * this->width_);
    return type_dispatcher(this->dtype_, normalize_type{}, tp);
  }

  /**
   * @brief Create an input index normalizing iterator.
   *
   * Use the indexalator_factory to create an iterator instance.
   *
   * @param data Pointer to an integer array in device memory
   * @param dtype Type of the data pointed to by `data`
   * @param offset Number of elements of `dtype` to skip from the start of `data`
   */
  CUDF_HOST_DEVICE input_indexalator(void const* data, data_type dtype, cudf::size_type offset = 0)
    : base_normalator<input_indexalator, cudf::size_type>(dtype), p_{static_cast<char const*>(data)}
  {
    p_ += offset * this->width_;
  }

  char const* p_;  /// pointer to the integer data in device memory
};

/**
* @brief The index normalizing output iterator.
Expand All @@ -82,7 +139,74 @@ using input_indexalator = input_normalator<cudf::size_type>;
* thrust::less<Element>());
* @endcode
*/
struct alignas(16) output_indexalator : base_normalator<output_indexalator, cudf::size_type> {
  friend struct base_normalator<output_indexalator, cudf::size_type>;  // for CRTP

  using reference = output_indexalator const&;  // required for output iterators

  output_indexalator()                                     = default;
  output_indexalator(output_indexalator const&)            = default;
  output_indexalator(output_indexalator&&)                 = default;
  output_indexalator& operator=(output_indexalator const&) = default;
  output_indexalator& operator=(output_indexalator&&)      = default;

  /**
   * @brief Indirection operator returns this iterator instance in order
   * to capture the `operator=(Integer)` calls.
   */
  __device__ inline output_indexalator const& operator*() const { return *this; }

  /**
   * @brief Array subscript operator returns an iterator instance at the specified `idx` position.
   *
   * This allows capturing the subsequent `operator=(Integer)` call in this class.
   */
  __device__ inline output_indexalator const operator[](size_type idx) const
  {
    output_indexalator tmp{*this};
    tmp.p_ += (idx * this->width_);
    return tmp;
  }

  /**
   * @brief Dispatch functor for setting the index value from a size_type value.
   */
  struct normalize_type {
    // stores value at tp, narrowed to the concrete index type T
    template <typename T, std::enable_if_t<cudf::is_index_type<T>()>* = nullptr>
    __device__ void operator()(void* tp, cudf::size_type const value)
    {
      (*static_cast<T*>(tp)) = static_cast<T>(value);
    }
    // non-index types are rejected when the iterator is created; this path is unreachable
    template <typename T, std::enable_if_t<not cudf::is_index_type<T>()>* = nullptr>
    __device__ void operator()(void*, cudf::size_type const)
    {
      CUDF_UNREACHABLE("only index types are supported");
    }
  };

  /**
   * @brief Assign a size_type value to the current iterator position
   */
  __device__ inline output_indexalator const& operator=(cudf::size_type const value) const
  {
    void* tp = p_;
    type_dispatcher(this->dtype_, normalize_type{}, tp, value);
    return *this;
  }

  /**
   * @brief Create an output normalizing iterator
   *
   * @param data Pointer to an integer array in device memory
   * @param dtype Type of the data pointed to by `data`
   */
  CUDF_HOST_DEVICE output_indexalator(void* data, data_type dtype)
    : base_normalator<output_indexalator, cudf::size_type>(dtype), p_{static_cast<char*>(data)}
  {
  }

  char* p_;  /// pointer to the integer data in device memory
};

/**
* @brief Use this class to create an indexalator instance.
Expand Down
158 changes: 3 additions & 155 deletions cpp/include/cudf/detail/normalizing_iterator.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ namespace detail {
* @tparam Integer The type the iterator normalizes to
*/
template <class Derived, typename Integer>
struct base_normalator {
struct alignas(16) base_normalator {
static_assert(cudf::is_index_type<Integer>());
using difference_type = std::ptrdiff_t;
using value_type = Integer;
Expand Down Expand Up @@ -204,7 +204,7 @@ struct base_normalator {

private:
struct integer_sizeof_fn {
template <typename T, std::enable_if_t<not cudf::is_index_type<T>()>* = nullptr>
template <typename T, std::enable_if_t<not cudf::is_fixed_width<T>()>* = nullptr>
davidwendt marked this conversation as resolved.
Show resolved Hide resolved
CUDF_HOST_DEVICE constexpr std::size_t operator()() const
{
#ifndef __CUDA_ARCH__
Expand All @@ -213,7 +213,7 @@ struct base_normalator {
CUDF_UNREACHABLE("only integral types are supported");
#endif
}
template <typename T, std::enable_if_t<cudf::is_index_type<T>()>* = nullptr>
template <typename T, std::enable_if_t<cudf::is_fixed_width<T>()>* = nullptr>
davidwendt marked this conversation as resolved.
Show resolved Hide resolved
CUDF_HOST_DEVICE constexpr std::size_t operator()() const noexcept
{
return sizeof(T);
Expand All @@ -233,157 +233,5 @@ struct base_normalator {
data_type dtype_; /// for type-dispatcher calls
};

/**
 * @brief The integer normalizing input iterator
 *
 * This is an iterator that can be used for index types (integers) without
 * requiring a type-specific instance. It can be used for any iterator
 * interface for reading an array of integer values of type
 * int8, int16, int32, int64, uint8, uint16, uint32, or uint64.
 * Reading specific elements always returns a value of type `Integer`.
 *
 * @tparam Integer Type returned by all read functions
 */
template <typename Integer>
struct input_normalator : base_normalator<input_normalator<Integer>, Integer> {
  friend struct base_normalator<input_normalator<Integer>, Integer>;  // for CRTP

  using reference = Integer const;  // this keeps STL and thrust happy

  input_normalator()                                   = default;
  input_normalator(input_normalator const&)            = default;
  input_normalator(input_normalator&&)                 = default;
  input_normalator& operator=(input_normalator const&) = default;
  input_normalator& operator=(input_normalator&&)      = default;

  /**
   * @brief Indirection operator returns the value at the current iterator position
   */
  __device__ inline Integer operator*() const { return operator[](0); }

  /**
   * @brief Dispatch functor for resolving a Integer value from any integer type
   */
  struct normalize_type {
    // reads the element at tp as type T and normalizes it to Integer
    template <typename T, std::enable_if_t<cudf::is_index_type<T>()>* = nullptr>
    __device__ Integer operator()(void const* tp)
    {
      return static_cast<Integer>(*static_cast<T const*>(tp));
    }
    // non-index types are never dispatched here in practice; guard path only
    template <typename T, std::enable_if_t<not cudf::is_index_type<T>()>* = nullptr>
    __device__ Integer operator()(void const*)
    {
      CUDF_UNREACHABLE("only integral types are supported");
    }
  };

  /**
   * @brief Array subscript operator returns a value at the input
   * `idx` position as a `Integer` value.
   */
  __device__ inline Integer operator[](size_type idx) const
  {
    void const* tp = p_ + (idx * this->width_);
    return type_dispatcher(this->dtype_, normalize_type{}, tp);
  }

  /**
   * @brief Create an input index normalizing iterator.
   *
   * Use the indexalator_factory to create an iterator instance.
   *
   * @param data Pointer to an integer array in device memory
   * @param dtype Type of the data pointed to by `data`
   * @param offset Number of elements of `dtype` to skip from the start of `data`
   */
  CUDF_HOST_DEVICE input_normalator(void const* data, data_type dtype, cudf::size_type offset = 0)
    : base_normalator<input_normalator<Integer>, Integer>(dtype), p_{static_cast<char const*>(data)}
  {
    p_ += offset * this->width_;
  }

  char const* p_;  /// pointer to the integer data in device memory
};

/**
 * @brief The integer normalizing output iterator
 *
 * This is an iterator that can be used for index types (integers) without
 * requiring a type-specific instance. It can be used for any iterator
 * interface for writing an array of integer values of type
 * int8, int16, int32, int64, uint8, uint16, uint32, or uint64.
 * Setting specific elements always accepts values of the `Integer` type.
 *
 * @tparam Integer The type used for all write functions
 */
template <typename Integer>
struct output_normalator : base_normalator<output_normalator<Integer>, Integer> {
  friend struct base_normalator<output_normalator<Integer>, Integer>;  // for CRTP

  using reference = output_normalator const&;  // required for output iterators

  output_normalator()                                    = default;
  output_normalator(output_normalator const&)            = default;
  output_normalator(output_normalator&&)                 = default;
  output_normalator& operator=(output_normalator const&) = default;
  output_normalator& operator=(output_normalator&&)      = default;

  /**
   * @brief Indirection operator returns this iterator instance in order
   * to capture the `operator=(Integer)` calls.
   */
  __device__ inline output_normalator const& operator*() const { return *this; }

  /**
   * @brief Array subscript operator returns an iterator instance at the specified `idx` position.
   *
   * This allows capturing the subsequent `operator=(Integer)` call in this class.
   */
  __device__ inline output_normalator const operator[](size_type idx) const
  {
    output_normalator tmp{*this};
    tmp.p_ += (idx * this->width_);
    return tmp;
  }

  /**
   * @brief Dispatch functor for setting the index value from a size_type value.
   */
  struct normalize_type {
    // stores value at tp, narrowed to the concrete index type T
    template <typename T, std::enable_if_t<cudf::is_index_type<T>()>* = nullptr>
    __device__ void operator()(void* tp, Integer const value)
    {
      (*static_cast<T*>(tp)) = static_cast<T>(value);
    }
    // non-index types are never dispatched here in practice; guard path only
    template <typename T, std::enable_if_t<not cudf::is_index_type<T>()>* = nullptr>
    __device__ void operator()(void*, Integer const)
    {
      CUDF_UNREACHABLE("only index types are supported");
    }
  };

  /**
   * @brief Assign an Integer value to the current iterator position
   */
  __device__ inline output_normalator const& operator=(Integer const value) const
  {
    void* tp = p_;
    type_dispatcher(this->dtype_, normalize_type{}, tp, value);
    return *this;
  }

  /**
   * @brief Create an output normalizing iterator
   *
   * @param data Pointer to an integer array in device memory
   * @param dtype Type of the data pointed to by `data`
   */
  CUDF_HOST_DEVICE output_normalator(void* data, data_type dtype)
    : base_normalator<output_normalator<Integer>, Integer>(dtype), p_{static_cast<char*>(data)}
  {
  }

  char* p_;  /// pointer to the integer data in device memory
};

} // namespace detail
} // namespace cudf
Loading