Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Normalizing offsets iterator #14234

Merged
merged 47 commits into from
Nov 13, 2023
Merged
Show file tree
Hide file tree
Changes from 44 commits
Commits
Show all changes
47 commits
Select commit Hold shift + click to select a range
32e1029
Enable indexalator for device code
davidwendt Sep 27, 2023
1548588
Merge branch 'branch-23.12' into indexalator-device-enable
davidwendt Sep 27, 2023
f6419b4
return ref experiment
davidwendt Sep 27, 2023
b5b4449
Merge branch 'branch-23.12' into indexalator-device-enable
davidwendt Sep 28, 2023
0e369dd
Normalizing offsets iterator
davidwendt Sep 28, 2023
e451116
Merge branch 'branch-23.12' into indexalator-device-enable
davidwendt Oct 5, 2023
7dcb134
23.12 baseline compile-time commit
davidwendt Oct 5, 2023
a248d75
Merge branch 'branch-23.12' into indexalator-device-enable
davidwendt Oct 5, 2023
88f6dff
undo temp change
davidwendt Oct 5, 2023
081cb84
use cudf::is_index_type
davidwendt Oct 6, 2023
a28a9ff
use cudf::is_index_type part 2
davidwendt Oct 6, 2023
df063b6
Merge branch 'branch-23.12' into indexalator-device-enable
davidwendt Oct 6, 2023
f5c898c
Merge branch 'indexalator-device-enable' into offsets-iterator
davidwendt Oct 6, 2023
ccc5bf5
add offsetalator factory
davidwendt Oct 6, 2023
73c04d8
Merge branch 'branch-23.12' into indexalator-device-enable
davidwendt Oct 10, 2023
eb586f4
use size_t for index_sizeof_fn
davidwendt Oct 10, 2023
4ba5a70
Merge branch 'indexalator-device-enable' into offsets-iterator
davidwendt Oct 10, 2023
e76ec97
Merge branch 'branch-23.12' into indexalator-device-enable
davidwendt Oct 12, 2023
1ae36f3
Merge branch 'branch-23.12' into indexalator-device-enable
davidwendt Oct 13, 2023
6fdeadf
Merge branch 'indexalator-device-enable' into offsets-iterator
davidwendt Oct 13, 2023
84e7510
Merge branch 'branch-23.12' into offsets-iterator
davidwendt Oct 17, 2023
2bfb3e1
Merge branch 'branch-23.12' into offsets-iterator
davidwendt Oct 20, 2023
8d45877
Merge branch 'branch-23.12' into offsets-iterator
davidwendt Oct 23, 2023
352f516
Merge branch 'branch-23.12' into offsets-iterator
davidwendt Oct 24, 2023
1add402
fix exception message
davidwendt Oct 24, 2023
e9cfafe
Merge branch 'branch-23.12' into offsets-iterator
davidwendt Oct 24, 2023
e5f5589
Merge branch 'branch-23.12' into offsets-iterator
davidwendt Oct 25, 2023
09afe63
Merge branch 'branch-23.12' into offsets-iterator
davidwendt Oct 28, 2023
704c853
rework offsetalator/indexalator dispatch logic
davidwendt Nov 1, 2023
f45ee6d
Merge branch 'branch-23.12' into offsets-iterator
davidwendt Nov 1, 2023
a132e33
Merge branch 'offsets-iterator' of github.com:davidwendt/cudf into of…
davidwendt Nov 1, 2023
8ee15a3
add alignas
davidwendt Nov 2, 2023
43c4984
Merge branch 'offsets-iterator' of github.com:davidwendt/cudf into of…
davidwendt Nov 2, 2023
a97d061
add more alignases
davidwendt Nov 2, 2023
1ed5e29
re-enable device test
davidwendt Nov 2, 2023
5839fcd
remove unneeded test
davidwendt Nov 2, 2023
e0d4e5f
change std::enable_if_t to CUDF_ENABLE_IF
davidwendt Nov 2, 2023
6ec6258
add anonymous namespace around internal functor
davidwendt Nov 3, 2023
8514a71
Merge branch 'branch-23.12' into offsets-iterator
davidwendt Nov 6, 2023
3a21fe1
remove type-dispatcher call from ctor
davidwendt Nov 6, 2023
ab0edb7
Merge branch 'branch-23.12' into offsets-iterator
davidwendt Nov 6, 2023
7b10cb7
Merge branch 'branch-23.12' into offsets-iterator
davidwendt Nov 7, 2023
f4634f9
removed unneeded dispatch in factory
davidwendt Nov 8, 2023
713d601
Merge branch 'branch-23.12' into offsets-iterator
davidwendt Nov 8, 2023
0d8caf1
Merge branch 'branch-23.12' into offsets-iterator
davidwendt Nov 8, 2023
56a73c5
fix doxygen comments
davidwendt Nov 8, 2023
23a8bf6
Merge branch 'branch-23.12' into offsets-iterator
davidwendt Nov 8, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 5 additions & 3 deletions cpp/include/cudf/column/column_device_view.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
#pragma once

#include <cudf/column/column_view.hpp>
#include <cudf/detail/offsets_iterator.cuh>
#include <cudf/detail/utilities/alignment.hpp>
#include <cudf/fixed_point/fixed_point.hpp>
#include <cudf/lists/list_view.hpp>
Expand Down Expand Up @@ -442,10 +443,11 @@ class alignas(16) column_device_view : public detail::column_device_view_base {
__device__ T element(size_type element_index) const noexcept
{
  // The diff here had merged removed and added lines (duplicate `offset`,
  // stale `d_offsets`); this is the reconstructed post-change method.
  size_type index       = element_index + offset();  // account for this view's _offset
  char const* d_strings = d_children[strings_column_view::chars_column_index].data<char>();
  // Normalize the offsets child through the offsetalator so either int32 or
  // int64 offsets columns are handled transparently.
  auto const offsets = d_children[strings_column_view::offsets_column_index];
  auto const itr     = cudf::detail::input_offsetalator(offsets.head(), offsets.type());
  auto const offset  = itr[index];
  return string_view{d_strings + offset, static_cast<cudf::size_type>(itr[index + 1] - offset)};
}

private:
Expand Down
148 changes: 134 additions & 14 deletions cpp/include/cudf/detail/indexalator.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,65 @@ namespace detail {
* auto result = thrust::find(thrust::device, begin, end, size_type{12} );
* @endcode
*/
struct input_indexalator : base_normalator<input_indexalator, cudf::size_type> {
  friend struct base_normalator<input_indexalator, cudf::size_type>;  // for CRTP

  using reference = cudf::size_type const;  // this keeps STL and thrust happy

  input_indexalator()                                    = default;
  input_indexalator(input_indexalator const&)            = default;
  input_indexalator(input_indexalator&&)                 = default;
  input_indexalator& operator=(input_indexalator const&) = default;
  input_indexalator& operator=(input_indexalator&&)      = default;

  /**
   * @brief Indirection operator returns the value at the current iterator position
   */
  __device__ inline cudf::size_type operator*() const { return operator[](0); }

  /**
   * @brief Dispatch functor for resolving an integer value from any index type
   */
  struct normalize_type {
    template <typename T, CUDF_ENABLE_IF(cudf::is_index_type<T>())>
    __device__ cudf::size_type operator()(void const* tp)
    {
      // tp is type-erased; the dispatched T identifies the width to read
      return static_cast<cudf::size_type>(*static_cast<T const*>(tp));
    }
    template <typename T, CUDF_ENABLE_IF(not cudf::is_index_type<T>())>
    __device__ cudf::size_type operator()(void const*)
    {
      CUDF_UNREACHABLE("only integral types are supported");
    }
  };

  /**
   * @brief Array subscript operator returns a value at the input
   * `idx` position as a `cudf::size_type` value.
   */
  __device__ inline cudf::size_type operator[](size_type idx) const
  {
    void const* tp = p_ + (idx * this->width_);
    return type_dispatcher(this->dtype_, normalize_type{}, tp);
  }

  /**
   * @brief Create an input index normalizing iterator.
   *
   * Use the indexalator_factory to create an iterator instance.
   *
   * @param data Pointer to an integer array in device memory
   * @param dtype Type of the data pointed to by `data`
   * @param offset Element offset applied to `data` using this iterator's width
   */
  CUDF_HOST_DEVICE input_indexalator(void const* data, data_type dtype, cudf::size_type offset = 0)
    : base_normalator<input_indexalator, cudf::size_type>(dtype),
      p_{static_cast<char const*>(data)}
  {
    p_ += offset * this->width_;
  }

 protected:
  char const* p_;  /// pointer to the integer data in device memory
};

/**
* @brief The index normalizing output iterator.
Expand All @@ -82,7 +140,75 @@ using input_indexalator = input_normalator<cudf::size_type>;
* thrust::less<Element>());
* @endcode
*/
struct output_indexalator : base_normalator<output_indexalator, cudf::size_type> {
  friend struct base_normalator<output_indexalator, cudf::size_type>;  // for CRTP

  using reference = output_indexalator const&;  // required for output iterators

  output_indexalator()                                     = default;
  output_indexalator(output_indexalator const&)            = default;
  output_indexalator(output_indexalator&&)                 = default;
  output_indexalator& operator=(output_indexalator const&) = default;
  output_indexalator& operator=(output_indexalator&&)      = default;

  /**
   * @brief Indirection operator returns this iterator instance in order
   * to capture the `operator=(Integer)` calls.
   */
  __device__ inline reference operator*() const { return *this; }

  /**
   * @brief Array subscript operator returns an iterator instance at the specified `idx` position.
   *
   * This allows capturing the subsequent `operator=(Integer)` call in this class.
   */
  __device__ inline output_indexalator const operator[](size_type idx) const
  {
    output_indexalator tmp{*this};
    tmp.p_ += (idx * this->width_);
    return tmp;
  }

  /**
   * @brief Dispatch functor for setting the index value from a size_type value.
   */
  struct normalize_type {
    template <typename T, CUDF_ENABLE_IF(cudf::is_index_type<T>())>
    __device__ void operator()(void* tp, cudf::size_type const value)
    {
      // tp is type-erased; the dispatched T identifies the width to write
      (*static_cast<T*>(tp)) = static_cast<T>(value);
    }
    template <typename T, CUDF_ENABLE_IF(not cudf::is_index_type<T>())>
    __device__ void operator()(void*, cudf::size_type const)
    {
      CUDF_UNREACHABLE("only index types are supported");
    }
  };

  /**
   * @brief Assign a size_type value to the current iterator position
   */
  __device__ inline reference operator=(cudf::size_type const value) const
  {
    void* tp = p_;
    type_dispatcher(this->dtype_, normalize_type{}, tp, value);
    return *this;
  }

  /**
   * @brief Create an output normalizing iterator
   *
   * @param data Pointer to an integer array in device memory
   * @param dtype Type of the data pointed to by `data`
   */
  CUDF_HOST_DEVICE output_indexalator(void* data, data_type dtype)
    : base_normalator<output_indexalator, cudf::size_type>(dtype), p_{static_cast<char*>(data)}
  {
  }

 protected:
  char* p_;  /// pointer to the integer data in device memory
};

/**
* @brief Use this class to create an indexalator instance.
Expand All @@ -92,14 +218,12 @@ struct indexalator_factory {
* @brief A type_dispatcher functor to create an input iterator from an indices column.
*/
struct input_indexalator_fn {
template <typename IndexType, std::enable_if_t<is_index_type<IndexType>()>* = nullptr>
template <typename IndexType, CUDF_ENABLE_IF(is_index_type<IndexType>())>
input_indexalator operator()(column_view const& indices)
{
return input_indexalator(indices.data<IndexType>(), indices.type());
}
template <typename IndexType,
typename... Args,
std::enable_if_t<not is_index_type<IndexType>()>* = nullptr>
template <typename IndexType, typename... Args, CUDF_ENABLE_IF(not is_index_type<IndexType>())>
input_indexalator operator()(Args&&... args)
{
CUDF_FAIL("indices must be an index type");
Expand All @@ -110,16 +234,14 @@ struct indexalator_factory {
* @brief Use this class to create an indexalator to a scalar index.
*/
struct input_indexalator_scalar_fn {
template <typename IndexType, std::enable_if_t<is_index_type<IndexType>()>* = nullptr>
template <typename IndexType, CUDF_ENABLE_IF(is_index_type<IndexType>())>
input_indexalator operator()(scalar const& index)
{
// note: using static_cast<scalar_type_t<IndexType> const&>(index) creates a copy
auto const scalar_impl = static_cast<scalar_type_t<IndexType> const*>(&index);
return input_indexalator(scalar_impl->data(), index.type());
}
template <typename IndexType,
typename... Args,
std::enable_if_t<not is_index_type<IndexType>()>* = nullptr>
template <typename IndexType, typename... Args, CUDF_ENABLE_IF(not is_index_type<IndexType>())>
input_indexalator operator()(Args&&... args)
{
CUDF_FAIL("scalar must be an index type");
Expand All @@ -130,14 +252,12 @@ struct indexalator_factory {
* @brief A type_dispatcher functor to create an output iterator from an indices column.
*/
struct output_indexalator_fn {
template <typename IndexType, std::enable_if_t<is_index_type<IndexType>()>* = nullptr>
template <typename IndexType, CUDF_ENABLE_IF(is_index_type<IndexType>())>
output_indexalator operator()(mutable_column_view const& indices)
{
return output_indexalator(indices.data<IndexType>(), indices.type());
}
template <typename IndexType,
typename... Args,
std::enable_if_t<not is_index_type<IndexType>()>* = nullptr>
template <typename IndexType, typename... Args, CUDF_ENABLE_IF(not is_index_type<IndexType>())>
output_indexalator operator()(Args&&... args)
{
CUDF_FAIL("indices must be an index type");
Expand Down
Loading