[FEA] Implement unravel_index for row-major array. #723

Merged (8 commits) on Jun 29, 2022
Changes from 6 commits

32 changes: 31 additions & 1 deletion cpp/include/raft/core/mdarray.hpp
@@ -914,4 +914,34 @@ auto reshape(const array_interface_type& mda, extents<Extents...> new_shape)
return reshape(mda.view(), new_shape);
}

} // namespace raft
/**
* \brief Turns a linear index into a coordinate. Similar to numpy's unravel_index.
*
* \code
* auto m = make_host_matrix<float>(7, 6);
* auto m_v = m.view();
* auto coord = unravel_index(2, m.extents(), typename decltype(m)::layout_type{});
* std::apply(m_v, coord) = 2;
* \endcode
*
* \param idx The linear index.
* \param shape The shape of the array to use.
* \param layout Must be `layout_c_contiguous` (row-major) in current implementation.
*
* \return A std::tuple that represents the coordinate.
*/
template <typename LayoutPolicy, std::size_t... Exts>
MDSPAN_INLINE_FUNCTION auto unravel_index(size_t idx,
Member: std::size_t idx, as even extents are expressed as that type?

Member (author): I haven't tested the updated mdspan and its new extent type yet.

Member: Fair. We'll update it when we update mdspan.

@divyegala (Member, Jun 27, 2022): Could this parameter possibly be optional, with a default type/value, for those wishing to use the template param directly?

Member (author): Yes. I will update the parameter type.

Member (author): Done.

Member: Am I missing the updates?

Member (author): I changed the parameter type from std::size_t idx to a template parameter Idx idx.

extents<Exts...> shape,
LayoutPolicy const& layout)
{
static_assert(std::is_same_v<std::remove_cv_t<std::remove_reference_t<decltype(layout)>>,
layout_c_contiguous>,
"Only C layout is supported.");
// Use 32-bit arithmetic when the index fits in uint32_t; 64-bit integer division is much
// slower, especially on GPU.
if (idx > std::numeric_limits<uint32_t>::max()) {
return detail::unravel_index_impl<uint64_t, Exts...>(static_cast<uint64_t>(idx), shape);
} else {
return detail::unravel_index_impl<uint32_t, Exts...>(static_cast<uint32_t>(idx), shape);
}
}
} // namespace raft
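
A worked version of the docstring's call pattern, for orientation (a sketch, not code from this PR; it assumes the host mdarray API defined elsewhere in raft and the raft::layout_c_contiguous alias checked by the static_assert above):

auto m     = raft::make_host_matrix<float>(7, 6);
auto m_v   = m.view();
// Linear index 22 in a 7x6 row-major matrix is coordinate (3, 4).
auto coord = raft::unravel_index(22, m.extents(), raft::layout_c_contiguous{});
std::apply(m_v, coord) = 2.0f;
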
71 changes: 70 additions & 1 deletion cpp/include/raft/detail/mdarray.hpp
@@ -59,7 +59,7 @@ class device_reference {
auto operator=(T const& other) -> device_reference&
{
auto* raw = ptr_.get();
-    raft::update_device(raw, &other, 1, stream_);
+    update_device(raw, &other, 1, stream_);
return *this;
}
};
@@ -240,4 +240,73 @@ namespace stdex = std::experimental;
using vector_extent = stdex::extents<dynamic_extent>;
using matrix_extent = stdex::extents<dynamic_extent, dynamic_extent>;
using scalar_extent = stdex::extents<1>;

template <typename T>
MDSPAN_INLINE_FUNCTION auto native_popc(T v) -> int32_t
{
int c = 0;
for (; v != 0; v &= v - 1) {
c++;
}
return c;
}

// Portable population count: prefer the hardware instruction when available, falling back to
// native_popc above otherwise.
MDSPAN_INLINE_FUNCTION auto popc(uint32_t v) -> int32_t
{
#if defined(__CUDA_ARCH__)
return __popc(v);
#elif defined(__GNUC__) || defined(__clang__)
return __builtin_popcount(v);
#else
return native_popc(v);
#endif // compiler
}

MDSPAN_INLINE_FUNCTION auto popc(uint64_t v) -> int32_t
{
#if defined(__CUDA_ARCH__)
return __popcll(v);
#elif defined(__GNUC__) || defined(__clang__)
return __builtin_popcountll(v);
#else
return native_popc(v);
#endif // compiler
}

// Convert a C array of indices into a std::tuple, so callers can std::apply it to an mdspan.
template <class T, std::size_t N, std::size_t... Idx>
MDSPAN_INLINE_FUNCTION constexpr auto arr_to_tup(T (&arr)[N], std::index_sequence<Idx...>)
{
return std::make_tuple(arr[Idx]...);
}

template <class T, std::size_t N>
MDSPAN_INLINE_FUNCTION constexpr auto arr_to_tup(T (&arr)[N])
{
return arr_to_tup(arr, std::make_index_sequence<N>{});
}

// Unsigned-integer division optimization inspired by the CIndexer in cupy. Division is slow
// on both CPU and GPU, especially for 64-bit integers, so we first avoid 64-bit arithmetic
// when the index fits in 32 bits, then avoid division entirely when the extent is a power of 2.
template <typename I, size_t... Extents>
MDSPAN_INLINE_FUNCTION auto unravel_index_impl(I idx, stdex::extents<Extents...> shape)
{
constexpr auto kRank = static_cast<int32_t>(shape.rank());
size_t index[shape.rank()]{0}; // NOLINT
static_assert(std::is_signed<decltype(kRank)>::value,
"Don't change the type without changing the for loop.");
for (int32_t dim = kRank; --dim > 0;) {
auto s = static_cast<std::remove_const_t<std::remove_reference_t<I>>>(shape.extent(dim));
if (s & (s - 1)) {
auto t = idx / s;
index[dim] = idx - t * s;
idx = t;
} else { // exp of 2
index[dim] = idx & (s - 1);
idx >>= popc(s - 1);
}
}
index[0] = idx;
return arr_to_tup(index);
}
} // namespace raft::detail
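
A quick sanity check on the fast path above: when an extent s is a power of two, s - 1 is an all-ones mask of log2(s) bits, so popc(s - 1) == log2(s), idx & (s - 1) equals idx % s, and idx >> popc(s - 1) equals idx / s. A minimal standalone sketch of that identity (not part of this diff; assumes GCC/Clang's __builtin_popcountll):

#include <cassert>
#include <cstdint>

int main()
{
  uint64_t idx = 41;
  uint64_t s   = 8;  // power-of-two extent: s & (s - 1) == 0
  assert((idx & (s - 1)) == idx % s);                       // 41 & 7 == 1
  assert((idx >> __builtin_popcountll(s - 1)) == idx / s);  // 41 >> 3 == 5
  return 0;
}
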
92 changes: 92 additions & 0 deletions cpp/test/mdarray.cu
@@ -421,4 +421,96 @@ TEST(MDArray, FuncArg)
std::is_same_v<decltype(slice)::accessor_type, device_matrix_view<float>::accessor_type>);
}
}

namespace {
void test_mdarray_unravel()
{
{
uint32_t v{0};
ASSERT_EQ(detail::native_popc(v), 0);
ASSERT_EQ(detail::popc(v), 0);
v = 1;
ASSERT_EQ(detail::native_popc(v), 1);
ASSERT_EQ(detail::popc(v), 1);
v = 0xffffffff;
ASSERT_EQ(detail::native_popc(v), 32);
ASSERT_EQ(detail::popc(v), 32);
}
{
uint64_t v{0};
ASSERT_EQ(detail::native_popc(v), 0);
ASSERT_EQ(detail::popc(v), 0);
v = 1;
ASSERT_EQ(detail::native_popc(v), 1);
ASSERT_EQ(detail::popc(v), 1);
v = 0xffffffff;
ASSERT_EQ(detail::native_popc(v), 32);
ASSERT_EQ(detail::popc(v), 32);
v = 0xffffffffffffffff;
ASSERT_EQ(detail::native_popc(v), 64);
ASSERT_EQ(detail::popc(v), 64);
}

// examples from numpy unravel_index
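// numpy: np.unravel_index([22, 41, 37], (7, 6)) -> (array([3, 6, 6]), array([4, 5, 1]))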
{
auto coord = unravel_index(22, detail::matrix_extent{7, 6}, stdex::layout_right{});
static_assert(std::tuple_size<decltype(coord)>::value == 2);
ASSERT_EQ(std::get<0>(coord), 3);
ASSERT_EQ(std::get<1>(coord), 4);
}
{
auto coord = unravel_index(41, detail::matrix_extent{7, 6}, stdex::layout_right{});
static_assert(std::tuple_size<decltype(coord)>::value == 2);
ASSERT_EQ(std::get<0>(coord), 6);
ASSERT_EQ(std::get<1>(coord), 5);
}
{
auto coord = unravel_index(37, detail::matrix_extent{7, 6}, stdex::layout_right{});
static_assert(std::tuple_size<decltype(coord)>::value == 2);
ASSERT_EQ(std::get<0>(coord), 6);
ASSERT_EQ(std::get<1>(coord), 1);
}
// assignment
{
auto m = make_host_matrix<float>(7, 6);
auto m_v = m.view();
for (size_t i = 0; i < m.size(); ++i) {
auto coord = unravel_index(i, m.extents(), typename decltype(m)::layout_type{});
std::apply(m_v, coord) = i;
}
for (size_t i = 0; i < m.size(); ++i) {
auto coord = unravel_index(i, m.extents(), typename decltype(m)::layout_type{});
ASSERT_EQ(std::apply(m_v, coord), i);
}
}

{
handle_t handle;
auto m = make_device_matrix<float>(handle, 7, 6);
auto m_v = m.view();
thrust::for_each_n(handle.get_thrust_policy(),
thrust::make_counting_iterator(0ul),
m_v.size(),
[=] HD(size_t i) {
auto coord =
unravel_index(i, m_v.extents(), typename decltype(m_v)::layout_type{});
std::apply(m_v, coord) = static_cast<float>(i);
});
thrust::device_vector<int32_t> status(1, 0);
auto p_status = status.data().get();
thrust::for_each_n(handle.get_thrust_policy(),
thrust::make_counting_iterator(0ul),
m_v.size(),
[=] __device__(size_t i) {
auto coord =
unravel_index(i, m_v.extents(), typename decltype(m_v)::layout_type{});
auto v = std::apply(m_v, coord);
if (v != static_cast<float>(i)) { raft::myAtomicAdd(p_status, 1); }
});
check_status(p_status, handle.get_stream());
}
}
} // anonymous namespace

TEST(MDArray, Unravel) { test_mdarray_unravel(); }
} // namespace raft