Skip to content

Commit

Permalink
Separating mdspan/mdarray infra into host_* and device_* variants (#810)
Browse files Browse the repository at this point in the history
This is a breaking change: it provides users with a more granular set of headers, allowing the `host` variants to be imported separately from the `device` and `managed` versions. It also separates the headers for `mdspan` and `mdarray`.

As an example, the following public headers can now be imported individually:
```c++
raft/core/host_mdspan.hpp
raft/core/device_mdspan.hpp

raft/core/host_mdarray.hpp
raft/core/device_mdarray.hpp
```


cc @rg20 @afender @akifcorduk for awareness. 


Closes #806

Authors:
  - Corey J. Nolet (https://github.com/cjnolet)

Approvers:
  - Divye Gala (https://github.com/divyegala)
  - Mark Hoemmen (https://github.com/mhoemmen)
  - William Hicks (https://github.com/wphicks)

URL: #810
  • Loading branch information
cjnolet authored Sep 22, 2022
1 parent e394ac2 commit 6a1d1da
Show file tree
Hide file tree
Showing 30 changed files with 1,300 additions and 1,016 deletions.
6 changes: 3 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ The APIs in RAFT currently accept raw pointers to device memory and we are in th
The `mdarray` forms a convenience layer over RMM and can be constructed in RAFT using a number of different helper functions:

```c++
#include <raft/mdarray.hpp>
#include <raft/core/device_mdarray.hpp>

int n_rows = 10;
int n_cols = 10;
Expand All @@ -56,8 +56,8 @@ Most of the primitives in RAFT accept a `raft::handle_t` object for the manageme
The example below demonstrates creating a RAFT handle and using it with `device_matrix` and `device_vector` to allocate memory, generate random clusters, and compute
pairwise Euclidean distances:
```c++
#include <raft/handle.hpp>
#include <raft/mdarray.hpp>
#include <raft/core/handle.hpp>
#include <raft/core/device_mdarray.hpp>
#include <raft/random/make_blobs.cuh>
#include <raft/distance/distance.cuh>

Expand Down
7 changes: 4 additions & 3 deletions cpp/include/raft.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -15,11 +15,12 @@
*/

/**
* This file is deprecated and will be removed in release 22.06.
* This file is deprecated and will be removed in a future release.
*/
#include "raft/core/device_mdarray.hpp"
#include "raft/core/device_mdspan.hpp"
#include "raft/core/device_span.hpp"
#include "raft/core/handle.hpp"
#include "raft/mdarray.hpp"
#include "raft/span.hpp"

#include <string>

Expand Down
2 changes: 2 additions & 0 deletions cpp/include/raft/cluster/detail/kmeans.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,9 @@
#include <raft/cluster/detail/kmeans_common.cuh>
#include <raft/cluster/kmeans_types.hpp>
#include <raft/core/cudart_utils.hpp>
#include <raft/core/device_mdarray.hpp>
#include <raft/core/handle.hpp>
#include <raft/core/host_mdarray.hpp>
#include <raft/core/logger.hpp>
#include <raft/core/mdarray.hpp>
#include <raft/distance/distance_types.hpp>
Expand Down
1 change: 1 addition & 0 deletions cpp/include/raft/cluster/detail/kmeans_common.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@

#include <raft/cluster/kmeans_types.hpp>
#include <raft/core/cudart_utils.hpp>
#include <raft/core/device_mdarray.hpp>
#include <raft/core/handle.hpp>
#include <raft/core/logger.hpp>
#include <raft/core/mdarray.hpp>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,10 +21,13 @@
* limitations under the License.
*/
#pragma once
#include <raft/core/mdspan.hpp>
#include <raft/detail/span.hpp> // dynamic_extent
#include <raft/core/device_mdspan.hpp>
#include <raft/core/handle.hpp>
#include <raft/util/cudart_utils.hpp>

#include <raft/core/detail/host_device_accessor.hpp>
#include <raft/core/detail/span.hpp> // dynamic_extent

#include <rmm/cuda_stream_view.hpp>
#include <rmm/device_uvector.hpp>
#include <rmm/mr/device/device_memory_resource.hpp>
Expand Down Expand Up @@ -187,161 +190,4 @@ class device_uvector_policy {
[[nodiscard]] auto make_accessor_policy() const noexcept { return const_accessor_policy{}; }
};

/**
 * @brief A container policy for host mdarray.
 *
 * Supplies the storage (a std::vector), element access, and mdspan
 * accessor-policy factories that the mdarray template requires from a
 * container policy.
 *
 * @tparam ElementType value type of the stored elements
 * @tparam Allocator   allocator forwarded to the underlying std::vector
 */
template <typename ElementType, typename Allocator = std::allocator<ElementType>>
class host_vector_policy {
 public:
  using element_type          = ElementType;
  using container_type        = std::vector<element_type, Allocator>;
  using allocator_type        = typename container_type::allocator_type;
  using pointer               = typename container_type::pointer;
  using const_pointer         = typename container_type::const_pointer;
  using reference             = element_type&;
  using const_reference       = element_type const&;
  // mdspan accessors used when viewing this container's memory.
  using accessor_policy       = std::experimental::default_accessor<element_type>;
  using const_accessor_policy = std::experimental::default_accessor<element_type const>;

 public:
  /** Allocate a value-initialized container holding n elements. */
  auto create(size_t n) -> container_type { return container_type(n); }

  constexpr host_vector_policy() noexcept(std::is_nothrow_default_constructible_v<ElementType>) =
    default;
  /**
   * Stream-taking overload for interface parity with device container
   * policies; the stream argument is unused on the host side.
   */
  explicit constexpr host_vector_policy(rmm::cuda_stream_view) noexcept(
    std::is_nothrow_default_constructible_v<ElementType>)
    : host_vector_policy()
  {
  }

  /** Unchecked element access (mutable). */
  [[nodiscard]] constexpr auto access(container_type& c, size_t n) const noexcept -> reference
  {
    return c[n];
  }
  /** Unchecked element access (read-only). */
  [[nodiscard]] constexpr auto access(container_type const& c, size_t n) const noexcept
    -> const_reference
  {
    return c[n];
  }

  [[nodiscard]] auto make_accessor_policy() noexcept { return accessor_policy{}; }
  [[nodiscard]] auto make_accessor_policy() const noexcept { return const_accessor_policy{}; }
};

/**
 * @brief A mixin to distinguish host and device memory.
 *
 * Wraps an mdspan accessor policy and records, at compile time, whether the
 * referenced memory is host-accessible, device-accessible, or both
 * ("managed" here means accessible from both host and device).
 */
template <typename AccessorPolicy, bool is_host, bool is_device>
struct accessor_mixin : public AccessorPolicy {
  using accessor_type   = AccessorPolicy;
  // std::true_type/std::false_type tags mirroring the boolean flags below,
  // usable for tag dispatch.
  using is_host_type    = std::conditional_t<is_host, std::true_type, std::false_type>;
  using is_device_type  = std::conditional_t<is_device, std::true_type, std::false_type>;
  using is_managed_type = std::conditional_t<is_device && is_host, std::true_type, std::false_type>;
  static constexpr bool is_host_accessible    = is_host;
  static constexpr bool is_device_accessible  = is_device;
  static constexpr bool is_managed_accessible = is_device && is_host;
  // make sure the explicit ctor can fall through
  using AccessorPolicy::AccessorPolicy;
  using offset_policy = accessor_mixin;
  // Implicit conversion from the wrapped policy so the mixin is a drop-in
  // replacement wherever the plain accessor is expected.
  accessor_mixin(AccessorPolicy const& that) : AccessorPolicy{that} {}  // NOLINT
};

// Convenience aliases fixing the accessibility flags for each memory kind.
template <typename AccessorPolicy>
using host_accessor = accessor_mixin<AccessorPolicy, true, false>;

template <typename AccessorPolicy>
using device_accessor = accessor_mixin<AccessorPolicy, false, true>;

template <typename AccessorPolicy>
using managed_accessor = accessor_mixin<AccessorPolicy, true, true>;

namespace stdex = std::experimental;

// Extent shorthands: 1-D dynamic (vector), 2-D dynamic (matrix), and a rank-1
// static extent of 1 used to represent scalars.
template <typename IndexType>
using vector_extent = stdex::extents<IndexType, dynamic_extent>;

template <typename IndexType>
using matrix_extent = stdex::extents<IndexType, dynamic_extent, dynamic_extent>;

// NOTE(review): scalar_extent defaults IndexType while the other aliases do
// not — confirm the asymmetry is intentional.
template <typename IndexType = std::uint32_t>
using scalar_extent = stdex::extents<IndexType, 1>;

/**
 * @brief Portable population count: number of set bits in `v`.
 *
 * Uses Kernighan's method — each iteration clears the lowest set bit — so the
 * loop executes once per set bit rather than once per bit position.
 */
template <typename T>
MDSPAN_INLINE_FUNCTION auto native_popc(T v) -> int32_t
{
  int32_t count = 0;
  while (v != 0) {
    v &= v - 1;  // clear the lowest set bit
    ++count;
  }
  return count;
}

/**
 * @brief Population count for 32-bit values, dispatching to the fastest
 * implementation available for the active compiler.
 */
MDSPAN_INLINE_FUNCTION auto popc(uint32_t v) -> int32_t
{
#if defined(__CUDA_ARCH__)
  return __popc(v);  // CUDA device intrinsic
#elif defined(__GNUC__) || defined(__clang__)
  return __builtin_popcount(v);  // GCC/Clang builtin
#else
  return native_popc(v);  // portable bit-twiddling fallback
#endif  // compiler
}

/**
 * @brief Population count for 64-bit values, dispatching to the fastest
 * implementation available for the active compiler.
 */
MDSPAN_INLINE_FUNCTION auto popc(uint64_t v) -> int32_t
{
#if defined(__CUDA_ARCH__)
  return __popcll(v);  // CUDA device intrinsic (64-bit)
#elif defined(__GNUC__) || defined(__clang__)
  return __builtin_popcountll(v);  // GCC/Clang builtin (64-bit)
#else
  return native_popc(v);  // portable bit-twiddling fallback
#endif  // compiler
}

/** Expand the array's elements into a std::tuple using the given index pack. */
template <class T, std::size_t N, std::size_t... Idx>
MDSPAN_INLINE_FUNCTION constexpr auto arr_to_tup(T (&arr)[N], std::index_sequence<Idx...>)
{
  return std::make_tuple(arr[Idx]...);
}

/** Convert a C array of N elements into an N-element std::tuple. */
template <class T, std::size_t N>
MDSPAN_INLINE_FUNCTION constexpr auto arr_to_tup(T (&arr)[N])
{
  return arr_to_tup(arr, std::make_index_sequence<N>{});
}

// uint division optimization inspired by the CIndexer in cupy. Division operation is
// slow on both CPU and GPU, especially 64 bit integer. So here we first try to avoid 64
// bit when the index is smaller, then try to avoid division when it's exp of 2.
/**
 * @brief Convert a flat index into per-dimension coordinates for `shape`.
 *
 * Iterates the dimensions from last to first; for power-of-two extents the
 * div/mod pair is replaced by mask-and-shift.
 *
 * @return std::tuple of std::size_t coordinates, one entry per dimension.
 */
template <typename I, typename IndexType, size_t... Extents>
MDSPAN_INLINE_FUNCTION auto unravel_index_impl(I idx, stdex::extents<IndexType, Extents...> shape)
{
  constexpr auto kRank = static_cast<int32_t>(shape.rank());
  std::size_t index[shape.rank()]{0};  // NOLINT
  static_assert(std::is_signed<decltype(kRank)>::value,
                "Don't change the type without changing the for loop.");
  // Walk dims kRank-1 .. 1; dim 0 receives whatever quotient remains.
  for (int32_t dim = kRank; --dim > 0;) {
    // Cast the extent to idx's own (possibly 32-bit) type so the arithmetic
    // below stays in the narrower width when possible.
    auto s = static_cast<std::remove_const_t<std::remove_reference_t<I>>>(shape.extent(dim));
    if (s & (s - 1)) {
      // Not a power of two: one division; remainder recovered by multiply-sub.
      auto t     = idx / s;
      index[dim] = idx - t * s;
      idx        = t;
    } else {  // exp of 2
      // Power of two: mask for the remainder, shift by log2(s) for the quotient.
      index[dim] = idx & (s - 1);
      idx >>= popc(s - 1);
    }
  }
  index[0] = idx;
  return arr_to_tup(index);
}

/**
 * Ensure all types listed in the parameter pack `Extents` are integral types
 * (SFINAE helper: ill-formed otherwise).
 * Usage:
 *   put it as the last nameless template parameter of a function:
 *   `typename = ensure_integral_extents<Extents...>`
 */
template <typename... Extents>
using ensure_integral_extents = std::enable_if_t<std::conjunction_v<std::is_integral<Extents>...>>;

} // namespace raft::detail
39 changes: 39 additions & 0 deletions cpp/include/raft/core/detail/host_device_accessor.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
/*
* Copyright (c) 2022, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#pragma once

namespace raft::detail {

/**
 * @brief A mixin to distinguish host and device memory.
 *
 * Wraps an mdspan accessor policy and records, at compile time, whether the
 * referenced memory is host-accessible, device-accessible, or both
 * ("managed" here means accessible from both host and device).
 */
template <typename AccessorPolicy, bool is_host, bool is_device>
struct host_device_accessor : public AccessorPolicy {
  using accessor_type   = AccessorPolicy;
  // std::true_type/std::false_type tags mirroring the boolean flags below,
  // usable for tag dispatch.
  using is_host_type    = std::conditional_t<is_host, std::true_type, std::false_type>;
  using is_device_type  = std::conditional_t<is_device, std::true_type, std::false_type>;
  using is_managed_type = std::conditional_t<is_device && is_host, std::true_type, std::false_type>;
  static constexpr bool is_host_accessible    = is_host;
  static constexpr bool is_device_accessible  = is_device;
  static constexpr bool is_managed_accessible = is_device && is_host;
  // make sure the explicit ctor can fall through
  using AccessorPolicy::AccessorPolicy;
  using offset_policy = host_device_accessor;
  // Implicit conversion from the wrapped policy so this type is a drop-in
  // replacement wherever the plain accessor is expected.
  host_device_accessor(AccessorPolicy const& that) : AccessorPolicy{that} {}  // NOLINT
};

} // namespace raft::detail
69 changes: 69 additions & 0 deletions cpp/include/raft/core/detail/host_mdarray.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
/*
* Copyright (2019) Sandia Corporation
*
* The source code is licensed under the 3-clause BSD license found in the LICENSE file
* thirdparty/LICENSES/mdarray.license
*/

/*
* Copyright (c) 2022, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#pragma once
#include <raft/core/mdspan_types.hpp>
#include <vector>

namespace raft::detail {

/**
 * @brief A container policy for host mdarray.
 *
 * Supplies the storage (a std::vector), element access, and mdspan
 * accessor-policy factories that the mdarray template requires from a
 * container policy.
 *
 * @tparam ElementType value type of the stored elements
 * @tparam Allocator   allocator forwarded to the underlying std::vector
 */
template <typename ElementType, typename Allocator = std::allocator<ElementType>>
class host_vector_policy {
 public:
  using element_type          = ElementType;
  using container_type        = std::vector<element_type, Allocator>;
  using allocator_type        = typename container_type::allocator_type;
  using pointer               = typename container_type::pointer;
  using const_pointer         = typename container_type::const_pointer;
  using reference             = element_type&;
  using const_reference       = element_type const&;
  // mdspan accessors used when viewing this container's memory.
  using accessor_policy       = std::experimental::default_accessor<element_type>;
  using const_accessor_policy = std::experimental::default_accessor<element_type const>;

 public:
  /** Allocate a value-initialized container holding n elements. */
  auto create(size_t n) -> container_type { return container_type(n); }

  constexpr host_vector_policy() noexcept(std::is_nothrow_default_constructible_v<ElementType>) =
    default;
  /**
   * Stream-taking overload for interface parity with device container
   * policies; the stream argument is unused on the host side.
   */
  explicit constexpr host_vector_policy(rmm::cuda_stream_view) noexcept(
    std::is_nothrow_default_constructible_v<ElementType>)
    : host_vector_policy()
  {
  }

  /** Unchecked element access (mutable). */
  [[nodiscard]] constexpr auto access(container_type& c, size_t n) const noexcept -> reference
  {
    return c[n];
  }
  /** Unchecked element access (read-only). */
  [[nodiscard]] constexpr auto access(container_type const& c, size_t n) const noexcept
    -> const_reference
  {
    return c[n];
  }

  [[nodiscard]] auto make_accessor_policy() noexcept { return accessor_policy{}; }
  [[nodiscard]] auto make_accessor_policy() const noexcept { return const_accessor_policy{}; }
};
} // namespace raft::detail
35 changes: 35 additions & 0 deletions cpp/include/raft/core/detail/macros.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
/*
* Copyright (c) 2022, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#pragma once

// _RAFT_HAS_CUDA: defined (to the __CUDACC__ version value) only when a CUDA
// compiler is driving the translation unit.
#ifndef _RAFT_HAS_CUDA
#if defined(__CUDACC__)
#define _RAFT_HAS_CUDA __CUDACC__
#endif
#endif

// _RAFT_HOST_DEVICE: expands to `__host__ __device__` under CUDA so annotated
// functions are callable from both sides; expands to nothing otherwise.
#ifndef _RAFT_HOST_DEVICE
#if defined(_RAFT_HAS_CUDA)
#define _RAFT_HOST_DEVICE __host__ __device__
#else
#define _RAFT_HOST_DEVICE
#endif
#endif

// RAFT_INLINE_FUNCTION: shorthand for an inline function usable on host and
// (when compiling with CUDA) on device.
#ifndef RAFT_INLINE_FUNCTION
#define RAFT_INLINE_FUNCTION inline _RAFT_HOST_DEVICE
#endif
Loading

0 comments on commit 6a1d1da

Please sign in to comment.