Skip to content
This repository has been archived by the owner on Mar 21, 2024. It is now read-only.

Commit

Permalink
Add common Thrust abstractions that use memory which is accessible fr…
Browse files Browse the repository at this point in the history
…om both

hosts and devices:
- `thrust::universal_vector`.
- `thrust::universal_ptr`.
- `thrust::universal_allocator`.
  • Loading branch information
brycelelbach committed Oct 29, 2020
1 parent 67e4718 commit 20966ed
Show file tree
Hide file tree
Showing 12 changed files with 412 additions and 166 deletions.
25 changes: 23 additions & 2 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,8 +1,29 @@
# Thrust 1.10.0 (NVIDIA HPC SDK 20.9)
# Thrust 1.11.0 (NVIDIA HPC SDK 20.11)

Thrust 1.11.0 is the major release accompanying the NVIDIA HPC SDK 20.11 release.
It exposes common Thrust abstractions for dealing with memory that can be
accessed by both hosts and devices: `thrust::universal_vector`,
`thrust::universal_ptr`, and `thrust::universal_allocator`.
It also adds `thrust::async::inclusive_scan` and
`thrust::async::exclusive_scan`.
Finally, it brings substantial improvements to the performance of the sort
backend for CUDA.

## New Features

- `thrust::universal_allocator<T>`: An allocator which allocates memory that is
accessible to both hosts and devices.
- `thrust::universal_ptr<T>`: A tagged pointer to memory that is accessible to
both hosts and devices.
- `thrust::universal_vector<T>`: A dynamically-allocated array of objects
accessible to both hosts and devices.

# Thrust 1.10.0 (NVIDIA HPC SDK 20.9, CUDA Toolkit 11.2)

## Summary

Thrust 1.10.0 is the major release accompanying the NVIDIA HPC SDK 20.9 release.
Thrust 1.10.0 is the major release accompanying the NVIDIA HPC SDK 20.9 release
and the CUDA Toolkit 11.2 release.
It drops support for C++03, GCC < 5, Clang < 6, and MSVC < 2017.
It also overhauls CMake support.
Finally, we now have a Code of Conduct for contributors:
Expand Down
144 changes: 11 additions & 133 deletions testing/cuda/managed_memory_pointer.cu
Original file line number Diff line number Diff line change
@@ -1,141 +1,19 @@
#include <thrust/detail/config.h>
#include <unittest/unittest.h>

#if THRUST_CPP_DIALECT >= 2011
#include <thrust/universal_allocator.h>

# include <unittest/unittest.h>

# include <thrust/allocate_unique.h>
# include <thrust/memory/detail/device_system_resource.h>
# include <thrust/mr/allocator.h>
# include <thrust/type_traits/is_contiguous_iterator.h>

# include <numeric>
# include <vector>

namespace
{

template <typename T>
using allocator =
thrust::mr::stateless_resource_allocator<T, thrust::universal_memory_resource>;

// The managed_memory_pointer class should be identified as a
// contiguous_iterator
THRUST_STATIC_ASSERT(
thrust::is_contiguous_iterator<allocator<int>::pointer>::value);
#include <type_traits>

template <typename T>
struct some_object {
some_object(T data)
: m_data(data)
{}

void setter(T data) { m_data = data; }
T getter() const { return m_data; }

private:
T m_data;
};

} // namespace

template <typename T>
void TestAllocateUnique()
void TestCudaManagedMemoryPointer()
{
// Simple test to ensure that pointers created with universal_memory_resource
// can be dereferenced and used with STL code. This is necessary as some
// STL implementations break when using fancy references that overload
// `operator&`, so universal_memory_resource uses a special pointer type that
// returns regular C++ references that can be safely used host-side.

// These operations fail to compile with fancy references:
auto pRaw = thrust::allocate_unique<T>(allocator<T>{}, 42);
auto pObj =
thrust::allocate_unique<some_object<T> >(allocator<some_object<T> >{}, 42);

static_assert(
std::is_same<decltype(pRaw.get()),
thrust::system::cuda::detail::managed_memory_pointer<T> >::value,
"Unexpected pointer returned from unique_ptr::get.");
static_assert(
std::is_same<decltype(pObj.get()),
thrust::system::cuda::detail::managed_memory_pointer<
some_object<T> > >::value,
"Unexpected pointer returned from unique_ptr::get.");

ASSERT_EQUAL(*pRaw, T(42));
ASSERT_EQUAL(*pRaw.get(), T(42));
ASSERT_EQUAL(pObj->getter(), T(42));
ASSERT_EQUAL((*pObj).getter(), T(42));
ASSERT_EQUAL(pObj.get()->getter(), T(42));
ASSERT_EQUAL((*pObj.get()).getter(), T(42));
}
DECLARE_GENERIC_UNITTEST(TestAllocateUnique);

template <typename T>
void TestIterationRaw()
{
auto array = thrust::allocate_unique_n<T>(allocator<T>{}, 6, 42);

static_assert(
std::is_same<decltype(array.get()),
thrust::system::cuda::detail::managed_memory_pointer<T> >::value,
"Unexpected pointer returned from unique_ptr::get.");

for (auto iter = array.get(), end = array.get() + 6; iter < end; ++iter)
{
ASSERT_EQUAL(*iter, T(42));
ASSERT_EQUAL(*iter.get(), T(42));
}
}
DECLARE_GENERIC_UNITTEST(TestIterationRaw);

template <typename T>
void TestIterationObj()
{
auto array =
thrust::allocate_unique_n<some_object<T> >(allocator<some_object<T> >{},
6,
42);

static_assert(
std::is_same<decltype(array.get()),
thrust::system::cuda::detail::managed_memory_pointer<
some_object<T> > >::value,
"Unexpected pointer returned from unique_ptr::get.");

for (auto iter = array.get(), end = array.get() + 6; iter < end; ++iter)
{
ASSERT_EQUAL(iter->getter(), T(42));
ASSERT_EQUAL((*iter).getter(), T(42));
ASSERT_EQUAL(iter.get()->getter(), T(42));
ASSERT_EQUAL((*iter.get()).getter(), T(42));
}
}
DECLARE_GENERIC_UNITTEST(TestIterationObj);

template <typename T>
void TestStdVector()
{
// Verify that a std::vector using the universal allocator will work with
// STL algorithms.
std::vector<T, allocator<T> > v0;

static_assert(
std::is_same<typename std::decay<decltype(v0)>::type::pointer,
thrust::system::cuda::detail::managed_memory_pointer<
T > >::value,
"Unexpected pointer returned from unique_ptr::get.");

v0.resize(6);
std::iota(v0.begin(), v0.end(), 0);
ASSERT_EQUAL(v0[0], T(0));
ASSERT_EQUAL(v0[1], T(1));
ASSERT_EQUAL(v0[2], T(2));
ASSERT_EQUAL(v0[3], T(3));
ASSERT_EQUAL(v0[4], T(4));
ASSERT_EQUAL(v0[5], T(5));
std::is_same<
thrust::universal_ptr<T>,
thrust::system::cuda::detail::managed_memory_pointer<T>
>::value,
"thrust::universal_ptr is not thrust::system::cuda::detail::managed_memory_pointer."
);
}
DECLARE_GENERIC_UNITTEST(TestStdVector);
DECLARE_GENERIC_UNITTEST(TestCudaManagedMemoryPointer);

#endif // C++11
38 changes: 33 additions & 5 deletions testing/unittest/assertions.h
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
#include <thrust/complex.h>
#include <thrust/host_vector.h>
#include <thrust/device_vector.h>
#include <thrust/universal_vector.h>
#include <thrust/iterator/iterator_traits.h>
#include <thrust/detail/type_traits.h>

Expand Down Expand Up @@ -376,7 +377,7 @@ class almost_equal_to<thrust::complex<T> >
double a_tol, r_tol;
almost_equal_to(double _a_tol = DEFAULT_ABSOLUTE_TOL, double _r_tol = DEFAULT_RELATIVE_TOL) : a_tol(_a_tol), r_tol(_r_tol) {}
bool operator()(const thrust::complex<T>& a, const thrust::complex<T>& b) const {
return almost_equal((double) a.real(), (double) b.real(), a_tol, r_tol)
return almost_equal((double) a.real(), (double) b.real(), a_tol, r_tol)
&& almost_equal((double) a.imag(), (double) b.imag(), a_tol, r_tol);
}
};
Expand All @@ -390,12 +391,12 @@ void assert_equal(ForwardIterator1 first1, ForwardIterator1 last1, ForwardIterat
{
typedef typename thrust::iterator_difference<ForwardIterator1>::type difference_type;
typedef typename thrust::iterator_value<ForwardIterator1>::type InputType;

bool failure = false;

difference_type length1 = thrust::distance(first1, last1);
difference_type length2 = thrust::distance(first2, last2);

difference_type min_length = thrust::min(length1, length2);

unittest::UnitTestFailure f;
Expand All @@ -409,7 +410,7 @@ void assert_equal(ForwardIterator1 first1, ForwardIterator1 last1, ForwardIterat
}

// check values

size_t mismatches = 0;

for (difference_type i = 0; i < min_length; i++)
Expand Down Expand Up @@ -472,14 +473,41 @@ void assert_almost_equal(ForwardIterator1 first1, ForwardIterator1 last1, Forwar
assert_equal(first1, last1, first2, last2, almost_equal_to<InputType>(a_tol, r_tol), filename, lineno);
}


template <typename T, typename Alloc1, typename Alloc2>
void assert_equal(const thrust::host_vector<T,Alloc1>& A, const thrust::host_vector<T,Alloc2>& B,
const std::string& filename = "unknown", int lineno = -1)
{
assert_equal(A.begin(), A.end(), B.begin(), B.end(), filename, lineno);
}

template <typename T, typename Alloc1, typename Alloc2>
void assert_equal(const thrust::universal_vector<T,Alloc1>& A, const thrust::universal_vector<T,Alloc2>& B,
const std::string& filename = "unknown", int lineno = -1)
{
assert_equal(A.begin(), A.end(), B.begin(), B.end(), filename, lineno);
}

template <typename T, typename Alloc1, typename Alloc2>
void assert_equal(const thrust::host_vector<T,Alloc1>& A, const thrust::universal_vector<T,Alloc2>& B,
const std::string& filename = "unknown", int lineno = -1)
{
assert_equal(A.begin(), A.end(), B.begin(), B.end(), filename, lineno);
}

template <typename T, typename Alloc1, typename Alloc2>
void assert_equal(const thrust::universal_vector<T,Alloc1>& A, const thrust::host_vector<T,Alloc2>& B,
const std::string& filename = "unknown", int lineno = -1)
{
assert_equal(A.begin(), A.end(), B.begin(), B.end(), filename, lineno);
}

template <typename T, typename Alloc1, typename Alloc2>
void assert_equal(const std::vector<T,Alloc1>& A, const std::vector<T,Alloc2>& B,
const std::string& filename = "unknown", int lineno = -1)
{
assert_equal(A.begin(), A.end(), B.begin(), B.end(), filename, lineno);
}

template <typename T, typename Alloc1, typename Alloc2>
void assert_almost_equal(const thrust::host_vector<T,Alloc1>& A, const thrust::host_vector<T,Alloc2>& B,
const std::string& filename = "unknown", int lineno = -1,
Expand Down
20 changes: 14 additions & 6 deletions testing/unittest/testframework.h
Original file line number Diff line number Diff line change
Expand Up @@ -359,7 +359,7 @@ class NAME##UnitTest : public UnitTest { \
public: \
NAME##UnitTest() : UnitTest(#NAME) {} \
void run(){ \
TEST(); \
TEST(); \
} \
}; \
NAME##UnitTest NAME##Instance
Expand Down Expand Up @@ -388,15 +388,16 @@ void VTEST##Device(void) { \
VTEST< thrust::device_vector<int, \
thrust::mr::stateless_resource_allocator<int, \
thrust::device_memory_resource> > >(); \
VTEST< thrust::device_vector<int, \
thrust::mr::stateless_resource_allocator<int, \
thrust::universal_memory_resource> > >(); \
} \
void VTEST##Universal(void) { \
VTEST< thrust::universal_vector<int> >(); \
VTEST< thrust::device_vector<int, \
thrust::mr::stateless_resource_allocator<int, \
thrust::universal_host_pinned_memory_resource> > >();\
} \
DECLARE_UNITTEST(VTEST##Host); \
DECLARE_UNITTEST(VTEST##Device);
DECLARE_UNITTEST(VTEST##Device); \
DECLARE_UNITTEST(VTEST##Universal);

// Same as above, but only for integral types
#define DECLARE_INTEGRAL_VECTOR_UNITTEST(VTEST) \
Expand All @@ -410,8 +411,15 @@ void VTEST##Device(void) { \
VTEST< thrust::device_vector<short> >(); \
VTEST< thrust::device_vector<int> >(); \
} \
void VTEST##Universal(void) { \
VTEST< thrust::universal_vector<int> >(); \
VTEST< thrust::device_vector<int, \
thrust::mr::stateless_resource_allocator<int, \
thrust::universal_host_pinned_memory_resource> > >();\
} \
DECLARE_UNITTEST(VTEST##Host); \
DECLARE_UNITTEST(VTEST##Device);
DECLARE_UNITTEST(VTEST##Device); \
DECLARE_UNITTEST(VTEST##Universal);

// Macro to create instances of a test for several data types.
#define DECLARE_GENERIC_UNITTEST(TEST) \
Expand Down
Loading

0 comments on commit 20966ed

Please sign in to comment.