Skip to content

Commit

Permalink
[SYCL][USM] Improve USM Allocator. (#2026)
Browse files Browse the repository at this point in the history
Add ability to use std::allocate_shared.
Add equality operators for allocators.
Add tests.

Disallow device allocations in usm_allocator as there are too many incompatibilities with how C++ allocators are used.

Signed-off-by: James Brodman <[email protected]>
  • Loading branch information
jbrodman authored Jul 29, 2020
1 parent a43dcc2 commit ce915ef
Show file tree
Hide file tree
Showing 6 changed files with 144 additions and 278 deletions.
139 changes: 40 additions & 99 deletions sycl/include/CL/sycl/usm/usm_allocator.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -26,112 +26,45 @@ __SYCL_EXPORT void *aligned_alloc(size_t alignment, size_t size,
usm::alloc kind);
__SYCL_EXPORT void free(void *ptr, const context &ctxt);

template <typename T, usm::alloc AllocKind, size_t Alignment = 0>
template <typename T, usm::alloc AllocKind, size_t Alignment = alignof(T)>
class usm_allocator {
public:
using value_type = T;
using pointer = T *;
using const_pointer = const T *;
using reference = T &;
using const_reference = const T &;
using propagate_on_container_copy_assignment = std::true_type;
using propagate_on_container_move_assignment = std::true_type;
using propagate_on_container_swap = std::true_type;

public:
template <typename U> struct rebind {
typedef usm_allocator<U, AllocKind, Alignment> other;
};

usm_allocator() = delete;
usm_allocator(const context &Ctxt, const device &Dev)
static_assert(
AllocKind != usm::alloc::device,
"usm_allocator does not support AllocKind == usm::alloc::device");

usm_allocator() noexcept = delete;
usm_allocator(const context &Ctxt, const device &Dev) noexcept
: MContext(Ctxt), MDevice(Dev) {}
usm_allocator(const queue &Q)
usm_allocator(const queue &Q) noexcept
: MContext(Q.get_context()), MDevice(Q.get_device()) {}
usm_allocator(const usm_allocator &Other)
: MContext(Other.MContext), MDevice(Other.MDevice) {}

/// Constructs an object on memory pointed by Ptr.
///
/// Note: AllocKind == alloc::device is not allowed.
///
/// \param Ptr is a pointer to memory that will be used to construct the
/// object.
/// \param Val is a value to initialize the newly constructed object.
template <
usm::alloc AllocT = AllocKind,
typename std::enable_if<AllocT != usm::alloc::device, int>::type = 0>
void construct(pointer Ptr, const_reference Val) {
new (Ptr) value_type(Val);
}

template <
usm::alloc AllocT = AllocKind,
typename std::enable_if<AllocT == usm::alloc::device, int>::type = 0>
void construct(pointer, const_reference) {
throw feature_not_supported(
"Device pointers do not support construct on host",
PI_INVALID_OPERATION);
}
usm_allocator(const usm_allocator &) noexcept = default;
usm_allocator(usm_allocator &&) noexcept = default;
usm_allocator &operator=(const usm_allocator &) = delete;
usm_allocator &operator=(usm_allocator &&) = default;

/// Destroys an object.
///
/// Note:: AllocKind == alloc::device is not allowed
///
/// \param Ptr is a pointer to memory where the object resides.
template <
usm::alloc AllocT = AllocKind,
typename std::enable_if<AllocT != usm::alloc::device, int>::type = 0>
void destroy(pointer Ptr) {
Ptr->~value_type();
}

template <
usm::alloc AllocT = AllocKind,
typename std::enable_if<AllocT == usm::alloc::device, int>::type = 0>
void destroy(pointer) {
// This method must be a NOP for device pointers.
}

/// Note:: AllocKind == alloc::device is not allowed.
///
/// \param Val is a reference to object.
/// \return an address of the object referenced by Val.
template <
usm::alloc AllocT = AllocKind,
typename std::enable_if<AllocT != usm::alloc::device, int>::type = 0>
pointer address(reference Val) const {
return &Val;
}

template <
usm::alloc AllocT = AllocKind,
typename std::enable_if<AllocT == usm::alloc::device, int>::type = 0>
pointer address(reference) const {
throw feature_not_supported(
"Device pointers do not support address on host", PI_INVALID_OPERATION);
}

template <
usm::alloc AllocT = AllocKind,
typename std::enable_if<AllocT != usm::alloc::device, int>::type = 0>
const_pointer address(const_reference Val) const {
return &Val;
}

template <
usm::alloc AllocT = AllocKind,
typename std::enable_if<AllocT == usm::alloc::device, int>::type = 0>
const_pointer address(const_reference) const {
throw feature_not_supported(
"Device pointers do not support address on host", PI_INVALID_OPERATION);
}
template <class U>
usm_allocator(const usm_allocator<U, AllocKind, Alignment> &Other) noexcept
: MContext(Other.MContext), MDevice(Other.MDevice) {}

/// Allocates memory.
///
/// \param NumberOfElements is a count of elements to allocate memory for.
pointer allocate(size_t NumberOfElements) {
T *allocate(size_t NumberOfElements) {

auto Result = reinterpret_cast<pointer>(
auto Result = reinterpret_cast<T *>(
aligned_alloc(getAlignment(), NumberOfElements * sizeof(value_type),
MDevice, MContext, AllocKind));
MDevice, MContext, AllocKind));
if (!Result) {
throw memory_allocation_error();
}
Expand All @@ -142,24 +75,32 @@ class usm_allocator {
///
/// \param Ptr is a pointer to memory being deallocated.
/// \param Size is a number of elements previously passed to allocate.
void deallocate(pointer Ptr, size_t) {
void deallocate(T *Ptr, size_t) {
if (Ptr) {
free(Ptr, MContext);
}
}

private:
constexpr size_t getAlignment() const {
/*
// This form might be preferable if the underlying implementation
// doesn't do the right thing when given 0 for alignment
return ((Alignment == 0)
? alignof(value_type)
: Alignment);
*/
return Alignment;
/// Equality comparison between two USM allocators.
///
/// Two allocators compare equal when their allocation kind, context and
/// device all match. The value type and the alignment parameter take no part
/// in the comparison.
///
/// \param One is the left-hand allocator.
/// \param Two is the right-hand allocator; it may be a different
/// specialization of usm_allocator.
/// \return true if kind, context and device are all the same.
template <class U, usm::alloc AllocKindU, size_t AlignmentU>
friend bool operator==(const usm_allocator<T, AllocKind, Alignment> &One,
                       const usm_allocator<U, AllocKindU, AlignmentU> &Two) {
  // The allocation kinds are template arguments, so check them first.
  if (AllocKind != AllocKindU)
    return false;
  if (!(One.MContext == Two.MContext))
    return false;
  return One.MDevice == Two.MDevice;
}

/// Inequality comparison between two USM allocators.
///
/// \param One is the left-hand allocator.
/// \param Two is the right-hand allocator; it may be a different
/// specialization of usm_allocator.
/// \return true if the allocation kind, the context or the device differs.
template <class U, usm::alloc AllocKindU, size_t AlignmentU>
friend bool operator!=(const usm_allocator<T, AllocKind, Alignment> &One,
                       const usm_allocator<U, AllocKindU, AlignmentU> &Two) {
  // Any single mismatch is enough to make the allocators unequal.
  return (AllocKind != AllocKindU) || !(One.MContext == Two.MContext) ||
         !(One.MDevice == Two.MDevice);
}

private:
/// \return the value of the Alignment template parameter; allocate() passes
/// this value as the alignment argument of aligned_alloc.
constexpr size_t getAlignment() const {
  return Alignment;
}

template <class U, usm::alloc AllocKindU, size_t AlignmentU>
friend class usm_allocator;

const context MContext;
const device MDevice;
};
Expand Down
55 changes: 55 additions & 0 deletions sycl/test/usm/allocator_equal.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
// piextUSM*Alloc functions for CUDA are not behaving as described in
// https://github.com/intel/llvm/blob/sycl/sycl/doc/extensions/USM/USM.adoc
// https://github.com/intel/llvm/blob/sycl/sycl/doc/extensions/USM/cl_intel_unified_shared_memory.asciidoc
//
// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t1.out
// RUN: env SYCL_DEVICE_TYPE=HOST %t1.out
// RUN: %CPU_RUN_PLACEHOLDER %t1.out
// RUN: %GPU_RUN_PLACEHOLDER %t1.out

//==---------- allocator_equal.cpp - Allocator Equality test ---------------==//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include <CL/sycl.hpp>

#include <cassert>

using namespace cl::sycl;

int main() {
queue q;
auto dev = q.get_device();
auto ctxt = q.get_context();

queue q2;
auto dev2 = q2.get_device();
auto ctxt2 = q2.get_context();

// Test allocator equality
if (dev.get_info<info::device::usm_host_allocations>()) {
usm_allocator<int, usm::alloc::host> alloc1(ctxt, dev);
usm_allocator<int, usm::alloc::host> alloc2(q);

assert((alloc1 == alloc2) && "Allocators should be equal.");

usm_allocator<int, usm::alloc::host, 8> alloc3(ctxt, dev);
usm_allocator<int, usm::alloc::host, 16> alloc4(q);

assert((alloc1 == alloc2) && "Allocators should be equal.");
}

if (dev.get_info<info::device::usm_shared_allocations>() &&
dev.get_info<info::device::usm_host_allocations>()) {
usm_allocator<int, usm::alloc::shared> alloc1(ctxt, dev);
usm_allocator<int, usm::alloc::host> alloc2(ctxt, dev);

assert((alloc1 != alloc2) && "Allocators should NOT be equal.");
}

return 0;
}
49 changes: 49 additions & 0 deletions sycl/test/usm/allocator_shared.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t1.out
// RUN: env SYCL_DEVICE_TYPE=HOST %t1.out
// RUN: %CPU_RUN_PLACEHOLDER %t1.out
// RUN: %GPU_RUN_PLACEHOLDER %t1.out

//==-------- allocator_shared.cpp - Allocate Shared test -------------------==//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include <CL/sycl.hpp>

#include <cassert>
#include <memory>

using namespace cl::sycl;

// Checks that usm_allocator can be used with std::allocate_shared for host
// and shared USM allocations, both with default construction and with a
// constructor argument.
// Returns 0 on success; failures are reported through assert.
int main() {
  queue q;
  auto dev = q.get_device();
  auto ctxt = q.get_context();

  // Test ability to create a shared pointer.
  if (dev.get_info<info::device::usm_host_allocations>()) {
    usm_allocator<int, usm::alloc::host> alloc(ctxt, dev);
    auto ptr1 = std::allocate_shared<int>(alloc);
    assert((ptr1 != nullptr) && "Host allocate_shared returned null.");

    // Test construction
    auto ptr2 = std::allocate_shared<int>(alloc, 42);
    // The message is only printed when the check fails, so it describes the
    // failure.
    assert((*ptr2 == 42) && "Host construct failed.");
  }

  if (dev.get_info<info::device::usm_shared_allocations>()) {
    usm_allocator<int, usm::alloc::shared> alloc(ctxt, dev);
    auto ptr1 = std::allocate_shared<int>(alloc);
    assert((ptr1 != nullptr) && "Shared allocate_shared returned null.");

    // Test construction
    auto ptr2 = std::allocate_shared<int>(alloc, 42);
    assert((*ptr2 == 42) && "Shared construct failed.");
  }

  // Device allocations are not supported due to how allocate_shared is
  // written.

  return 0;
}
43 changes: 0 additions & 43 deletions sycl/test/usm/allocator_vector.cpp
Original file line number Diff line number Diff line change
@@ -1,8 +1,3 @@
// XFAIL: cuda || level0
// piextUSM*Alloc functions for CUDA are not behaving as described in
// https://github.com/intel/llvm/blob/sycl/sycl/doc/extensions/USM/USM.adoc
// https://github.com/intel/llvm/blob/sycl/sycl/doc/extensions/USM/cl_intel_unified_shared_memory.asciidoc
//
// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t1.out
// RUN: env SYCL_DEVICE_TYPE=HOST %t1.out
// RUN: %CPU_RUN_PLACEHOLDER %t1.out
Expand Down Expand Up @@ -88,43 +83,5 @@ int main() {
return -1;
}

if (dev.get_info<info::device::usm_device_allocations>()) {
usm_allocator<int, usm::alloc::device> alloc(ctxt, dev);

std::vector<int, decltype(alloc)> vec(alloc);
vec.resize(N);

int *res = &vec[0];
int *vals = &vec[0];

auto e0 = q.submit([=](handler &h) {
h.single_task<class baz_init>([=]() {
res[0] = 0;
for (int i = 0; i < N; i++) {
vals[i] = i;
}
});
});

auto e1 = q.submit([=](handler &h) {
h.depends_on(e0);
h.single_task<class baz>([=]() {
for (int i = 1; i < N; i++) {
res[0] += vals[i];
}
});
});

e1.wait();

int answer = (N * (N - 1)) / 2;
int result;
q.memcpy(&result, res, sizeof(int));
q.wait();

if (result != answer)
return -1;
}

return 0;
}
48 changes: 0 additions & 48 deletions sycl/test/usm/allocator_vector_fail.cpp

This file was deleted.

Loading

0 comments on commit ce915ef

Please sign in to comment.