Skip to content

Commit

Permalink
stream wip
Browse files Browse the repository at this point in the history
  • Loading branch information
wdeconinck committed Nov 6, 2024
1 parent 7965b08 commit 1ea3c6f
Show file tree
Hide file tree
Showing 4 changed files with 30 additions and 10 deletions.
2 changes: 1 addition & 1 deletion pluto/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ project( pluto VERSION ${atlas_VERSION} LANGUAGES CXX )
set(CMAKE_CXX_STANDARD 17)
set(CMAKE_CXX_STANDARD_REQUIRED ON)

set(PLUTO_HAVE_PMR 0)
set(PLUTO_HAVE_PMR 1)
### Normally we should auto-detect std::pmr (C++17) for polymorphic memory resources.
# However, the CUDA or HIP compiler does not seem to find the right headers,
#  so for now we don't rely on it.
Expand Down
8 changes: 8 additions & 0 deletions pluto/examples/use_streams.cc
Original file line number Diff line number Diff line change
Expand Up @@ -108,21 +108,29 @@ int main(int argc, char* argv[]) {
std::cerr << "device alloc" << std::endl;
device_array<value_type> array_d1(size);

std::cerr << "async loop start" << std::endl;
auto start = std::chrono::steady_clock::now();
for(std::size_t jstream=0; jstream<streams.size(); ++jstream) {
pluto::scope stream_scope;
pluto::set_default_stream(streams[jstream]);

const auto& stream = streams[jstream];
const auto& stream_offset = jstream * size/streams.size();
const auto& stream_size = (jstream < streams.size()-1 ? size/streams.size() : size - stream_offset);
auto* h1 = array_h1.data()+stream_offset;
auto* h2 = array_h2.data()+stream_offset;
auto* d1 = array_d1.data()+stream_offset;

device_array<value_type> stream_tmp(stream_size);
auto* dtmp = stream_tmp.data();

h1[stream_size-1] = 1.;
h2[stream_size-1] = -1.;
pluto::copy_host_to_device(d1, h1, stream_size, stream);
plus_one_on_device(d1, stream_size, stream);
pluto::copy_device_to_host(h2, d1, stream_size, stream);
}
std::cerr << "async loop end" << std::endl;
pluto::wait();
auto end = std::chrono::steady_clock::now();
std::cout << "execution without allocations took " << std::chrono::duration<double>(end-start).count() << " s" << std::endl;
Expand Down
26 changes: 19 additions & 7 deletions pluto/src/pluto/device/allocator.h
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@

#include "pluto/memory_resource/memory_resource.h"
#include "pluto/offload/wait.h"
#include "pluto/offload/Stream.h"

namespace pluto::device {

Expand All @@ -36,28 +37,38 @@ template<typename T>
class allocator {
public:
using value_type = T;

allocator(memory_resource* mr, const Stream& stream) :
memory_resource_(mr),
stream_(stream) {}

allocator() :
memory_resource_(get_default_resource()) {}
allocator(get_default_resource(), get_default_stream()) {}

allocator(const allocator& other) :
memory_resource_(other.memory_resource_) {}
allocator(other.memory_resource_, other.stream_) {}

allocator(memory_resource* mr) :
memory_resource_(mr) {}
allocator(mr, get_default_stream()) {}

allocator(const Stream& stream) :
allocator(get_default_resource(), stream) {}

value_type* allocate(std::size_t size) {
DefaultStream scope{stream_};
return static_cast<value_type*>(memory_resource_->allocate(size * sizeof(value_type), 256));
}

void deallocate(value_type* ptr, std::size_t size) {
DefaultStream scope{stream_};
memory_resource_->deallocate(ptr, size * sizeof(value_type), 256);
}

template <class U, class... Args>
void construct(U* p, Args&&... args) {
#if HIC_COMPILER
new_on_device<<<1, 1>>>(p, std::forward<Args>(args)...);
pluto::wait();
new_on_device<<<1, 1, 0, stream_.value<hicStream_t>()>>>(p, std::forward<Args>(args)...);
pluto::wait(stream_);
#else
new_on_device(p, args...);
#endif
Expand All @@ -66,14 +77,15 @@ class allocator {
template <class U>
void destroy(U* p) {
#if HIC_COMPILER
delete_on_device<<<1, 1>>>(p);
pluto::wait();
delete_on_device<<<1, 1, 0, stream_.value<hicStream_t>()>>>(p);
pluto::wait(stream_);
#else
delete_on_device(p);
#endif
}
private:
memory_resource* memory_resource_{nullptr};
const Stream& stream_;
};

// --------------------------------------------------------------------------------------------------------
Expand Down
4 changes: 2 additions & 2 deletions pluto/src/pluto/memory_resource/DeviceMemoryResource.cc
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ memory_pool_resource* device_pool_resource() {
void* DeviceMemoryResource::do_allocate(std::size_t bytes, alignment_t) {
void* ptr;
const auto& stream = get_default_stream();
if (false) {//stream.value()) {
if (stream.value()) {
if constexpr (PLUTO_HAVE_HIC) {
HIC_CALL( hicMallocAsync(&ptr, bytes, stream.value<hicStream_t>() ) );
}
Expand All @@ -64,7 +64,7 @@ void* DeviceMemoryResource::do_allocate(std::size_t bytes, alignment_t) {

void DeviceMemoryResource::do_deallocate(void* ptr, std::size_t bytes, alignment_t) {
const auto& stream = get_default_stream();
if (false) {//if (stream.value()) {
if (stream.value()) {
if constexpr (PLUTO_HAVE_HIC) {
HIC_CALL( hicFreeAsync(ptr, stream.value<hicStream_t>()) );
}
Expand Down

0 comments on commit 1ea3c6f

Please sign in to comment.