-
Notifications
You must be signed in to change notification settings - Fork 4.3k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #43969 from makortel/alpakaDeviceCache
Add {Copy,Move}ToDeviceCache<T> class templates and moveToDeviceAsync function template
- Loading branch information
Showing
17 changed files
with
970 additions
and
5 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
104 changes: 104 additions & 0 deletions
104
HeterogeneousCore/AlpakaCore/interface/CopyToDeviceCache.h
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,104 @@ | ||
#ifndef HeterogeneousCore_AlpakaInterface_interface_CopyToDeviceCache_h | ||
#define HeterogeneousCore_AlpakaInterface_interface_CopyToDeviceCache_h | ||
|
||
#include <alpaka/alpaka.hpp> | ||
|
||
#include "HeterogeneousCore/AlpakaCore/interface/QueueCache.h" | ||
#include "HeterogeneousCore/AlpakaInterface/interface/CopyToDevice.h" | ||
#include "HeterogeneousCore/AlpakaInterface/interface/devices.h" | ||
|
||
namespace cms::alpakatools { | ||
namespace detail { | ||
// By default copy the host object with CopyToDevice<T> | ||
// | ||
// Doing with template specialization (rather than | ||
// std::conditional_t and if constexpr) because the | ||
// CopyToDevice<THostObject>::copyAsync() is ill-defined e.g. for | ||
// PortableCollection on host device | ||
template <typename TDevice, typename THostObject> | ||
class CopyToDeviceCacheImpl { | ||
public: | ||
using Device = TDevice; | ||
using Queue = alpaka::Queue<Device, alpaka::NonBlocking>; | ||
using HostObject = THostObject; | ||
using Copy = CopyToDevice<HostObject>; | ||
using DeviceObject = decltype(Copy::copyAsync(std::declval<Queue&>(), std::declval<HostObject const&>())); | ||
|
||
CopyToDeviceCacheImpl(HostObject const& srcObject) { | ||
using Platform = alpaka::Platform<Device>; | ||
auto const& devices = cms::alpakatools::devices<Platform>(); | ||
std::vector<std::shared_ptr<Queue>> queues; | ||
queues.reserve(devices.size()); | ||
data_.reserve(devices.size()); | ||
for (auto const& dev : devices) { | ||
auto queue = getQueueCache<Queue>().get(dev); | ||
data_.emplace_back(Copy::copyAsync(*queue, srcObject)); | ||
queues.emplace_back(std::move(queue)); | ||
} | ||
for (auto& queuePtr : queues) { | ||
alpaka::wait(*queuePtr); | ||
} | ||
} | ||
|
||
DeviceObject const& get(size_t i) const { return data_[i]; } | ||
|
||
private: | ||
std::vector<DeviceObject> data_; | ||
}; | ||
|
||
// For host device, copy the host object directly instead | ||
template <typename THostObject> | ||
class CopyToDeviceCacheImpl<alpaka_common::DevHost, THostObject> { | ||
public: | ||
using HostObject = THostObject; | ||
using DeviceObject = HostObject; | ||
|
||
CopyToDeviceCacheImpl(HostObject const& srcObject) : data_(srcObject) {} | ||
|
||
DeviceObject const& get(size_t i) const { return data_; } | ||
|
||
private: | ||
HostObject data_; | ||
}; | ||
} // namespace detail | ||
|
||
/** | ||
* This class template implements a cache for data that is moved | ||
* from the host (of type THostObject) to all the devices | ||
* corresponding to the TDevice device type. | ||
* | ||
* The host-side object to be copied is given as an argument to the | ||
* class constructor. The constructor uses the | ||
* CopyToDevice<THostObject> class template to perfom the copy, and | ||
* waits for the data copies to finish, i.e. the constructor is | ||
* synchronous wrt. the data copies. | ||
* | ||
* The device-side object corresponding to the THostObject (actual | ||
* type is the return type of CopyToDevice<THostObject>::copyAsync()) | ||
* can be obtained with get() member function, that has either the | ||
* queue or device argument. | ||
*/ | ||
template <typename TDevice, typename THostObject> | ||
requires alpaka::isDevice<TDevice> | ||
class CopyToDeviceCache { | ||
using Device = TDevice; | ||
using HostObject = THostObject; | ||
using Impl = detail::CopyToDeviceCacheImpl<Device, HostObject>; | ||
using DeviceObject = typename Impl::DeviceObject; | ||
|
||
public: | ||
CopyToDeviceCache(THostObject const& srcData) : data_(srcData) {} | ||
|
||
DeviceObject const& get(Device const& dev) const { return data_.get(alpaka::getNativeHandle(dev)); } | ||
|
||
template <typename TQueue> | ||
DeviceObject const& get(TQueue const& queue) const { | ||
return get(alpaka::getDev(queue)); | ||
} | ||
|
||
private: | ||
Impl data_; | ||
}; | ||
} // namespace cms::alpakatools | ||
|
||
#endif |
101 changes: 101 additions & 0 deletions
101
HeterogeneousCore/AlpakaCore/interface/MoveToDeviceCache.h
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,101 @@ | ||
#ifndef HeterogeneousCore_AlpakaInterface_interface_MoveToDeviceCache_h | ||
#define HeterogeneousCore_AlpakaInterface_interface_MoveToDeviceCache_h | ||
|
||
#include <type_traits> | ||
|
||
#include <alpaka/alpaka.hpp> | ||
|
||
#include "HeterogeneousCore/AlpakaCore/interface/QueueCache.h" | ||
#include "HeterogeneousCore/AlpakaCore/interface/CopyToDeviceCache.h" | ||
#include "HeterogeneousCore/AlpakaInterface/interface/devices.h" | ||
|
||
namespace cms::alpakatools { | ||
namespace detail { | ||
// By default copy the host object with CopyToDevice<T> | ||
// | ||
// Doing with template specialization (rather than | ||
// std::conditional_t and if constexpr) because the | ||
// CopyToDevice<THostObject>::copyAsync() is ill-defined e.g. for | ||
// PortableCollection on host device | ||
template <typename TDevice, typename THostObject> | ||
class MoveToDeviceCacheImpl { | ||
public: | ||
using HostObject = THostObject; | ||
using Impl = CopyToDeviceCacheImpl<TDevice, THostObject>; | ||
using DeviceObject = typename Impl::DeviceObject; | ||
|
||
MoveToDeviceCacheImpl(HostObject&& srcObject) : impl_(srcObject) {} | ||
|
||
DeviceObject const& get(size_t i) const { return impl_.get(i); } | ||
|
||
private: | ||
Impl impl_; | ||
}; | ||
|
||
// For host device, move the host object instead | ||
template <typename THostObject> | ||
class MoveToDeviceCacheImpl<alpaka_common::DevHost, THostObject> { | ||
public: | ||
using HostObject = THostObject; | ||
using DeviceObject = HostObject; | ||
|
||
MoveToDeviceCacheImpl(HostObject&& srcObject) : data_(std::move(srcObject)) {} | ||
|
||
DeviceObject const& get(size_t i) const { return data_; } | ||
|
||
private: | ||
HostObject data_; | ||
}; | ||
} // namespace detail | ||
|
||
/** | ||
* This class template implements a cache for data that is moved | ||
* from the host (of type THostObject) to all the devices | ||
* corresponding to the TDevice device type. | ||
* | ||
* The host-side object to be moved is given as an argument to the | ||
* class constructor. The constructor uses the | ||
* CopyToDevice<THostObject> class template to copy the data to the | ||
* devices, and waits for the data copies to finish, i.e. the | ||
* constructor is synchronous wrt. the data copies. The "move" is | ||
* achieved by requiring the constructor argument to be an rvalue | ||
* reference. | ||
* | ||
* Note that the host object type is required to be non-copyable. | ||
* This is to avoid easy mistakes with objects that follow copy | ||
* semantics of std::shared_ptr (that includes Alpaka buffers), that | ||
* would allow the source memory buffer to be used via another copy | ||
* during the asynchronous data copy to the device. | ||
* | ||
* The device-side object corresponding to the THostObject (actual | ||
* type is the return type of CopyToDevice<THostObject>::copyAsync()) | ||
* can be obtained with get() member function, that has either the | ||
* queue or device argument. | ||
*/ | ||
template <typename TDevice, typename THostObject> | ||
requires alpaka::isDevice<TDevice> | ||
class MoveToDeviceCache { | ||
public: | ||
using Device = TDevice; | ||
using HostObject = THostObject; | ||
using Impl = detail::MoveToDeviceCacheImpl<Device, HostObject>; | ||
using DeviceObject = typename Impl::DeviceObject; | ||
|
||
static_assert(not(std::is_copy_constructible_v<HostObject> or std::is_copy_assignable_v<HostObject>), | ||
"The data object to be moved to device must not be copyable."); | ||
|
||
MoveToDeviceCache(HostObject&& srcData) : data_(std::move(srcData)) {} | ||
|
||
DeviceObject const& get(Device const& dev) const { return data_.get(alpaka::getNativeHandle(dev)); } | ||
|
||
template <typename TQueue> | ||
DeviceObject const& get(TQueue const& queue) const { | ||
return get(alpaka::getDev(queue)); | ||
} | ||
|
||
private: | ||
Impl data_; | ||
}; | ||
} // namespace cms::alpakatools | ||
|
||
#endif |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.