forked from cms-sw/cmssw
-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Move BeamSpot transfer to GPU to its own producer (cms-sw#318)
Implement a non-caching host allocator, useful for host-to-device copy buffers. It is not bound to any CUDA stream, which allows its use in EDM beginStream(), and it offers the possibility to pass flags to cudaHostAlloc(), e.g. cudaHostAllocWriteCombined. Add a perfect-forwarding overload of the CUDAProduct constructor, enabling the use of CUDAScopedContext::emplace() in BeamSpotToCUDA::produce(). Move the BeamSpot host-to-device transfer to its own EDProducer, which makes use of write-combined memory allocated in beginStream() for the transfer.
- Loading branch information
Showing
23 changed files
with
300 additions
and
35 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,8 @@ | ||
<use name="FWCore/ServiceRegistry"/> | ||
<use name="HeterogeneousCore/CUDAServices"/> | ||
<use name="cuda-api-wrappers"/> | ||
<use name="rootcore"/> | ||
|
||
<export> | ||
<lib name="1"/> | ||
</export> |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,32 @@ | ||
#ifndef CUDADataFormats_BeamSpot_interface_BeamSpotCUDA_h | ||
#define CUDADataFormats_BeamSpot_interface_BeamSpotCUDA_h | ||
|
||
#include "HeterogeneousCore/CUDAUtilities/interface/device_unique_ptr.h" | ||
|
||
#include <cuda/api_wrappers.h> | ||
|
||
class BeamSpotCUDA { | ||
public: | ||
// alignas(128) doesn't really make sense as there is only one | ||
// beamspot per event? | ||
struct Data { | ||
float x,y,z; // position | ||
// TODO: add covariance matrix | ||
|
||
float sigmaZ; | ||
float beamWidthX, beamWidthY; | ||
float dxdz, dydz; | ||
float emittanceX, emittanceY; | ||
float betaStar; | ||
}; | ||
|
||
BeamSpotCUDA() = default; | ||
BeamSpotCUDA(Data const* data_h, cuda::stream_t<>& stream); | ||
|
||
Data const* data() const { return data_d_.get(); } | ||
|
||
private: | ||
cudautils::device::unique_ptr<Data> data_d_; | ||
}; | ||
|
||
#endif |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,11 @@ | ||
#include "CUDADataFormats/BeamSpot/interface/BeamSpotCUDA.h" | ||
|
||
#include "FWCore/ServiceRegistry/interface/Service.h" | ||
#include "HeterogeneousCore/CUDAServices/interface/CUDAService.h" | ||
|
||
// Obtains storage for one Data from CUDAService::make_device_unique() on
// `stream`, then enqueues an asynchronous copy of sizeof(Data) bytes from
// data_h into that storage on the same stream.
// NOTE(review): the copy is only enqueued here, so data_h presumably has to
// stay valid until the stream has processed it -- confirm against callers.
BeamSpotCUDA::BeamSpotCUDA(Data const* data_h, cuda::stream_t<>& stream) {
  edm::Service<CUDAService> cudaService;

  data_d_ = cudaService->make_device_unique<Data>(stream);

  Data* const destination = data_d_.get();
  cuda::memory::async::copy(destination, data_h, sizeof(Data), stream.id());
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,8 @@ | ||
#ifndef CUDADataFormats_BeamSpot_classes_h | ||
#define CUDADataFormats_BeamSpot_classes_h | ||
|
||
#include "CUDADataFormats/Common/interface/CUDAProduct.h" | ||
#include "CUDADataFormats/BeamSpot/interface/BeamSpotCUDA.h" | ||
#include "DataFormats/Common/interface/Wrapper.h" | ||
|
||
#endif |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,4 @@ | ||
<lcgdict> | ||
<class name="CUDAProduct<BeamSpotCUDA>" persistent="false"/> | ||
<class name="edm::Wrapper<CUDAProduct<BeamSpotCUDA>>" persistent="false"/> | ||
</lcgdict> |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,5 +1,6 @@ | ||
<use name="cub"/> | ||
<use name="cuda"/> | ||
<use name="cuda-api-wrappers"/> | ||
|
||
<export> | ||
<lib name="1"/> | ||
|
65 changes: 65 additions & 0 deletions
65
HeterogeneousCore/CUDAUtilities/interface/host_noncached_unique_ptr.h
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,65 @@ | ||
#ifndef HeterogeneousCore_CUDAUtilities_interface_host_noncached_unique_ptr_h | ||
#define HeterogeneousCore_CUDAUtilities_interface_host_noncached_unique_ptr_h | ||
|
||
#include <cstddef>
#include <limits>
#include <memory>
#include <new>
#include <type_traits>

#include <cuda/api_wrappers.h>
#include <cuda_runtime.h>
|
||
namespace cudautils { | ||
namespace host { | ||
namespace noncached { | ||
namespace impl { | ||
// Additional layer of types to distinguish from host::unique_ptr | ||
class HostDeleter { | ||
public: | ||
void operator()(void *ptr) { | ||
cuda::throw_if_error(cudaFreeHost(ptr)); | ||
} | ||
}; | ||
} | ||
|
||
template <typename T> | ||
using unique_ptr = std::unique_ptr<T, impl::HostDeleter>; | ||
|
||
namespace impl { | ||
template <typename T> | ||
struct make_host_unique_selector { using non_array = cudautils::host::noncached::unique_ptr<T>; }; | ||
template <typename T> | ||
struct make_host_unique_selector<T[]> { using unbounded_array = cudautils::host::noncached::unique_ptr<T[]>; }; | ||
template <typename T, size_t N> | ||
struct make_host_unique_selector<T[N]> { struct bounded_array {}; }; | ||
} | ||
} | ||
} | ||
|
||
/** | ||
* The difference wrt. CUDAService::make_host_unique is that these | ||
* do not cache, so they should not be called per-event. | ||
*/ | ||
template <typename T> | ||
typename host::noncached::impl::make_host_unique_selector<T>::non_array | ||
make_host_noncached_unique(unsigned int flags = cudaHostAllocDefault) { | ||
static_assert(std::is_trivially_constructible<T>::value, "Allocating with non-trivial constructor on the pinned host memory is not supported"); | ||
void *mem; | ||
cuda::throw_if_error(cudaHostAlloc(&mem, sizeof(T), flags)); | ||
return typename cudautils::host::noncached::impl::make_host_unique_selector<T>::non_array(reinterpret_cast<T *>(mem)); | ||
} | ||
|
||
template <typename T> | ||
typename host::noncached::impl::make_host_unique_selector<T>::unbounded_array | ||
make_host_noncached_unique(size_t n, unsigned int flags = cudaHostAllocDefault) { | ||
using element_type = typename std::remove_extent<T>::type; | ||
static_assert(std::is_trivially_constructible<element_type>::value, "Allocating with non-trivial constructor on the pinned host memory is not supported"); | ||
void *mem; | ||
cuda::throw_if_error(cudaHostAlloc(&mem, n*sizeof(element_type), flags)); | ||
return typename cudautils::host::noncached::impl::make_host_unique_selector<T>::unbounded_array(reinterpret_cast<element_type *>(mem)); | ||
} | ||
|
||
template <typename T, typename ...Args> | ||
typename cudautils::host::noncached::impl::make_host_unique_selector<T>::bounded_array | ||
make_host_noncached_unique(Args&&...) = delete; | ||
} | ||
|
||
#endif | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
22 changes: 22 additions & 0 deletions
22
HeterogeneousCore/CUDAUtilities/test/host_noncached_unique_ptr_t.cpp
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,22 @@ | ||
#include "catch.hpp" | ||
|
||
#include "HeterogeneousCore/CUDAUtilities/interface/host_noncached_unique_ptr.h" | ||
#include "HeterogeneousCore/CUDAUtilities/interface/exitSansCUDADevices.h" | ||
|
||
// Exercises cudautils::make_host_noncached_unique for a single int and for an
// int[10], each once with the default flags and once with
// cudaHostAllocPortable | cudaHostAllocWriteCombined, and requires every
// returned pointer to be non-null.
TEST_CASE("host_noncached_unique_ptr", "[cudaMemTools]") {
  // Presumably leaves the test early on machines without a CUDA device --
  // see exitSansCUDADevices.h.
  exitSansCUDADevices();

  const unsigned int explicitFlags = cudaHostAllocPortable | cudaHostAllocWriteCombined;

  SECTION("Single element") {
    auto withDefaultFlags = cudautils::make_host_noncached_unique<int>();
    REQUIRE(withDefaultFlags != nullptr);

    auto withExplicitFlags = cudautils::make_host_noncached_unique<int>(explicitFlags);
    REQUIRE(withExplicitFlags != nullptr);
  }

  SECTION("Multiple elements") {
    auto withDefaultFlags = cudautils::make_host_noncached_unique<int[]>(10);
    REQUIRE(withDefaultFlags != nullptr);

    auto withExplicitFlags = cudautils::make_host_noncached_unique<int[]>(10, explicitFlags);
    REQUIRE(withExplicitFlags != nullptr);
  }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.