forked from cms-sw/cmssw
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Add infrastructure around cub CachingDeviceAllocator, and use it in S…
…iPixelRawToCluster (cms-sw#172) Add infrastructure around cub CachingDeviceAllocator for device memory allocations, and CachingHostAllocator for pinned (or managed) host memory. CUDAService uses the CachingHostAllocator to allocate requested GPU->CPU/CPU->GPU buffers and data products. Configuration options can be used to request: - to print all memory (re)allocations and frees; - to preallocate device and host buffers. SiPixelRawToCluster uses the CachingDeviceAllocator for temporary buffers and data products. Fix a memory problem with SiPixelFedCablingMapGPUWrapper::ModulesToUnpack.
- Loading branch information
Showing
24 changed files
with
1,468 additions
and
310 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,16 @@ | ||
#ifndef CUDADataFormats_Common_interface_device_unique_ptr_h | ||
#define CUDADataFormats_Common_interface_device_unique_ptr_h | ||
|
||
#include <memory> | ||
#include <functional> | ||
|
||
namespace edm { | ||
namespace cuda { | ||
namespace device { | ||
template <typename T> | ||
using unique_ptr = std::unique_ptr<T, std::function<void(void *)>>; | ||
} | ||
} | ||
} | ||
|
||
#endif |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,16 @@ | ||
#ifndef CUDADataFormats_Common_interface_host_unique_ptr_h | ||
#define CUDADataFormats_Common_interface_host_unique_ptr_h | ||
|
||
#include <memory> | ||
#include <functional> | ||
|
||
namespace edm { | ||
namespace cuda { | ||
namespace host { | ||
template <typename T> | ||
using unique_ptr = std::unique_ptr<T, std::function<void(void *)>>; | ||
} | ||
} | ||
} | ||
|
||
#endif |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,8 @@ | ||
<use name="FWCore/ServiceRegistry"/> | ||
<use name="HeterogeneousCore/CUDAServices"/> | ||
<use name="cuda-api-wrappers"/> | ||
|
||
<export> | ||
<lib name="1"/> | ||
</export> | ||
|
73 changes: 73 additions & 0 deletions
73
CUDADataFormats/SiPixelCluster/interface/SiPixelClustersCUDA.h
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,73 @@ | ||
#ifndef CUDADataFormats_SiPixelCluster_interface_SiPixelClustersCUDA_h | ||
#define CUDADataFormats_SiPixelCluster_interface_SiPixelClustersCUDA_h | ||
|
||
#include "CUDADataFormats/Common/interface/device_unique_ptr.h" | ||
|
||
#include <cuda/api_wrappers.h> | ||
|
||
class SiPixelClustersCUDA { | ||
public: | ||
SiPixelClustersCUDA() = default; | ||
explicit SiPixelClustersCUDA(size_t feds, size_t nelements, cuda::stream_t<>& stream); | ||
~SiPixelClustersCUDA() = default; | ||
|
||
SiPixelClustersCUDA(const SiPixelClustersCUDA&) = delete; | ||
SiPixelClustersCUDA& operator=(const SiPixelClustersCUDA&) = delete; | ||
SiPixelClustersCUDA(SiPixelClustersCUDA&&) = default; | ||
SiPixelClustersCUDA& operator=(SiPixelClustersCUDA&&) = default; | ||
|
||
uint32_t *moduleStart() { return moduleStart_d.get(); } | ||
int32_t *clus() { return clus_d.get(); } | ||
uint32_t *clusInModule() { return clusInModule_d.get(); } | ||
uint32_t *moduleId() { return moduleId_d.get(); } | ||
uint32_t *clusModuleStart() { return clusModuleStart_d.get(); } | ||
|
||
uint32_t const *moduleStart() const { return moduleStart_d.get(); } | ||
int32_t const *clus() const { return clus_d.get(); } | ||
uint32_t const *clusInModule() const { return clusInModule_d.get(); } | ||
uint32_t const *moduleId() const { return moduleId_d.get(); } | ||
uint32_t const *clusModuleStart() const { return clusModuleStart_d.get(); } | ||
|
||
uint32_t const *c_moduleStart() const { return moduleStart_d.get(); } | ||
int32_t const *c_clus() const { return clus_d.get(); } | ||
uint32_t const *c_clusInModule() const { return clusInModule_d.get(); } | ||
uint32_t const *c_moduleId() const { return moduleId_d.get(); } | ||
uint32_t const *c_clusModuleStart() const { return clusModuleStart_d.get(); } | ||
|
||
class DeviceConstView { | ||
public: | ||
DeviceConstView() = default; | ||
|
||
#ifdef __CUDACC__ | ||
__device__ __forceinline__ uint32_t moduleStart(int i) const { return __ldg(moduleStart_+i); } | ||
__device__ __forceinline__ int32_t clus(int i) const { return __ldg(clus_+i); } | ||
__device__ __forceinline__ uint32_t clusInModule(int i) const { return __ldg(clusInModule_+i); } | ||
__device__ __forceinline__ uint32_t moduleId(int i) const { return __ldg(moduleId_+i); } | ||
__device__ __forceinline__ uint32_t clusModuleStart(int i) const { return __ldg(clusModuleStart_+i); } | ||
#endif | ||
|
||
friend SiPixelClustersCUDA; | ||
|
||
private: | ||
uint32_t const *moduleStart_ = nullptr; | ||
int32_t const *clus_ = nullptr; | ||
uint32_t const *clusInModule_ = nullptr; | ||
uint32_t const *moduleId_ = nullptr; | ||
uint32_t const *clusModuleStart_ = nullptr; | ||
}; | ||
|
||
DeviceConstView *view() const { return view_d.get(); } | ||
|
||
private: | ||
edm::cuda::device::unique_ptr<uint32_t[]> moduleStart_d; // index of the first pixel of each module | ||
edm::cuda::device::unique_ptr<int32_t[]> clus_d; // cluster id of each pixel | ||
edm::cuda::device::unique_ptr<uint32_t[]> clusInModule_d; // number of clusters found in each module | ||
edm::cuda::device::unique_ptr<uint32_t[]> moduleId_d; // module id of each module | ||
|
||
// originally from rechits | ||
edm::cuda::device::unique_ptr<uint32_t[]> clusModuleStart_d; | ||
|
||
edm::cuda::device::unique_ptr<DeviceConstView> view_d; // "me" pointer | ||
}; | ||
|
||
#endif |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,24 @@ | ||
#include "CUDADataFormats/SiPixelCluster/interface/SiPixelClustersCUDA.h" | ||
|
||
#include "FWCore/ServiceRegistry/interface/Service.h" | ||
#include "HeterogeneousCore/CUDAServices/interface/CUDAService.h" | ||
|
||
SiPixelClustersCUDA::SiPixelClustersCUDA(size_t feds, size_t nelements, cuda::stream_t<>& stream) { | ||
edm::Service<CUDAService> cs; | ||
|
||
moduleStart_d = cs->make_device_unique<uint32_t[]>(nelements+1, stream); | ||
clus_d = cs->make_device_unique< int32_t[]>(feds, stream); | ||
clusInModule_d = cs->make_device_unique<uint32_t[]>(nelements, stream); | ||
moduleId_d = cs->make_device_unique<uint32_t[]>(nelements, stream); | ||
clusModuleStart_d = cs->make_device_unique<uint32_t[]>(nelements+1, stream); | ||
|
||
auto view = cs->make_host_unique<DeviceConstView>(stream); | ||
view->moduleStart_ = moduleStart_d.get(); | ||
view->clus_ = clus_d.get(); | ||
view->clusInModule_ = clusInModule_d.get(); | ||
view->moduleId_ = moduleId_d.get(); | ||
view->clusModuleStart_ = clusModuleStart_d.get(); | ||
|
||
view_d = cs->make_device_unique<DeviceConstView>(stream); | ||
cudaMemcpyAsync(view_d.get(), view.get(), sizeof(DeviceConstView), cudaMemcpyDefault, stream.id()); | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,7 @@ | ||
<use name="FWCore/ServiceRegistry"/> | ||
<use name="HeterogeneousCore/CUDAServices"/> | ||
<use name="cuda-api-wrappers"/> | ||
|
||
<export> | ||
<lib name="1"/> | ||
</export> |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,65 @@ | ||
#ifndef CUDADataFormats_SiPixelDigi_interface_SiPixelDigisCUDA_h | ||
#define CUDADataFormats_SiPixelDigi_interface_SiPixelDigisCUDA_h | ||
|
||
#include "CUDADataFormats/Common/interface/device_unique_ptr.h" | ||
#include "FWCore/Utilities/interface/propagate_const.h" | ||
|
||
#include <cuda/api_wrappers.h> | ||
|
||
class SiPixelDigisCUDA { | ||
public: | ||
SiPixelDigisCUDA() = default; | ||
explicit SiPixelDigisCUDA(size_t nelements, cuda::stream_t<>& stream); | ||
~SiPixelDigisCUDA() = default; | ||
|
||
SiPixelDigisCUDA(const SiPixelDigisCUDA&) = delete; | ||
SiPixelDigisCUDA& operator=(const SiPixelDigisCUDA&) = delete; | ||
SiPixelDigisCUDA(SiPixelDigisCUDA&&) = default; | ||
SiPixelDigisCUDA& operator=(SiPixelDigisCUDA&&) = default; | ||
|
||
uint16_t * xx() { return xx_d.get(); } | ||
uint16_t * yy() { return yy_d.get(); } | ||
uint16_t * adc() { return adc_d.get(); } | ||
uint16_t * moduleInd() { return moduleInd_d.get(); } | ||
|
||
uint16_t const *xx() const { return xx_d.get(); } | ||
uint16_t const *yy() const { return yy_d.get(); } | ||
uint16_t const *adc() const { return adc_d.get(); } | ||
uint16_t const *moduleInd() const { return moduleInd_d.get(); } | ||
|
||
uint16_t const *c_xx() const { return xx_d.get(); } | ||
uint16_t const *c_yy() const { return yy_d.get(); } | ||
uint16_t const *c_adc() const { return adc_d.get(); } | ||
uint16_t const *c_moduleInd() const { return moduleInd_d.get(); } | ||
|
||
class DeviceConstView { | ||
public: | ||
DeviceConstView() = default; | ||
|
||
#ifdef __CUDACC__ | ||
__device__ __forceinline__ uint16_t xx(int i) const { return __ldg(xx_+i); } | ||
__device__ __forceinline__ uint16_t yy(int i) const { return __ldg(yy_+i); } | ||
__device__ __forceinline__ uint16_t adc(int i) const { return __ldg(adc_+i); } | ||
__device__ __forceinline__ uint16_t moduleInd(int i) const { return __ldg(moduleInd_+i); } | ||
#endif | ||
|
||
friend class SiPixelDigisCUDA; | ||
|
||
private: | ||
uint16_t const *xx_ = nullptr; | ||
uint16_t const *yy_ = nullptr; | ||
uint16_t const *adc_ = nullptr; | ||
uint16_t const *moduleInd_ = nullptr; | ||
}; | ||
|
||
const DeviceConstView *view() const { return view_d.get(); } | ||
|
||
private: | ||
edm::cuda::device::unique_ptr<uint16_t[]> xx_d; // local coordinates of each pixel | ||
edm::cuda::device::unique_ptr<uint16_t[]> yy_d; // | ||
edm::cuda::device::unique_ptr<uint16_t[]> adc_d; // ADC of each pixel | ||
edm::cuda::device::unique_ptr<uint16_t[]> moduleInd_d; // module id of each pixel | ||
edm::cuda::device::unique_ptr<DeviceConstView> view_d; // "me" pointer | ||
}; | ||
|
||
#endif |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,24 @@ | ||
#include "CUDADataFormats/SiPixelDigi/interface/SiPixelDigisCUDA.h" | ||
|
||
#include "FWCore/ServiceRegistry/interface/Service.h" | ||
#include "HeterogeneousCore/CUDAServices/interface/CUDAService.h" | ||
|
||
#include <cuda_runtime.h> | ||
|
||
SiPixelDigisCUDA::SiPixelDigisCUDA(size_t nelements, cuda::stream_t<>& stream) { | ||
edm::Service<CUDAService> cs; | ||
|
||
xx_d = cs->make_device_unique<uint16_t[]>(nelements, stream); | ||
yy_d = cs->make_device_unique<uint16_t[]>(nelements, stream); | ||
adc_d = cs->make_device_unique<uint16_t[]>(nelements, stream); | ||
moduleInd_d = cs->make_device_unique<uint16_t[]>(nelements, stream); | ||
|
||
auto view = cs->make_host_unique<DeviceConstView>(stream); | ||
view->xx_ = xx_d.get(); | ||
view->yy_ = yy_d.get(); | ||
view->adc_ = adc_d.get(); | ||
view->moduleInd_ = moduleInd_d.get(); | ||
|
||
view_d = cs->make_device_unique<DeviceConstView>(stream); | ||
cudaMemcpyAsync(view_d.get(), view.get(), sizeof(DeviceConstView), cudaMemcpyDefault, stream.id()); | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.