diff --git a/RecoLocalTracker/SiPixelClusterizer/interface/SiPixelFedCablingMapGPUWrapper.h b/RecoLocalTracker/SiPixelClusterizer/interface/SiPixelFedCablingMapGPUWrapper.h index 50e1fb51949e3..e5e5f41053e3d 100644 --- a/RecoLocalTracker/SiPixelClusterizer/interface/SiPixelFedCablingMapGPUWrapper.h +++ b/RecoLocalTracker/SiPixelClusterizer/interface/SiPixelFedCablingMapGPUWrapper.h @@ -28,26 +28,12 @@ class SiPixelFedCablingMapGPUWrapper { // returns pointer to GPU memory const SiPixelFedCablingMapGPU *getGPUProductAsync(cuda::stream_t<>& cudaStream) const; + // returns pointer to GPU memory + const unsigned char *getModToUnpAllAsync(cuda::stream_t<>& cudaStream) const; + edm::cuda::device::unique_ptr getModToUnpRegionalAsync(std::set const& modules, cuda::stream_t<>& cudaStream) const; - // Allocates host and device memory, converts data to host memory, - // copies host memory to device memory asynchronously. It is the - // caller's responsibility to have this object to live until all - // operations on the device memory have completed. - class ModulesToUnpack { - public: - ModulesToUnpack(cuda::stream_t<>& cudaStream); - ~ModulesToUnpack() = default; - - void fillAsync(SiPixelFedCablingMap const& cablingMap, std::set const& modules, cuda::stream_t<>& cudaStream); - - const unsigned char *get() const { return modToUnpDevice.get(); } - - private: - edm::cuda::device::unique_ptr modToUnpDevice; - edm::cuda::host::unique_ptr modToUnpHost; - }; - private: + const SiPixelFedCablingMap *cablingMap_; std::vector> fedMap; std::vector> linkMap; std::vector> rocMap; @@ -55,6 +41,7 @@ class SiPixelFedCablingMapGPUWrapper { std::vector> rocInDet; std::vector> moduleId; std::vector> badRocs; + std::vector> modToUnpDefault; unsigned int size; bool hasQuality_; @@ -64,6 +51,12 @@ class SiPixelFedCablingMapGPUWrapper { SiPixelFedCablingMapGPU *cablingMapDevice = nullptr; // same internal pointers as above, struct itself is on GPU }; CUDAESProduct gpuData_; + + struct ModulesToUnpack { + ~ModulesToUnpack(); + unsigned char *modToUnpDefault = nullptr; // pointer to GPU + }; + CUDAESProduct modToUnp_; }; diff --git a/RecoLocalTracker/SiPixelClusterizer/src/SiPixelFedCablingMapGPUWrapper.cc b/RecoLocalTracker/SiPixelClusterizer/src/SiPixelFedCablingMapGPUWrapper.cc index d43ebfb3c7192..b652100f69e9f 100644 --- a/RecoLocalTracker/SiPixelClusterizer/src/SiPixelFedCablingMapGPUWrapper.cc +++ b/RecoLocalTracker/SiPixelClusterizer/src/SiPixelFedCablingMapGPUWrapper.cc @@ -21,9 +21,10 @@ SiPixelFedCablingMapGPUWrapper::SiPixelFedCablingMapGPUWrapper(SiPixelFedCablingMap const& cablingMap, TrackerGeometry const& trackerGeom, SiPixelQuality const *badPixelInfo): + cablingMap_(&cablingMap), fedMap(pixelgpudetails::MAX_SIZE), linkMap(pixelgpudetails::MAX_SIZE), rocMap(pixelgpudetails::MAX_SIZE), RawId(pixelgpudetails::MAX_SIZE), rocInDet(pixelgpudetails::MAX_SIZE), moduleId(pixelgpudetails::MAX_SIZE), - badRocs(pixelgpudetails::MAX_SIZE), + badRocs(pixelgpudetails::MAX_SIZE), modToUnpDefault(pixelgpudetails::MAX_SIZE), hasQuality_(badPixelInfo != nullptr) { std::vector const& fedIds = cablingMap.fedIds(); @@ -46,6 +47,7 @@ SiPixelFedCablingMapGPUWrapper::SiPixelFedCablingMapGPUWrapper(SiPixelFedCabling if (pixelRoc != nullptr) { RawId[index] = pixelRoc->rawId(); rocInDet[index] = pixelRoc->idInDetUnit(); + modToUnpDefault[index] = false; if (badPixelInfo != nullptr) badRocs[index] = badPixelInfo->IsRocBad(pixelRoc->rawId(), pixelRoc->idInDetUnit()); else @@ -54,6 +56,7 @@ SiPixelFedCablingMapGPUWrapper::SiPixelFedCablingMapGPUWrapper(SiPixelFedCabling RawId[index] = 9999; rocInDet[index] = 9999; badRocs[index] = true; + modToUnpDefault[index] = true; } index++; } @@ -124,16 +127,21 @@ const SiPixelFedCablingMapGPU *SiPixelFedCablingMapGPUWrapper::getGPUProductAsyn return data.cablingMapDevice; } -SiPixelFedCablingMapGPUWrapper::ModulesToUnpack::ModulesToUnpack(cuda::stream_t<>& cudaStream) -{ - edm::Service cs; - modToUnpDevice = cs->make_device_unique(pixelgpudetails::MAX_SIZE, cudaStream); - modToUnpHost = cs->make_host_unique(pixelgpudetails::MAX_SIZE, cudaStream); +const unsigned char *SiPixelFedCablingMapGPUWrapper::getModToUnpAllAsync(cuda::stream_t<>& cudaStream) const { + const auto& data = modToUnp_.dataForCurrentDeviceAsync(cudaStream, [this](ModulesToUnpack& data, cuda::stream_t<>& stream) { + cudaCheck(cudaMalloc((void**) & data.modToUnpDefault, pixelgpudetails::MAX_SIZE_BYTE_BOOL)); + cudaCheck(cudaMemcpyAsync(data.modToUnpDefault, this->modToUnpDefault.data(), this->modToUnpDefault.size() * sizeof(unsigned char), cudaMemcpyDefault, stream.id())); + }); + return data.modToUnpDefault; } -void SiPixelFedCablingMapGPUWrapper::ModulesToUnpack::fillAsync(SiPixelFedCablingMap const& cablingMap, std::set const& modules, cuda::stream_t<>& cudaStream) { - std::vector const& fedIds = cablingMap.fedIds(); - std::unique_ptr const& cabling = cablingMap.cablingTree(); +edm::cuda::device::unique_ptr SiPixelFedCablingMapGPUWrapper::getModToUnpRegionalAsync(std::set const& modules, cuda::stream_t<>& cudaStream) const { + edm::Service cs; + auto modToUnpDevice = cs->make_device_unique(pixelgpudetails::MAX_SIZE, cudaStream); + auto modToUnpHost = cs->make_host_unique(pixelgpudetails::MAX_SIZE, cudaStream); + + std::vector const& fedIds = cablingMap_->fedIds(); + std::unique_ptr const& cabling = cablingMap_->cablingTree(); unsigned int startFed = *(fedIds.begin()); unsigned int endFed = *(fedIds.end() - 1); @@ -157,6 +165,7 @@ void SiPixelFedCablingMapGPUWrapper::ModulesToUnpack::fillAsync(SiPixelFedCablin } cuda::memory::async::copy(modToUnpDevice.get(), modToUnpHost.get(), pixelgpudetails::MAX_SIZE * sizeof(unsigned char), cudaStream.id()); + return modToUnpDevice; } @@ -173,3 +182,7 @@ SiPixelFedCablingMapGPUWrapper::GPUData::~GPUData() { } cudaCheck(cudaFree(cablingMapDevice)); } + +SiPixelFedCablingMapGPUWrapper::ModulesToUnpack::~ModulesToUnpack() { + cudaCheck(cudaFree(modToUnpDefault)); +}