diff --git a/CUDADataFormats/SiPixelDigi/interface/SiPixelDigiErrorsCUDA.h b/CUDADataFormats/SiPixelDigi/interface/SiPixelDigiErrorsCUDA.h index ddabd531e7602..9f33bf076418d 100644 --- a/CUDADataFormats/SiPixelDigi/interface/SiPixelDigiErrorsCUDA.h +++ b/CUDADataFormats/SiPixelDigi/interface/SiPixelDigiErrorsCUDA.h @@ -14,7 +14,7 @@ class SiPixelDigiErrorsCUDA { using SiPixelErrorCompactVector = cms::cuda::SimpleVector; SiPixelDigiErrorsCUDA() = default; - inline SiPixelDigiErrorsCUDA(size_t maxFedWords, SiPixelFormatterErrors errors, cudaStream_t stream); + SiPixelDigiErrorsCUDA(size_t maxFedWords, SiPixelFormatterErrors errors, cudaStream_t stream); ~SiPixelDigiErrorsCUDA() = default; SiPixelDigiErrorsCUDA(const SiPixelDigiErrorsCUDA&) = delete; @@ -28,9 +28,9 @@ class SiPixelDigiErrorsCUDA { SiPixelErrorCompactVector const* error() const { return error_d.get(); } using HostDataError = std::pair>; - inline HostDataError dataErrorToHostAsync(cudaStream_t stream) const; + HostDataError dataErrorToHostAsync(cudaStream_t stream) const; - inline void copyErrorToHostAsync(cudaStream_t stream); + void copyErrorToHostAsync(cudaStream_t stream); int nErrorWords() const { return nErrorWords_; } private: @@ -42,51 +42,5 @@ class SiPixelDigiErrorsCUDA { }; -#include "HeterogeneousCore/CUDAUtilities/interface/cudaMemoryPool.h" -#include "HeterogeneousCore/CUDAUtilities/interface/cudaCheck.h" - - -SiPixelDigiErrorsCUDA::SiPixelDigiErrorsCUDA(size_t maxFedWords, SiPixelFormatterErrors errors, cudaStream_t stream) : - formatterErrors_h(std::move(errors)), - nErrorWords_(maxFedWords) { - assert(maxFedWords != 0); - - memoryPool::Deleter deleter = memoryPool::Deleter(std::make_shared(stream, memoryPool::onDevice)); - assert(deleter.pool()); - - data_d = memoryPool::cuda::make_buffer(maxFedWords, deleter); - error_d = memoryPool::cuda::make_buffer(1,deleter); - error_h = memoryPool::cuda::make_buffer(1,stream,memoryPool::onHost); - - - cudaMemsetAsync(data_d.get(), 0x00, 
maxFedWords, stream); - - cms::cuda::make_SimpleVector(error_h.get(), maxFedWords, data_d.get()); - assert(error_h->empty()); - assert(error_h->capacity() == static_cast(maxFedWords)); - - cudaCheck(memoryPool::cuda::copy(error_d, error_h, 1,stream)); -} - -void SiPixelDigiErrorsCUDA::copyErrorToHostAsync(cudaStream_t stream) { - cudaCheck(memoryPool::cuda::copy(error_h, error_d, 1,stream)); -} - -SiPixelDigiErrorsCUDA::HostDataError SiPixelDigiErrorsCUDA::dataErrorToHostAsync(cudaStream_t stream) const { - // On one hand size() could be sufficient. On the other hand, if - // someone copies the SimpleVector<>, (s)he might expect the data - // buffer to actually have space for capacity() elements. - auto data = memoryPool::cuda::make_buffer(error_h->capacity(), stream, memoryPool::onHost); - - // but transfer only the required amount - if (not error_h->empty()) { - cudaCheck(memoryPool::cuda::copy(data, data_d, error_h->size(), stream)); - } - auto err = *error_h; - err.set_data(data.get()); - return HostDataError(err, std::move(data)); -} - - #endif // CUDADataFormats_SiPixelDigi_interface_SiPixelDigiErrorsCUDA_h diff --git a/CUDADataFormats/SiPixelDigi/interface/SiPixelDigisCUDA.h b/CUDADataFormats/SiPixelDigi/interface/SiPixelDigisCUDA.h index 6f3ceb17f5a28..b3126097e15a4 100644 --- a/CUDADataFormats/SiPixelDigi/interface/SiPixelDigisCUDA.h +++ b/CUDADataFormats/SiPixelDigi/interface/SiPixelDigisCUDA.h @@ -4,14 +4,13 @@ #include #include "HeterogeneousCore/CUDAUtilities/interface/memoryPool.h" -#include "HeterogeneousCore/CUDAUtilities/interface/cudaCompat.h" #include "CUDADataFormats/SiPixelDigi/interface/SiPixelDigisCUDASOAView.h" class SiPixelDigisCUDA { public: using StoreType = uint16_t; SiPixelDigisCUDA() = default; - inline SiPixelDigisCUDA(size_t maxFedWords, cudaStream_t stream); + /*inline*/ SiPixelDigisCUDA(size_t maxFedWords, cudaStream_t stream); ~SiPixelDigisCUDA() = default; SiPixelDigisCUDA(const SiPixelDigisCUDA &) = delete; @@ -27,7 +26,7 @@ 
class SiPixelDigisCUDA { uint32_t nModules() const { return nModules_h; } uint32_t nDigis() const { return nDigis_h; } - inline memoryPool::buffer copyAllToHostAsync(cudaStream_t stream) const; + /*inline*/ memoryPool::buffer copyAllToHostAsync(cudaStream_t stream) const; SiPixelDigisCUDASOAView view() { return m_view; } SiPixelDigisCUDASOAView const view() const { return m_view; } @@ -43,6 +42,6 @@ class SiPixelDigisCUDA { }; -#include "SiPixelDigisCUDAImpl.h" +// #include "SiPixelDigisCUDAImpl.h" #endif // CUDADataFormats_SiPixelDigi_interface_SiPixelDigisCUDA_h diff --git a/CUDADataFormats/SiPixelDigi/src/SiPixelDigiErrorsCUDA.cc b/CUDADataFormats/SiPixelDigi/src/SiPixelDigiErrorsCUDA.cc index fb2d7618d6eea..d9432c94edb5e 100644 --- a/CUDADataFormats/SiPixelDigi/src/SiPixelDigiErrorsCUDA.cc +++ b/CUDADataFormats/SiPixelDigi/src/SiPixelDigiErrorsCUDA.cc @@ -1 +1,47 @@ #include "CUDADataFormats/SiPixelDigi/interface/SiPixelDigiErrorsCUDA.h" + +#include "HeterogeneousCore/CUDAUtilities/interface/cudaMemoryPool.h" +#include "HeterogeneousCore/CUDAUtilities/interface/cudaCheck.h" + + +SiPixelDigiErrorsCUDA::SiPixelDigiErrorsCUDA(size_t maxFedWords, SiPixelFormatterErrors errors, cudaStream_t stream) : + formatterErrors_h(std::move(errors)), + nErrorWords_(maxFedWords) { + assert(maxFedWords != 0); + + memoryPool::Deleter deleter = memoryPool::Deleter(std::make_shared(stream, memoryPool::onDevice)); + assert(deleter.pool()); + + data_d = memoryPool::cuda::make_buffer(maxFedWords, deleter); + error_d = memoryPool::cuda::make_buffer(1,deleter); + error_h = memoryPool::cuda::make_buffer(1,stream,memoryPool::onHost); + + // Check the memset's return code, as is done for the memoryPool copies below. NOTE(review): the count argument is in bytes; confirm that clearing only maxFedWords bytes of data_d is intended. + cudaCheck(cudaMemsetAsync(data_d.get(), 0x00, maxFedWords, stream)); + + cms::cuda::make_SimpleVector(error_h.get(), maxFedWords, data_d.get()); + assert(error_h->empty()); + assert(error_h->capacity() == static_cast(maxFedWords)); + + cudaCheck(memoryPool::cuda::copy(error_d, error_h, 1,stream)); +} + +void SiPixelDigiErrorsCUDA::copyErrorToHostAsync(cudaStream_t 
stream) { + cudaCheck(memoryPool::cuda::copy(error_h, error_d, 1,stream)); +} + +SiPixelDigiErrorsCUDA::HostDataError SiPixelDigiErrorsCUDA::dataErrorToHostAsync(cudaStream_t stream) const { + // On one hand size() could be sufficient. On the other hand, if + // someone copies the SimpleVector<>, (s)he might expect the data + // buffer to actually have space for capacity() elements. + auto data = memoryPool::cuda::make_buffer(error_h->capacity(), stream, memoryPool::onHost); + + // but transfer only the required amount + if (not error_h->empty()) { + cudaCheck(memoryPool::cuda::copy(data, data_d, error_h->size(), stream)); + } + auto err = *error_h; + err.set_data(data.get()); + return HostDataError(err, std::move(data)); +} + diff --git a/CUDADataFormats/SiPixelDigi/src/SiPixelDigisCUDA.cc b/CUDADataFormats/SiPixelDigi/src/SiPixelDigisCUDA.cc index 2b230d26c9b4b..700a4198bb293 100644 --- a/CUDADataFormats/SiPixelDigi/src/SiPixelDigisCUDA.cc +++ b/CUDADataFormats/SiPixelDigi/src/SiPixelDigisCUDA.cc @@ -1 +1,3 @@ #include "CUDADataFormats/SiPixelDigi/interface/SiPixelDigisCUDA.h" +#include "CUDADataFormats/SiPixelDigi/interface/SiPixelDigisCUDAImpl.h" + diff --git a/CUDADataFormats/TrackingRecHit/interface/TrackingRecHit2DHeterogeneous.h b/CUDADataFormats/TrackingRecHit/interface/TrackingRecHit2DHeterogeneous.h index 251c0b0edc933..8b106ed8df960 100644 --- a/CUDADataFormats/TrackingRecHit/interface/TrackingRecHit2DHeterogeneous.h +++ b/CUDADataFormats/TrackingRecHit/interface/TrackingRecHit2DHeterogeneous.h @@ -6,7 +6,6 @@ #include "HeterogeneousCore/CUDAUtilities/interface/memoryPool.h" -#include "HeterogeneousCore/CUDAUtilities/interface/cudaCheck.h" class TrackingRecHit2DHeterogeneous { public: @@ -38,7 +37,7 @@ class TrackingRecHit2DHeterogeneous { TrackingRecHit2DHeterogeneous() = default; - inline TrackingRecHit2DHeterogeneous(uint32_t nHits, + /*inline*/ TrackingRecHit2DHeterogeneous(uint32_t nHits, bool isPhase2, int32_t offsetBPIX2, 
pixelCPEforGPU::ParamsOnGPU const* cpeParams, @@ -48,7 +47,7 @@ class TrackingRecHit2DHeterogeneous { TrackingRecHit2DHeterogeneous const* input = nullptr); // used on CPU only - inline TrackingRecHit2DHeterogeneous(float* store32, + /*inline*/ TrackingRecHit2DHeterogeneous(float* store32, uint16_t* store16, uint32_t* modules, int nHits, @@ -74,15 +73,15 @@ class TrackingRecHit2DHeterogeneous { auto phiBinnerStorage() { return m_phiBinnerStorage; } auto iphi() { return m_iphi; } - inline buffer localCoordToHostAsync(cudaStream_t stream) const; + /*inline*/ buffer localCoordToHostAsync(cudaStream_t stream) const; - inline buffer hitsModuleStartToHostAsync(cudaStream_t stream) const; + /*inline*/ buffer hitsModuleStartToHostAsync(cudaStream_t stream) const; - inline buffer store16ToHostAsync(cudaStream_t stream) const; - inline buffer store32ToHostAsync(cudaStream_t stream) const; + /*inline*/ buffer store16ToHostAsync(cudaStream_t stream) const; + /*inline*/ buffer store32ToHostAsync(cudaStream_t stream) const; // needed for Host - inline void copyFromGPU(TrackingRecHit2DHeterogeneous const* input, cudaStream_t stream); + /*inline*/ void copyFromGPU(TrackingRecHit2DHeterogeneous const* input, cudaStream_t stream); private: static constexpr uint32_t n16 = 4; // number of elements in m_store16 @@ -114,6 +113,6 @@ using TrackingRecHit2DGPU = TrackingRecHit2DHeterogeneous; using TrackingRecHit2DCPU = TrackingRecHit2DHeterogeneous; using TrackingRecHit2DHost = TrackingRecHit2DHeterogeneous; -#include "TrackingRecHit2DHeterogeneousImpl.h" +// #include "TrackingRecHit2DHeterogeneousImpl.h" #endif // CUDADataFormats_TrackingRecHit_interface_TrackingRecHit2DHeterogeneous_h diff --git a/CUDADataFormats/TrackingRecHit/interface/TrackingRecHit2DHeterogeneousImpl.h b/CUDADataFormats/TrackingRecHit/interface/TrackingRecHit2DHeterogeneousImpl.h index 71486facd6ae8..3543c0f691985 100644 --- a/CUDADataFormats/TrackingRecHit/interface/TrackingRecHit2DHeterogeneousImpl.h +++ 
b/CUDADataFormats/TrackingRecHit/interface/TrackingRecHit2DHeterogeneousImpl.h @@ -1,4 +1,4 @@ -// #include "CUDADataFormats/TrackingRecHit/interface/TrackingRecHit2DHeterogeneous.h" +#include "CUDADataFormats/TrackingRecHit/interface/TrackingRecHit2DHeterogeneous.h" #include "CUDADataFormats/TrackingRecHit/interface/TrackingRecHit2DSOAView.h" #include "RecoLocalTracker/SiPixelRecHits/interface/pixelCPEforGPU.h" diff --git a/CUDADataFormats/TrackingRecHit/src/TrackingRecHit2DHeterogeneous.cc b/CUDADataFormats/TrackingRecHit/src/TrackingRecHit2DHeterogeneous.cc new file mode 100644 index 0000000000000..115b80db650b4 --- /dev/null +++ b/CUDADataFormats/TrackingRecHit/src/TrackingRecHit2DHeterogeneous.cc @@ -0,0 +1,2 @@ +#include "CUDADataFormats/TrackingRecHit/interface/TrackingRecHit2DHeterogeneousImpl.h" + diff --git a/HeterogeneousCore/CUDAUtilities/interface/cudaMemoryPool.h b/HeterogeneousCore/CUDAUtilities/interface/cudaMemoryPool.h index 5b9f75c27e9fd..897e5077e0659 100644 --- a/HeterogeneousCore/CUDAUtilities/interface/cudaMemoryPool.h +++ b/HeterogeneousCore/CUDAUtilities/interface/cudaMemoryPool.h @@ -3,9 +3,7 @@ #include // only for cudaStream_t -#include #include -#include #include diff --git a/HeterogeneousCore/CUDAUtilities/interface/cudaMemoryPoolImpl.h b/HeterogeneousCore/CUDAUtilities/interface/cudaMemoryPoolImpl.h index 0a346d0d84f13..32e86d1e15268 100644 --- a/HeterogeneousCore/CUDAUtilities/interface/cudaMemoryPoolImpl.h +++ b/HeterogeneousCore/CUDAUtilities/interface/cudaMemoryPoolImpl.h @@ -4,6 +4,7 @@ #include "HeterogeneousCore/CUDAUtilities/interface/cudaCheck.h" +#include #include #include