Skip to content

Commit

Permalink
remove inline
Browse files Browse the repository at this point in the history
  • Loading branch information
VinInn committed May 8, 2022
1 parent 849da8c commit e7d8632
Show file tree
Hide file tree
Showing 9 changed files with 66 additions and 65 deletions.
52 changes: 3 additions & 49 deletions CUDADataFormats/SiPixelDigi/interface/SiPixelDigiErrorsCUDA.h
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ class SiPixelDigiErrorsCUDA {
using SiPixelErrorCompactVector = cms::cuda::SimpleVector<SiPixelErrorCompact>;

SiPixelDigiErrorsCUDA() = default;
inline SiPixelDigiErrorsCUDA(size_t maxFedWords, SiPixelFormatterErrors errors, cudaStream_t stream);
SiPixelDigiErrorsCUDA(size_t maxFedWords, SiPixelFormatterErrors errors, cudaStream_t stream);
~SiPixelDigiErrorsCUDA() = default;

SiPixelDigiErrorsCUDA(const SiPixelDigiErrorsCUDA&) = delete;
Expand All @@ -28,9 +28,9 @@ class SiPixelDigiErrorsCUDA {
SiPixelErrorCompactVector const* error() const { return error_d.get(); }

using HostDataError = std::pair<SiPixelErrorCompactVector, memoryPool::buffer<SiPixelErrorCompact>>;
inline HostDataError dataErrorToHostAsync(cudaStream_t stream) const;
HostDataError dataErrorToHostAsync(cudaStream_t stream) const;

inline void copyErrorToHostAsync(cudaStream_t stream);
void copyErrorToHostAsync(cudaStream_t stream);
int nErrorWords() const { return nErrorWords_; }

private:
Expand All @@ -42,51 +42,5 @@ class SiPixelDigiErrorsCUDA {
};


#include "HeterogeneousCore/CUDAUtilities/interface/cudaMemoryPool.h"
#include "HeterogeneousCore/CUDAUtilities/interface/cudaCheck.h"


// Builds the error container for up to `maxFedWords` compact error words.
//
// Parameters:
//   maxFedWords - number of SiPixelErrorCompact elements to reserve; must be non-zero (asserted).
//   errors      - formatter errors, moved into formatterErrors_h.
//   stream      - CUDA stream used for the allocations, the memset and the descriptor copy.
//
// All CUDA work is only enqueued on `stream`; no synchronization is performed here.
SiPixelDigiErrorsCUDA::SiPixelDigiErrorsCUDA(size_t maxFedWords, SiPixelFormatterErrors errors, cudaStream_t stream) :
  formatterErrors_h(std::move(errors)),
  nErrorWords_(maxFedWords) {
  assert(maxFedWords != 0);

  // A single deleter (BundleDelete bound to `stream`, memoryPool::onDevice) is shared by both
  // device-side buffers below.
  memoryPool::Deleter deleter = memoryPool::Deleter(std::make_shared<memoryPool::cuda::BundleDelete>(stream, memoryPool::onDevice));
  assert(deleter.pool());

  data_d = memoryPool::cuda::make_buffer<SiPixelErrorCompact>(maxFedWords, deleter);
  error_d = memoryPool::cuda::make_buffer<SiPixelErrorCompactVector>(1,deleter);
  error_h = memoryPool::cuda::make_buffer<SiPixelErrorCompactVector>(1,stream,memoryPool::onHost);

  // cudaMemsetAsync takes a size in BYTES: scale by the element size so that the whole
  // buffer of maxFedWords elements is cleared, and check the returned status like every
  // other CUDA call in this file.
  cudaCheck(cudaMemsetAsync(data_d.get(), 0x00, maxFedWords * sizeof(SiPixelErrorCompact), stream));

  // Initialise the host-side vector descriptor: empty, capacity maxFedWords, storage = data_d.
  cms::cuda::make_SimpleVector(error_h.get(), maxFedWords, data_d.get());
  assert(error_h->empty());
  assert(error_h->capacity() == static_cast<int>(maxFedWords));

  // Mirror the descriptor into error_d.
  cudaCheck(memoryPool::cuda::copy(error_d, error_h, 1,stream));
}

// Enqueues on `stream` a copy of one SiPixelErrorCompactVector element from error_d into
// error_h; the status returned by memoryPool::cuda::copy is passed to cudaCheck.
// NOTE(review): presumably asynchronous with respect to the host, so callers should
// synchronize on `stream` before reading error_h - confirm against memoryPool::cuda::copy.
void SiPixelDigiErrorsCUDA::copyErrorToHostAsync(cudaStream_t stream) {
cudaCheck(memoryPool::cuda::copy(error_h, error_d, 1,stream));
}

// Returns a HostDataError pair: a copy of the host-side vector descriptor (*error_h) whose
// data pointer is redirected to a freshly allocated host buffer, together with that buffer.
// The copy of the error words is only enqueued on `stream`.
SiPixelDigiErrorsCUDA::HostDataError SiPixelDigiErrorsCUDA::dataErrorToHostAsync(cudaStream_t stream) const {
  // The host buffer is sized for capacity() rather than size(): a consumer that copies the
  // returned SimpleVector may expect room for capacity() elements behind its data pointer.
  auto data = memoryPool::cuda::make_buffer<SiPixelErrorCompact>(error_h->capacity(), stream, memoryPool::onHost);

  // Only the size() elements recorded in error_h are transferred; an empty vector enqueues nothing.
  if (!error_h->empty()) {
    cudaCheck(memoryPool::cuda::copy(data, data_d, error_h->size(), stream));
  }

  // Copy the descriptor and re-point it at the new host storage.
  SiPixelErrorCompactVector hostView = *error_h;
  hostView.set_data(data.get());
  return HostDataError(hostView, std::move(data));
}



#endif // CUDADataFormats_SiPixelDigi_interface_SiPixelDigiErrorsCUDA_h
7 changes: 3 additions & 4 deletions CUDADataFormats/SiPixelDigi/interface/SiPixelDigisCUDA.h
Original file line number Diff line number Diff line change
Expand Up @@ -4,14 +4,13 @@
#include <cuda_runtime.h>

#include "HeterogeneousCore/CUDAUtilities/interface/memoryPool.h"
#include "HeterogeneousCore/CUDAUtilities/interface/cudaCompat.h"
#include "CUDADataFormats/SiPixelDigi/interface/SiPixelDigisCUDASOAView.h"

class SiPixelDigisCUDA {
public:
using StoreType = uint16_t;
SiPixelDigisCUDA() = default;
inline SiPixelDigisCUDA(size_t maxFedWords, cudaStream_t stream);
/*inline*/ SiPixelDigisCUDA(size_t maxFedWords, cudaStream_t stream);
~SiPixelDigisCUDA() = default;

SiPixelDigisCUDA(const SiPixelDigisCUDA &) = delete;
Expand All @@ -27,7 +26,7 @@ class SiPixelDigisCUDA {
uint32_t nModules() const { return nModules_h; }
uint32_t nDigis() const { return nDigis_h; }

inline memoryPool::buffer<StoreType> copyAllToHostAsync(cudaStream_t stream) const;
/*inline*/ memoryPool::buffer<StoreType> copyAllToHostAsync(cudaStream_t stream) const;

SiPixelDigisCUDASOAView view() { return m_view; }
SiPixelDigisCUDASOAView const view() const { return m_view; }
Expand All @@ -43,6 +42,6 @@ class SiPixelDigisCUDA {
};


#include "SiPixelDigisCUDAImpl.h"
// #include "SiPixelDigisCUDAImpl.h"

#endif // CUDADataFormats_SiPixelDigi_interface_SiPixelDigisCUDA_h
46 changes: 46 additions & 0 deletions CUDADataFormats/SiPixelDigi/src/SiPixelDigiErrorsCUDA.cc
Original file line number Diff line number Diff line change
@@ -1 +1,47 @@
#include "CUDADataFormats/SiPixelDigi/interface/SiPixelDigiErrorsCUDA.h"

#include "HeterogeneousCore/CUDAUtilities/interface/cudaMemoryPool.h"
#include "HeterogeneousCore/CUDAUtilities/interface/cudaCheck.h"


// Builds the error container for up to `maxFedWords` compact error words.
//
// Parameters:
//   maxFedWords - number of SiPixelErrorCompact elements to reserve; must be non-zero (asserted).
//   errors      - formatter errors, moved into formatterErrors_h.
//   stream      - CUDA stream used for the allocations, the memset and the descriptor copy.
//
// All CUDA work is only enqueued on `stream`; no synchronization is performed here.
SiPixelDigiErrorsCUDA::SiPixelDigiErrorsCUDA(size_t maxFedWords, SiPixelFormatterErrors errors, cudaStream_t stream) :
  formatterErrors_h(std::move(errors)),
  nErrorWords_(maxFedWords) {
  assert(maxFedWords != 0);

  // A single deleter (BundleDelete bound to `stream`, memoryPool::onDevice) is shared by both
  // device-side buffers below.
  memoryPool::Deleter deleter = memoryPool::Deleter(std::make_shared<memoryPool::cuda::BundleDelete>(stream, memoryPool::onDevice));
  assert(deleter.pool());

  data_d = memoryPool::cuda::make_buffer<SiPixelErrorCompact>(maxFedWords, deleter);
  error_d = memoryPool::cuda::make_buffer<SiPixelErrorCompactVector>(1,deleter);
  error_h = memoryPool::cuda::make_buffer<SiPixelErrorCompactVector>(1,stream,memoryPool::onHost);

  // cudaMemsetAsync takes a size in BYTES: scale by the element size so that the whole
  // buffer of maxFedWords elements is cleared, and check the returned status like every
  // other CUDA call in this file.
  cudaCheck(cudaMemsetAsync(data_d.get(), 0x00, maxFedWords * sizeof(SiPixelErrorCompact), stream));

  // Initialise the host-side vector descriptor: empty, capacity maxFedWords, storage = data_d.
  cms::cuda::make_SimpleVector(error_h.get(), maxFedWords, data_d.get());
  assert(error_h->empty());
  assert(error_h->capacity() == static_cast<int>(maxFedWords));

  // Mirror the descriptor into error_d.
  cudaCheck(memoryPool::cuda::copy(error_d, error_h, 1,stream));
}

// Enqueues on `stream` a copy of one SiPixelErrorCompactVector element from error_d into
// error_h; the status returned by memoryPool::cuda::copy is passed to cudaCheck.
// NOTE(review): presumably asynchronous with respect to the host, so callers should
// synchronize on `stream` before reading error_h - confirm against memoryPool::cuda::copy.
void SiPixelDigiErrorsCUDA::copyErrorToHostAsync(cudaStream_t stream) {
cudaCheck(memoryPool::cuda::copy(error_h, error_d, 1,stream));
}

// Returns a HostDataError pair: a copy of the host-side vector descriptor (*error_h) whose
// data pointer is redirected to a freshly allocated host buffer, together with that buffer.
// The copy of the error words is only enqueued on `stream`.
SiPixelDigiErrorsCUDA::HostDataError SiPixelDigiErrorsCUDA::dataErrorToHostAsync(cudaStream_t stream) const {
  // The host buffer is sized for capacity() rather than size(): a consumer that copies the
  // returned SimpleVector may expect room for capacity() elements behind its data pointer.
  auto data = memoryPool::cuda::make_buffer<SiPixelErrorCompact>(error_h->capacity(), stream, memoryPool::onHost);

  // Only the size() elements recorded in error_h are transferred; an empty vector enqueues nothing.
  if (!error_h->empty()) {
    cudaCheck(memoryPool::cuda::copy(data, data_d, error_h->size(), stream));
  }

  // Copy the descriptor and re-point it at the new host storage.
  SiPixelErrorCompactVector hostView = *error_h;
  hostView.set_data(data.get());
  return HostDataError(hostView, std::move(data));
}

2 changes: 2 additions & 0 deletions CUDADataFormats/SiPixelDigi/src/SiPixelDigisCUDA.cc
Original file line number Diff line number Diff line change
@@ -1 +1,3 @@
#include "CUDADataFormats/SiPixelDigi/interface/SiPixelDigisCUDA.h"
#include "CUDADataFormats/SiPixelDigi/interface/SiPixelDigisCUDAImpl.h"

Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@


#include "HeterogeneousCore/CUDAUtilities/interface/memoryPool.h"
#include "HeterogeneousCore/CUDAUtilities/interface/cudaCheck.h"

class TrackingRecHit2DHeterogeneous {
public:
Expand Down Expand Up @@ -38,7 +37,7 @@ class TrackingRecHit2DHeterogeneous {

TrackingRecHit2DHeterogeneous() = default;

inline TrackingRecHit2DHeterogeneous(uint32_t nHits,
/*inline*/ TrackingRecHit2DHeterogeneous(uint32_t nHits,
bool isPhase2,
int32_t offsetBPIX2,
pixelCPEforGPU::ParamsOnGPU const* cpeParams,
Expand All @@ -48,7 +47,7 @@ class TrackingRecHit2DHeterogeneous {
TrackingRecHit2DHeterogeneous const* input = nullptr);

// used on CPU only
inline TrackingRecHit2DHeterogeneous(float* store32,
/*inline*/ TrackingRecHit2DHeterogeneous(float* store32,
uint16_t* store16,
uint32_t* modules,
int nHits,
Expand All @@ -74,15 +73,15 @@ class TrackingRecHit2DHeterogeneous {
auto phiBinnerStorage() { return m_phiBinnerStorage; }
auto iphi() { return m_iphi; }

inline buffer<float> localCoordToHostAsync(cudaStream_t stream) const;
/*inline*/ buffer<float> localCoordToHostAsync(cudaStream_t stream) const;

inline buffer<uint32_t> hitsModuleStartToHostAsync(cudaStream_t stream) const;
/*inline*/ buffer<uint32_t> hitsModuleStartToHostAsync(cudaStream_t stream) const;

inline buffer<uint16_t> store16ToHostAsync(cudaStream_t stream) const;
inline buffer<float> store32ToHostAsync(cudaStream_t stream) const;
/*inline*/ buffer<uint16_t> store16ToHostAsync(cudaStream_t stream) const;
/*inline*/ buffer<float> store32ToHostAsync(cudaStream_t stream) const;

// needed for Host
inline void copyFromGPU(TrackingRecHit2DHeterogeneous const* input, cudaStream_t stream);
/*inline*/ void copyFromGPU(TrackingRecHit2DHeterogeneous const* input, cudaStream_t stream);

private:
static constexpr uint32_t n16 = 4; // number of elements in m_store16
Expand Down Expand Up @@ -114,6 +113,6 @@ using TrackingRecHit2DGPU = TrackingRecHit2DHeterogeneous;
using TrackingRecHit2DCPU = TrackingRecHit2DHeterogeneous;
using TrackingRecHit2DHost = TrackingRecHit2DHeterogeneous;

#include "TrackingRecHit2DHeterogeneousImpl.h"
// #include "TrackingRecHit2DHeterogeneousImpl.h"

#endif // CUDADataFormats_TrackingRecHit_interface_TrackingRecHit2DHeterogeneous_h
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// #include "CUDADataFormats/TrackingRecHit/interface/TrackingRecHit2DHeterogeneous.h"
#include "CUDADataFormats/TrackingRecHit/interface/TrackingRecHit2DHeterogeneous.h"

#include "CUDADataFormats/TrackingRecHit/interface/TrackingRecHit2DSOAView.h"
#include "RecoLocalTracker/SiPixelRecHits/interface/pixelCPEforGPU.h"
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
#include "CUDADataFormats/TrackingRecHit/interface/TrackingRecHit2DHeterogeneousImpl.h"

2 changes: 0 additions & 2 deletions HeterogeneousCore/CUDAUtilities/interface/cudaMemoryPool.h
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,7 @@
#include <vector>

// only for cudaStream_t
#include <cuda.h>
#include <cuda_runtime.h>
#include <cuda_runtime_api.h>

#include <cassert>

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@

#include "HeterogeneousCore/CUDAUtilities/interface/cudaCheck.h"

#include <cuda.h>
#include <cuda_runtime.h>
#include <cuda_runtime_api.h>

Expand Down

0 comments on commit e7d8632

Please sign in to comment.