compiles
VinInn committed May 5, 2022
1 parent 6b050bd commit 0e49a36
Showing 2 changed files with 37 additions and 35 deletions.
@@ -32,7 +32,7 @@ class TrackingRecHit2DHeterogeneous {
};

template <typename T>
using unique_ptr = typename memoryPool::unique_ptr<T>;
using buffer = typename memoryPool::buffer<T>;

using PhiBinner = TrackingRecHit2DSOAView::PhiBinner;

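This commit switches the hit container from memoryPool::unique_ptr/make_unique to memoryPool::buffer/make_buffer. The real buffer class is defined in the memory-pool package and is not part of this diff; the minimal sketch below only illustrates the interface the changed code relies on (get(), release(), reset(), operator->, move-only ownership) and is an assumption, not the actual implementation.

// Minimal, illustrative stand-in for the memoryPool::buffer<T> handle as it
// is used in this diff. The real class manages pool bookkeeping; this sketch
// only mirrors the observable interface.
namespace sketch {

  template <typename T>
  class buffer {
  public:
    buffer() = default;
    explicit buffer(T* p) : ptr_(p) {}
    buffer(buffer&& other) noexcept : ptr_(other.release()) {}
    buffer& operator=(buffer&& other) noexcept {
      reset(other.release());
      return *this;
    }
    ~buffer() { reset(); }
    buffer(buffer const&) = delete;
    buffer& operator=(buffer const&) = delete;

    T* get() const { return ptr_; }
    T* operator->() const { return ptr_; }

    // give up ownership without returning the block to the pool
    T* release() {
      T* p = ptr_;
      ptr_ = nullptr;
      return p;
    }

    // in the real pool this would hand the old block back to the pool;
    // the sketch simply forgets it
    void reset(T* p = nullptr) { ptr_ = p; }

  private:
    T* ptr_ = nullptr;
  };

}  // namespace sketch

Unlike std::unique_ptr, the same buffer<T> type is used for both single objects and arrays in this patch, which is presumably why the unique_ptr<float[]> member below becomes buffer<float>.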
@@ -69,12 +69,12 @@ class TrackingRecHit2DHeterogeneous {
auto phiBinnerStorage() { return m_phiBinnerStorage; }
auto iphi() { return m_iphi; }

unique_ptr<float> localCoordToHostAsync(cudaStream_t stream) const;
buffer<float> localCoordToHostAsync(cudaStream_t stream) const;

unique_ptr<uint32_t> hitsModuleStartToHostAsync(cudaStream_t stream) const;
buffer<uint32_t> hitsModuleStartToHostAsync(cudaStream_t stream) const;

unique_ptr<uint16_t> store16ToHostAsync(cudaStream_t stream) const;
unique_ptr<float> store32ToHostAsync(cudaStream_t stream) const;
buffer<uint16_t> store16ToHostAsync(cudaStream_t stream) const;
buffer<float> store32ToHostAsync(cudaStream_t stream) const;

// needs specialization for Host
void copyFromGPU(TrackingRecHit2DHeterogeneous<memoryPool::onDevice> const* input, cudaStream_t stream);
@@ -84,13 +84,13 @@ class TrackingRecHit2DHeterogeneous {
static constexpr uint32_t n32 = 10; // number of elements in m_store32
static_assert(sizeof(uint32_t) == sizeof(float)); // just stating the obvious
static_assert(n32 == static_cast<uint32_t>(Storage32::kLayers));
unique_ptr<uint16_t> m_store16; //!
unique_ptr<float[]> m_store32; //!
buffer<uint16_t> m_store16; //!
buffer<float> m_store32; //!

unique_ptr<TrackingRecHit2DSOAView::PhiBinner> m_PhiBinnerStore; //!
unique_ptr<TrackingRecHit2DSOAView::AverageGeometry> m_AverageGeometryStore; //!
buffer<TrackingRecHit2DSOAView::PhiBinner> m_PhiBinnerStore; //!
buffer<TrackingRecHit2DSOAView::AverageGeometry> m_AverageGeometryStore; //!

unique_ptr<TrackingRecHit2DSOAView> m_view; //!
buffer<TrackingRecHit2DSOAView> m_view; //!

uint32_t m_nHits;
int32_t m_offsetBPIX2;
@@ -120,23 +120,24 @@ TrackingRecHit2DHeterogeneous<where>::TrackingRecHit2DHeterogeneous(
cudaStream_t stream,
TrackingRecHit2DHeterogeneous<memoryPool::onDevice> const* input)
: m_nHits(nHits), m_offsetBPIX2(offsetBPIX2), m_hitsModuleStart(hitsModuleStart) {

using namespace memoryPool::cuda;
auto view = make_unique<TrackingRecHit2DSOAView>(1,stream,memoryPool::onCPU==where ? memoryPool::onCPU : memoryPool::onHost);
auto view = make_buffer<TrackingRecHit2DSOAView>(1,stream,memoryPool::onCPU==where ? memoryPool::onCPU : memoryPool::onHost);

m_nMaxModules = isPhase2 ? phase2PixelTopology::numberOfModules : phase1PixelTopology::numberOfModules;

view->m_nHits = nHits;
view->m_nMaxModules = m_nMaxModules;
m_view = make_unique<TrackingRecHit2DSOAView>(1,stream,where); // leave it on host and pass it by value?
m_AverageGeometryStore = make_unique<TrackingRecHit2DSOAView::AverageGeometry>(1,stream,where);
m_view = make_buffer<TrackingRecHit2DSOAView>(1,stream,where); // leave it on host and pass it by value?
m_AverageGeometryStore = make_buffer<TrackingRecHit2DSOAView::AverageGeometry>(1,stream,where);
view->m_averageGeometry = m_AverageGeometryStore.get();
view->m_cpeParams = cpeParams;
view->m_hitsModuleStart = hitsModuleStart;

// if empty do not bother
if (0 == nHits) {
if constexpr (memoryPool::onDevice == where) {
cudaCheck(cudaMemcpyAsync(m_view.get(), view.get(), sizeof(TrackingRecHit2DSOAView), cudaMemcpyHostToDevice, stream));
cudaCheck(memoryPool::cuda::copy(m_view, view, sizeof(TrackingRecHit2DSOAView), stream));
} else {
m_view.reset(view.release()); // NOLINT: std::move() breaks CUDA version
}
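The explicit cudaMemcpyAsync on the staging view is replaced by memoryPool::cuda::copy, which takes the two buffers directly. Its definition is not part of this diff; judging from the call sites (the return value is passed to cudaCheck, the arguments are destination, source, byte count and stream) it is a thin forwarding wrapper, roughly like the assumed sketch below.

// Assumed shape of the copy helper used above: forward the raw pointers held
// by two buffer-like handles to cudaMemcpyAsync and hand the CUDA error code
// back to cudaCheck. Using cudaMemcpyDefault, so the direction is inferred
// from the addresses, is an assumption; it is what would let one call replace
// the hard-coded cudaMemcpyHostToDevice above.
#include <cstddef>
#include <cuda_runtime.h>

namespace sketch {
  namespace cuda {

    template <typename Dst, typename Src>
    cudaError_t copy(Dst& dst, Src const& src, std::size_t nbytes, cudaStream_t stream) {
      return cudaMemcpyAsync(dst.get(), src.get(), nbytes, cudaMemcpyDefault, stream);
    }

  }  // namespace cuda
}  // namespace sketch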
@@ -158,9 +159,9 @@ TrackingRecHit2DHeterogeneous<where>::TrackingRecHit2DHeterogeneous(

auto nL = isPhase2 ? phase2PixelTopology::numberOfLayers : phase1PixelTopology::numberOfLayers;

m_store16 = make_unique<uint16_t>(nHits * n16, stream, where);
m_store32 = make_unique<float>(nHits * n32 + nL + 1, stream, where);
m_PhiBinnerStore = make_unique<TrackingRecHit2DSOAView::PhiBinner>(1,stream,where);
m_store16 = make_buffer<uint16_t>(nHits * n16, stream, where);
m_store32 = make_buffer<float>(nHits * n32 + nL + 1, stream, where);
m_PhiBinnerStore = make_buffer<TrackingRecHit2DSOAView::PhiBinner>(1,stream,where);
}

static_assert(sizeof(TrackingRecHit2DSOAView::hindex_type) == sizeof(float));
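make_buffer<T>(n, stream, where) replaces the make_unique calls: it hands back n elements of T drawn from the pool selected by where, apparently tied to the given stream. The sketch below approximates that with plain CUDA allocations and no pooling; the enum type name Where is made up (the diff only shows the enumerators), and std::unique_ptr with a custom deleter stands in for buffer<T>.

// Illustrative stand-in for make_buffer<T>(n, stream, where). The real
// version draws from a stream-aware memory pool; here the stream parameter
// is unused and ordinary CUDA/host allocations are made instead.
#include <cstddef>
#include <cstdlib>
#include <cuda_runtime.h>
#include <memory>

namespace sketch {

  enum Where { onCPU, onHost, onDevice };

  template <typename T>
  std::unique_ptr<T, void (*)(T*)> make_buffer(std::size_t n, cudaStream_t /*stream*/, Where where) {
    void* p = nullptr;
    switch (where) {
      case onDevice:
        cudaMalloc(&p, n * sizeof(T));  // global device memory
        return std::unique_ptr<T, void (*)(T*)>(static_cast<T*>(p), [](T* q) { cudaFree(q); });
      case onHost:
        cudaMallocHost(&p, n * sizeof(T));  // pinned host memory, suitable for async copies
        return std::unique_ptr<T, void (*)(T*)>(static_cast<T*>(p), [](T* q) { cudaFreeHost(q); });
      case onCPU:
      default:
        p = std::malloc(n * sizeof(T));  // plain pageable CPU memory
        return std::unique_ptr<T, void (*)(T*)>(static_cast<T*>(p), [](T* q) { std::free(q); });
    }
  }

}  // namespace sketch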
@@ -199,7 +200,7 @@ TrackingRecHit2DHeterogeneous<where>::TrackingRecHit2DHeterogeneous(

// transfer view
if constexpr (memoryPool::onDevice == where) {
cudaCheck(cudaMemcpyAsync(m_view.get(), view.get(), sizeof(TrackingRecHit2DSOAView), cudaMemcpyHostToDevice, stream));
cudaCheck(memoryPool::cuda::copy(m_view, view, sizeof(TrackingRecHit2DSOAView), stream));
} else {
m_view.reset(view.release()); // NOLINT: std::move() breaks CUDA version
}
@@ -211,25 +212,26 @@ TrackingRecHit2DHeterogeneous<where>::TrackingRecHit2DHeterogeneous(
float* store32, uint16_t* store16, uint32_t* modules, int nHits, cudaStream_t stream)
: m_nHits(nHits), m_hitsModuleStart(modules) {

auto view = make_unique<TrackingRecHit2DSOAView>(1,stream,memoryPool::onCPU==where? memoryPool::onCPU : memoryPool::onHost);
using namespace memoryPool::cuda;
auto view = make_buffer<TrackingRecHit2DSOAView>(1,stream,memoryPool::onCPU==where? memoryPool::onCPU : memoryPool::onHost);

m_view = make_unique<TrackingRecHit2DSOAView>(stream);
m_view = make_buffer<TrackingRecHit2DSOAView>(1,stream,where);

view->m_nHits = nHits;

if (0 == nHits) {
if constexpr (memoryPool::onDevice == where) {
cudaCheck(cudaMemcpyAsync(m_view.get(), view.get(), sizeof(TrackingRecHit2DSOAView), cudaMemcpyHostToDevice, stream));
cudaCheck(memoryPool::cuda::copy(m_view, view, sizeof(TrackingRecHit2DSOAView), stream));
} else {
m_view = std::move(view);
}
return;
}

m_store16 = make_unique<uint16_t>(nHits * n16, stream,where);
m_store32 = make_unique<float>(nHits * n32, stream,where);
m_PhiBinnerStore = make_unique<TrackingRecHit2DSOAView::PhiBinner>(1,stream,where);
m_AverageGeometryStore = make_unique<TrackingRecHit2DSOAView::AverageGeometry>(1,stream,where);
m_store16 = make_buffer<uint16_t>(nHits * n16, stream,where);
m_store32 = make_buffer<float>(nHits * n32, stream,where);
m_PhiBinnerStore = make_buffer<TrackingRecHit2DSOAView::PhiBinner>(1,stream,where);
m_AverageGeometryStore = make_buffer<TrackingRecHit2DSOAView::AverageGeometry>(1,stream,where);

view->m_averageGeometry = m_AverageGeometryStore.get();
view->m_hitsModuleStart = m_hitsModuleStart;
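As in the first constructor, the SoA view is first assembled in a host-side staging buffer (pageable memory when the product itself lives on the CPU, and what is presumably pinned host memory otherwise) and only afterwards copied to the device or moved into m_view. A tiny helper capturing that onCPU == where ? onCPU : onHost choice is sketched below; the function name and the enum type name are hypothetical.

// Hypothetical helper naming the staging-side choice used when creating the
// temporary view in both constructors. The diff only shows the enumerators
// onCPU/onHost/onDevice; the enum type name here is invented.
namespace sketch {

  enum Where { onCPU, onHost, onDevice };

  constexpr Where stagingSide(Where where) {
    // products that stay on the CPU are staged in pageable CPU memory;
    // everything else is staged on the host and transferred to the device
    // asynchronously afterwards
    return onCPU == where ? onCPU : onHost;
  }

}  // namespace sketch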
@@ -270,7 +272,7 @@ TrackingRecHit2DHeterogeneous<where>::TrackingRecHit2DHeterogeneous(

// transfer view
if constexpr (memoryPool::onDevice == where) {
cudaCheck(cudaMemcpyAsync(m_view.get(), view.get(), sizeof(TrackingRecHit2DSOAView), cudaMemcpyHostToDevice, stream));
cudaCheck(memoryPool::cuda::copy(m_view, view, sizeof(TrackingRecHit2DSOAView), stream));
} else {
m_view = std::move(view);
}

@@ -5,32 +5,32 @@
using namespace memoryPool::cuda;

template <>
memoryPool::unique_ptr<float> TrackingRecHit2DGPU::localCoordToHostAsync(cudaStream_t stream) const {
auto ret = make_unique<float>(5 * nHits(), stream,onHost);
memoryPool::buffer<float> TrackingRecHit2DGPU::localCoordToHostAsync(cudaStream_t stream) const {
auto ret = make_buffer<float>(5 * nHits(), stream,memoryPool::onHost);
cudaCheck(
cudaMemcpyAsync(ret.get(), m_store32.get(), 5 * nHits(), cudaMemcpyDefault,stream));
return ret;
}

template <>
memoryPool::unique_ptr<float> TrackingRecHit2DGPU::store32ToHostAsync(cudaStream_t stream) const {
auto ret = make_unique<float>(static_cast<int>(n32) * nHits(), stream,onHost));
memoryPool::buffer<float> TrackingRecHit2DGPU::store32ToHostAsync(cudaStream_t stream) const {
auto ret = make_buffer<float>(static_cast<int>(n32) * nHits(), stream,memoryPool::onHost);
cudaCheck(
cudaMemcpyAsync(ret.get(), m_store32.get(), static_cast<int>(n32) * nHits(), cudaMemcpyDefault,stream));
return ret;
}

template <>
memoryPool::unique_ptr<uint16_t> TrackingRecHit2DGPU::store16ToHostAsync(cudaStream_t stream) const {
auto ret = make_unique<uint16_t>(static_cast<int>(n16) * nHits(), stream,onHost));
memoryPool::buffer<uint16_t> TrackingRecHit2DGPU::store16ToHostAsync(cudaStream_t stream) const {
auto ret = make_buffer<uint16_t>(static_cast<int>(n16) * nHits(), stream,memoryPool::onHost);
cudaCheck(
cudaMemcpyAsync(ret.get(), m_store16, static_cast<int>(n16) * nHits(), cudaMemcpyDefault,stream));
cudaMemcpyAsync(ret.get(), m_store16.get(), static_cast<int>(n16) * nHits(), cudaMemcpyDefault,stream));
return ret;
}

template <>
memoryPool::unique_ptr<uint32_t> TrackingRecHit2DGPU::hitsModuleStartToHostAsync(cudaStream_t stream) const {
auto ret = make_unique<uint32_t>(nMaxModules() + 1, stream,onHost));
memoryPool::buffer<uint32_t> TrackingRecHit2DGPU::hitsModuleStartToHostAsync(cudaStream_t stream) const {
auto ret = make_buffer<uint32_t>(nMaxModules() + 1, stream,memoryPool::onHost);
cudaCheck(
cudaMemcpyAsync(ret.get(), m_hitsModuleStart, sizeof(uint32_t) * (nMaxModules() + 1), cudaMemcpyDefault, stream));
return ret;
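All four helpers above only schedule asynchronous device-to-host copies on the given stream; the caller owns the returned host buffer and has to synchronize the stream before reading it. A hypothetical consumer is sketched below; the function name is made up, and it assumes the usual convention that hitsModuleStart holds a per-module prefix sum of hit counts.

// Hypothetical use of the transfer helpers defined above (requires the
// TrackingRecHit2DHeterogeneous and cudaCheck headers of this package).
#include <cstdint>
#include <cuda_runtime.h>
#include <iostream>

void inspectHits(TrackingRecHit2DGPU const& hitsOnGPU, cudaStream_t stream) {
  auto localCoord = hitsOnGPU.localCoordToHostAsync(stream);        // buffer<float>
  auto moduleStart = hitsOnGPU.hitsModuleStartToHostAsync(stream);  // buffer<uint32_t>

  cudaCheck(cudaStreamSynchronize(stream));  // wait until the async copies have landed

  if (hitsOnGPU.nHits() > 0) {
    std::cout << "first local-coordinate word: " << localCoord.get()[0] << '\n'
              << "hits in the first module: " << moduleStart.get()[1] - moduleStart.get()[0] << '\n';
  }
}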
