Skip to content

Commit

Permalink
rebase and response to comments
Browse files Browse the repository at this point in the history
  • Loading branch information
Dan Riley authored and Dan Riley committed Mar 20, 2023
1 parent 0408521 commit 7dbff2a
Show file tree
Hide file tree
Showing 18 changed files with 450 additions and 428 deletions.
17 changes: 6 additions & 11 deletions CUDADataFormats/SiStripCluster/interface/SiStripClustersCUDA.h
Original file line number Diff line number Diff line change
Expand Up @@ -6,11 +6,6 @@
#include "HeterogeneousCore/CUDAUtilities/interface/host_unique_ptr.h"
#include "HeterogeneousCore/CUDAUtilities/interface/cudaCompat.h"

namespace cms {
template <typename T>
using observer_ptr = T *;
}

#include <cuda_runtime.h>

class SiStripClustersCUDADevice : public SiStripClustersSOABase<cms::cuda::device::unique_ptr> {
Expand Down Expand Up @@ -38,15 +33,15 @@ class SiStripClustersCUDADevice : public SiStripClustersSOABase<cms::cuda::devic
};

DeviceView *view() const { return view_d.get(); }
uint32_t nClustersHost() const { return nClustersHost_; }
uint32_t *nClustersHostPtr() { return &nClustersHost_; }
uint32_t maxClusterSizeHost() const { return maxClusterSizeHost_; }
uint32_t *maxClusterSizeHostPtr() { return &maxClusterSizeHost_; }
uint32_t nClusters() const { return nClusters_; }
uint32_t *nClustersPtr() { return &nClusters_; }
uint32_t maxClusterSize() const { return maxClusterSize_; }
uint32_t *maxClusterSizePtr() { return &maxClusterSize_; }

private:
cms::cuda::device::unique_ptr<DeviceView> view_d; // "me" pointer
uint32_t nClustersHost_;
uint32_t maxClusterSizeHost_;
uint32_t nClusters_;
uint32_t maxClusterSize_;
};

class SiStripClustersCUDAHost : public SiStripClustersSOABase<cms::cuda::host::unique_ptr> {
Expand Down
12 changes: 9 additions & 3 deletions CUDADataFormats/SiStripCluster/src/SiStripClustersCUDA.cc
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
SiStripClustersCUDADevice::SiStripClustersCUDADevice(uint32_t maxClusters,
uint32_t maxStripsPerCluster,
cudaStream_t stream) {
maxClusterSizeHost_ = maxStripsPerCluster;
maxClusterSize_ = maxStripsPerCluster;

clusterIndex_ = cms::cuda::make_device_unique<uint32_t[]>(maxClusters, stream);
clusterSize_ = cms::cuda::make_device_unique<uint32_t[]>(maxClusters, stream);
Expand All @@ -28,11 +28,14 @@ SiStripClustersCUDADevice::SiStripClustersCUDADevice(uint32_t maxClusters,

view_d = cms::cuda::make_device_unique<DeviceView>(stream);
cms::cuda::copyAsync(view_d, view, stream);
#ifdef GPU_CHECK
cudaCheck(cudaStreamSynchronize(stream));
#endif
}

SiStripClustersCUDAHost::SiStripClustersCUDAHost(const SiStripClustersCUDADevice& clusters_d, cudaStream_t stream) {
nClusters_ = clusters_d.nClustersHost();
maxClusterSize_ = clusters_d.maxClusterSizeHost();
nClusters_ = clusters_d.nClusters();
maxClusterSize_ = clusters_d.maxClusterSize();
clusterIndex_ = cms::cuda::make_host_unique<uint32_t[]>(nClusters_, stream);
clusterSize_ = cms::cuda::make_host_unique<uint32_t[]>(nClusters_, stream);
clusterADCs_ = cms::cuda::make_host_unique<uint8_t[]>(nClusters_ * maxClusterSize_, stream);
Expand All @@ -50,4 +53,7 @@ SiStripClustersCUDAHost::SiStripClustersCUDAHost(const SiStripClustersCUDADevice
cms::cuda::copyAsync(trueCluster_, clusters_d.trueCluster(), nClusters_, stream);
cms::cuda::copyAsync(barycenter_, clusters_d.barycenter(), nClusters_, stream);
cms::cuda::copyAsync(charge_, clusters_d.charge(), nClusters_, stream);
#ifdef GPU_CHECK
cudaCheck(cudaStreamSynchronize(stream));
#endif
}
Original file line number Diff line number Diff line change
Expand Up @@ -32,16 +32,16 @@ namespace stripgpu {
public:
class DetToFed {
public:
DetToFed(detId_t detid, APVPair_t ipair, fedId_t fedid, fedCh_t fedch)
DetToFed(detId_t detid, apvPair_t ipair, fedId_t fedid, fedCh_t fedch)
: detid_(detid), ipair_(ipair), fedid_(fedid), fedch_(fedch) {}
detId_t detID() const { return detid_; }
APVPair_t pair() const { return ipair_; }
apvPair_t pair() const { return ipair_; }
fedId_t fedID() const { return fedid_; }
fedCh_t fedCh() const { return fedch_; }

private:
detId_t detid_;
APVPair_t ipair_;
apvPair_t ipair_;
fedId_t fedid_;
fedCh_t fedch_;
};
Expand All @@ -56,7 +56,7 @@ namespace stripgpu {
return detID_[channelIndex(fed, channel)];
}

__device__ inline APVPair_t iPair(fedId_t fed, fedCh_t channel) const {
__device__ inline apvPair_t iPair(fedId_t fed, fedCh_t channel) const {
return iPair_[channelIndex(fed, channel)];
}

Expand All @@ -65,6 +65,7 @@ namespace stripgpu {
}

__device__ inline float noise(fedId_t fed, fedCh_t channel, stripId_t strip) const {
// noise is stored as 9 bits with a fixed point scale factor of 0.1
return 0.1f * (noise_[stripIndex(fed, channel, strip)] & ~badBit);
}

Expand All @@ -78,7 +79,7 @@ namespace stripgpu {
const std::uint16_t* noise_; //[sistrip::NUMBER_OF_FEDS*sistrip::FEDCH_PER_FED * sistrip::STRIPS_PER_FEDCH];
const float* invthick_; //[sistrip::NUMBER_OF_FEDS*sistrip::FEDCH_PER_FED];
const detId_t* detID_; //[sistrip::NUMBER_OF_FEDS*sistrip::FEDCH_PER_FED];
const APVPair_t* iPair_; //[sistrip::NUMBER_OF_FEDS*sistrip::FEDCH_PER_FED];
const apvPair_t* iPair_; //[sistrip::NUMBER_OF_FEDS*sistrip::FEDCH_PER_FED];
const float* gain_; //[sistrip::NUMBER_OF_FEDS*sistrip::APVS_PER_FEDCH * sistrip::FEDCH_PER_FED];
};

Expand All @@ -91,7 +92,7 @@ namespace stripgpu {
noise_; //[sistrip::NUMBER_OF_FEDS*sistrip::FEDCH_PER_FED * sistrip::STRIPS_PER_FEDCH];
cms::cuda::device::unique_ptr<float[]> invthick_; //[sistrip::NUMBER_OF_FEDS*sistrip::FEDCH_PER_FED];
cms::cuda::device::unique_ptr<detId_t[]> detID_; //[sistrip::NUMBER_OF_FEDS*sistrip::FEDCH_PER_FED];
cms::cuda::device::unique_ptr<APVPair_t[]> iPair_; //[sistrip::NUMBER_OF_FEDS*sistrip::FEDCH_PER_FED];
cms::cuda::device::unique_ptr<apvPair_t[]> iPair_; //[sistrip::NUMBER_OF_FEDS*sistrip::FEDCH_PER_FED];
cms::cuda::device::unique_ptr<float[]>
gain_; //[sistrip::NUMBER_OF_FEDS*sistrip::APVS_PER_FEDCH * sistrip::FEDCH_PER_FED];
};
Expand Down Expand Up @@ -123,7 +124,7 @@ namespace stripgpu {
std::vector<std::uint16_t, cms::cuda::HostAllocator<std::uint16_t>> noise_;
std::vector<float, cms::cuda::HostAllocator<float>> invthick_;
std::vector<detId_t, cms::cuda::HostAllocator<detId_t>> detID_;
std::vector<APVPair_t, cms::cuda::HostAllocator<APVPair_t>> iPair_;
std::vector<apvPair_t, cms::cuda::HostAllocator<apvPair_t>> iPair_;
std::vector<float, cms::cuda::HostAllocator<float>> gain_;

// Helper that takes care of complexity of transferring the data to
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,6 @@ namespace stripgpu {
detID_(sistrip::NUMBER_OF_FEDS * sistrip::FEDCH_PER_FED),
iPair_(sistrip::NUMBER_OF_FEDS * sistrip::FEDCH_PER_FED),
gain_(sistrip::NUMBER_OF_FEDS * sistrip::APVS_PER_FEDCH * sistrip::FEDCH_PER_FED) {
detToFeds_.clear();

// connected: map<DetID, std::vector<int>>
// map of KEY=detid DATA=vector of apvs, maximum 6 APVs per detector module :
const auto& connected = quality.cabling()->connected();
Expand Down Expand Up @@ -77,7 +75,7 @@ namespace stripgpu {
data.noise_ = cms::cuda::make_device_unique<std::uint16_t[]>(noise_.size(), stream);
data.invthick_ = cms::cuda::make_device_unique<float[]>(invthick_.size(), stream);
data.detID_ = cms::cuda::make_device_unique<detId_t[]>(detID_.size(), stream);
data.iPair_ = cms::cuda::make_device_unique<APVPair_t[]>(iPair_.size(), stream);
data.iPair_ = cms::cuda::make_device_unique<apvPair_t[]>(iPair_.size(), stream);
data.gain_ = cms::cuda::make_device_unique<float[]>(gain_.size(), stream);

cms::cuda::copyAsync(data.noise_, noise_, stream);
Expand Down
10 changes: 6 additions & 4 deletions DataFormats/SiStripCluster/interface/SiStripClustersSOA.h
Original file line number Diff line number Diff line change
Expand Up @@ -6,11 +6,13 @@
#include <memory>

namespace detail {
template <typename T>
using unique_ptr = typename std::unique_ptr<T>;
}
namespace impl {
template <typename T>
using unique_ptr_default_deleter = typename std::unique_ptr<T>;
}
} // namespace detail

class SiStripClustersSOA : public SiStripClustersSOABase<detail::unique_ptr> {
class SiStripClustersSOA : public SiStripClustersSOABase<detail::impl::unique_ptr_default_deleter> {
public:
SiStripClustersSOA() = default;
explicit SiStripClustersSOA(uint32_t maxClusters, uint32_t maxStripsPerCluster);
Expand Down
2 changes: 1 addition & 1 deletion DataFormats/SiStripCluster/interface/SiStripTypes.h
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ namespace stripgpu {
using detId_t = std::uint32_t;
using fedId_t = std::uint16_t;
using fedCh_t = std::uint8_t;
using APVPair_t = std::uint16_t;
using apvPair_t = std::uint16_t;
using stripId_t = std::uint16_t;

static constexpr detId_t invalidDet = std::numeric_limits<detId_t>::max();
Expand Down
2 changes: 1 addition & 1 deletion DataFormats/SiStripCluster/src/classes_def.xml
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@
<class name="edm::Wrapper<edmNew::DetSetVector<edm::Ref<edmNew::DetSetVector<SiStripApproximateCluster>,SiStripApproximateCluster,edmNew::DetSetVector<SiStripApproximateCluster>::FindForDetSetVector> > >" />

<class name="SiStripClustersSOA" ClassVersion="3">
<version ClassVersion="3" checksum="859705774"/>
<version ClassVersion="3" checksum="2739562998"/>
</class>
<class name="edm::Wrapper<SiStripClustersSOA>"/>

Expand Down
17 changes: 9 additions & 8 deletions RecoLocalTracker/SiStripClusterizer/plugins/ChannelLocsGPU.cc
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ ChannelLocs::ChannelLocs(size_t size, cudaStream_t stream) : ChannelLocsBase(siz
}
}

void ChannelLocsView::Fill(const ChannelLocsGPU& c) {
void ChannelLocsView::fill(const ChannelLocsGPU& c) {
input_ = c.input();
inoff_ = c.inoff();
offset_ = c.offset();
Expand All @@ -39,17 +39,18 @@ ChannelLocsGPU::ChannelLocsGPU(size_t size, cudaStream_t stream) : ChannelLocsBa
fedCh_ = cms::cuda::make_device_unique<stripgpu::fedCh_t[]>(size, stream);
detID_ = cms::cuda::make_device_unique<stripgpu::detId_t[]>(size, stream);

ChannelLocsView channelLocsView;
channelLocsView.Fill(*this);
channelLocsView_ = cms::cuda::make_device_unique<ChannelLocsView>(stream);
cudaCheck(
cudaMemcpyAsync(channelLocsView_.get(), &channelLocsView, sizeof(ChannelLocsView), cudaMemcpyDefault, stream));
auto channelLocsView = cms::cuda::make_host_unique<ChannelLocsView>(stream);
channelLocsView->fill(*this);
channelLocsViewGPU_ = cms::cuda::make_device_unique<ChannelLocsView>(stream);
cms::cuda::copyAsync(channelLocsViewGPU_, channelLocsView, stream);
}
}

void ChannelLocsGPU::setVals(const ChannelLocs* c, const std::vector<uint8_t*>& inputGPU, cudaStream_t stream) {
void ChannelLocsGPU::setVals(const ChannelLocs* c,
cms::cuda::host::unique_ptr<const uint8_t*[]> inputGPU,
cudaStream_t stream) {
assert(c->size() == size_);
cudaCheck(cudaMemcpyAsync(input_.get(), inputGPU.data(), sizeof(uint8_t*) * size_, cudaMemcpyDefault, stream));
cms::cuda::copyAsync(input_, inputGPU, size_, stream);
cms::cuda::copyAsync(inoff_, c->inoff_, size_, stream);
cms::cuda::copyAsync(offset_, c->offset_, size_, stream);
cms::cuda::copyAsync(length_, c->length_, size_, stream);
Expand Down
24 changes: 13 additions & 11 deletions RecoLocalTracker/SiStripClusterizer/plugins/ChannelLocsGPU.h
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ class ChannelLocsBase {
stripgpu::fedCh_t fedCh(uint32_t index) const { return fedCh_[index]; }
stripgpu::detId_t detID(uint32_t index) const { return detID_[index]; }

const uint8_t** input() const { return input_.get(); }
const uint8_t* const* input() const { return input_.get(); }
size_t* inoff() const { return inoff_.get(); }
size_t* offset() const { return offset_.get(); }
uint16_t* length() const { return length_.get(); }
Expand Down Expand Up @@ -89,8 +89,9 @@ class ChannelLocs : public ChannelLocsBase<cms::cuda::host::unique_ptr> {
~ChannelLocs() override = default;
};

struct ChannelLocsView {
void Fill(const ChannelLocsGPU& c);
class ChannelLocsView {
public:
void fill(const ChannelLocsGPU& c);

__device__ size_t size() const { return size_; }

Expand All @@ -102,10 +103,11 @@ struct ChannelLocsView {
__device__ stripgpu::fedCh_t fedCh(uint32_t index) const { return fedCh_[index]; }
__device__ stripgpu::detId_t detID(uint32_t index) const { return detID_[index]; }

const uint8_t** input_; // input raw data for channel
size_t* inoff_; // offset in input raw data
size_t* offset_; // global offset in alldata
uint16_t* length_; // length of channel data
private:
const uint8_t* const* input_; // input raw data for channel
size_t* inoff_; // offset in input raw data
size_t* offset_; // global offset in alldata
uint16_t* length_; // length of channel data
stripgpu::fedId_t* fedID_;
stripgpu::fedCh_t* fedCh_;
stripgpu::detId_t* detID_;
Expand All @@ -117,7 +119,7 @@ class ChannelLocsGPU : public ChannelLocsBase<cms::cuda::device::unique_ptr> {
//using Base = ChannelLocsBase<cms::cuda::device::unique_ptr>;
ChannelLocsGPU(size_t size, cudaStream_t stream);
ChannelLocsGPU(ChannelLocsGPU&& arg)
: ChannelLocsBase(std::move(arg)), channelLocsView_(std::move(arg.channelLocsView_)) {}
: ChannelLocsBase(std::move(arg)), channelLocsViewGPU_(std::move(arg.channelLocsViewGPU_)) {}

ChannelLocsGPU(ChannelLocsGPU&) = delete;
ChannelLocsGPU(const ChannelLocsGPU&) = delete;
Expand All @@ -126,11 +128,11 @@ class ChannelLocsGPU : public ChannelLocsBase<cms::cuda::device::unique_ptr> {

~ChannelLocsGPU() override = default;

void setVals(const ChannelLocs* c, const std::vector<uint8_t*>& inputGPU, cudaStream_t stream);
const ChannelLocsView* channelLocsView() const { return channelLocsView_.get(); }
void setVals(const ChannelLocs* c, cms::cuda::host::unique_ptr<const uint8_t*[]> inputGPU, cudaStream_t stream);
const ChannelLocsView* channelLocsView() const { return channelLocsViewGPU_.get(); }

private:
cms::cuda::device::unique_ptr<ChannelLocsView> channelLocsView_;
cms::cuda::device::unique_ptr<ChannelLocsView> channelLocsViewGPU_;
};

#endif
Original file line number Diff line number Diff line change
Expand Up @@ -65,8 +65,8 @@ namespace {
class SiStripClusterizerFromRawGPU final : public edm::stream::EDProducer<edm::ExternalWork> {
public:
explicit SiStripClusterizerFromRawGPU(const edm::ParameterSet& conf)
: buffers(1024),
raw(1024),
: buffers_(sistrip::FED_ID_MAX),
raw_(sistrip::FED_ID_MAX),
gpuAlgo_(conf.getParameter<edm::ParameterSet>("Clusterizer")),
inputToken_(consumes(conf.getParameter<edm::InputTag>("ProductLabel"))),
outputToken_(produces<cms::cuda::Product<SiStripClustersCUDADevice>>()),
Expand All @@ -91,7 +91,7 @@ class SiStripClusterizerFromRawGPU final : public edm::stream::EDProducer<edm::E

// Queues asynchronous data transfers and kernels to the CUDA stream
// returned by cms::cuda::ScopedContextAcquire::stream()
gpuAlgo_.makeAsync(raw, buffers, conditions, ctx.stream());
gpuAlgo_.makeAsync(raw_, buffers_, conditions, ctx.stream());

// Destructor of ctx queues a callback to the CUDA stream notifying
// waitingTaskHolder when the queued asynchronous work has finished
Expand All @@ -107,7 +107,7 @@ class SiStripClusterizerFromRawGPU final : public edm::stream::EDProducer<edm::E
// in the consumer side.
ctx.emplace(ev, outputToken_, gpuAlgo_.getResults(ctx.stream()));

for (auto& buf : buffers)
for (auto& buf : buffers_)
buf.reset(nullptr);
}

Expand All @@ -116,8 +116,8 @@ class SiStripClusterizerFromRawGPU final : public edm::stream::EDProducer<edm::E
void fill(uint32_t idet, const FEDRawDataCollection& rawColl, const SiStripClusterizerConditions& conditions);

private:
std::vector<std::unique_ptr<sistrip::FEDBuffer>> buffers;
std::vector<const FEDRawData*> raw;
std::vector<std::unique_ptr<sistrip::FEDBuffer>> buffers_;
std::vector<const FEDRawData*> raw_;
cms::cuda::ContextState ctxState_;

stripgpu::SiStripRawToClusterGPUKernel gpuAlgo_;
Expand Down Expand Up @@ -169,17 +169,12 @@ void SiStripClusterizerFromRawGPU::fill(uint32_t idet,
}

// If Fed hasnt already been initialised, extract data and initialise
sistrip::FEDBuffer* buffer = buffers[fedId].get();
sistrip::FEDBuffer* buffer = buffers_[fedId].get();
if (!buffer) {
const FEDRawData& rawData = rawColl.FEDData(fedId);
raw[fedId] = &rawData;
buffer = fillBuffer(fedId, rawData).release();
if (!buffer) {
continue;
}
buffers[fedId].reset(buffer);
raw_[fedId] = &rawData;
buffers_[fedId] = fillBuffer(fedId, rawData);
}
assert(buffer);
} // end loop over conn
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -31,17 +31,17 @@ class SiStripClusterizerConditionsGPUESProducer : public edm::ESProducer {
ReturnType produce(const SiStripClusterizerConditionsRcd&);

private:
edm::ESGetToken<SiStripGain, SiStripGainRcd> m_gainToken;
edm::ESGetToken<SiStripNoises, SiStripNoisesRcd> m_noisesToken;
edm::ESGetToken<SiStripQuality, SiStripQualityRcd> m_qualityToken;
edm::ESGetToken<SiStripGain, SiStripGainRcd> gainToken_;
edm::ESGetToken<SiStripNoises, SiStripNoisesRcd> noisesToken_;
edm::ESGetToken<SiStripQuality, SiStripQualityRcd> qualityToken_;
};

SiStripClusterizerConditionsGPUESProducer::SiStripClusterizerConditionsGPUESProducer(const edm::ParameterSet& iConfig) {
auto cc = setWhatProduced(this, iConfig.getParameter<std::string>("Label"));

m_gainToken = cc.consumesFrom<SiStripGain, SiStripGainRcd>();
m_noisesToken = cc.consumesFrom<SiStripNoises, SiStripNoisesRcd>();
m_qualityToken = cc.consumesFrom<SiStripQuality, SiStripQualityRcd>(
gainToken_ = cc.consumesFrom<SiStripGain, SiStripGainRcd>();
noisesToken_ = cc.consumesFrom<SiStripNoises, SiStripNoisesRcd>();
qualityToken_ = cc.consumesFrom<SiStripQuality, SiStripQualityRcd>(
edm::ESInputTag{"", iConfig.getParameter<std::string>("QualityLabel")});
}

Expand All @@ -54,9 +54,9 @@ void SiStripClusterizerConditionsGPUESProducer::fillDescriptions(edm::Configurat

SiStripClusterizerConditionsGPUESProducer::ReturnType SiStripClusterizerConditionsGPUESProducer::produce(
const SiStripClusterizerConditionsRcd& iRecord) {
auto gainsH = iRecord.getTransientHandle(m_gainToken);
const auto& noises = iRecord.get(m_noisesToken);
const auto& quality = iRecord.get(m_qualityToken);
auto gainsH = iRecord.getTransientHandle(gainToken_);
const auto& noises = iRecord.get(noisesToken_);
const auto& quality = iRecord.get(qualityToken_);

return std::make_unique<SiStripClusterizerConditionsGPU>(quality, gainsH.product(), noises);
}
Expand Down
Loading

0 comments on commit 7dbff2a

Please sign in to comment.