Skip to content

Commit

Permalink
Merge pull request #44915 from AdrianoDee/phase2_alpaka_fixes_140X
Browse files Browse the repository at this point in the history
[14_0_X] Fixes for Alpaka Phase2 Pixel Reco
  • Loading branch information
cmsbuild authored May 10, 2024
2 parents 2466a2b + 4bd3572 commit 8581266
Show file tree
Hide file tree
Showing 9 changed files with 72 additions and 42 deletions.
8 changes: 2 additions & 6 deletions DataFormats/SiPixelDigiSoA/interface/SiPixelDigisDevice.h
Original file line number Diff line number Diff line change
Expand Up @@ -21,17 +21,13 @@ class SiPixelDigisDevice : public PortableDeviceCollection<SiPixelDigisSoA, TDev
explicit SiPixelDigisDevice(size_t maxFedWords, TDev const &device)
: PortableDeviceCollection<SiPixelDigisSoA, TDev>(maxFedWords + 1, device) {}

void setNModulesDigis(uint32_t nModules, uint32_t nDigis) {
nModules_h = nModules;
nDigis_h = nDigis;
}
void setNModules(uint32_t nModules) { nModules_h = nModules; }

uint32_t nModules() const { return nModules_h; }
uint32_t nDigis() const { return nDigis_h; }
uint32_t nDigis() const { return this->view().metadata().size() - 1; }

private:
uint32_t nModules_h = 0;
uint32_t nDigis_h = 0;
};

#endif // DataFormats_SiPixelDigiSoA_interface_SiPixelDigisDevice_h
8 changes: 2 additions & 6 deletions DataFormats/SiPixelDigiSoA/interface/SiPixelDigisHost.h
Original file line number Diff line number Diff line change
Expand Up @@ -14,17 +14,13 @@ class SiPixelDigisHost : public PortableHostCollection<SiPixelDigisSoA> {
explicit SiPixelDigisHost(size_t maxFedWords, TQueue queue)
: PortableHostCollection<SiPixelDigisSoA>(maxFedWords + 1, queue) {}

void setNModulesDigis(uint32_t nModules, uint32_t nDigis) {
nModules_h = nModules;
nDigis_h = nDigis;
}
void setNModules(uint32_t nModules) { nModules_h = nModules; }

uint32_t nModules() const { return nModules_h; }
uint32_t nDigis() const { return nDigis_h; }
uint32_t nDigis() const { return view().metadata().size() - 1; }

private:
uint32_t nModules_h = 0;
uint32_t nDigis_h = 0;
};

#endif // DataFormats_SiPixelDigiSoA_interface_SiPixelDigisHost_h
Original file line number Diff line number Diff line change
Expand Up @@ -23,9 +23,9 @@ namespace cms::alpakatools {
struct CopyToHost<SiPixelDigisDevice<TDevice>> {
template <typename TQueue>
static auto copyAsync(TQueue &queue, SiPixelDigisDevice<TDevice> const &srcData) {
SiPixelDigisHost dstData(srcData.view().metadata().size(), queue);
SiPixelDigisHost dstData(srcData.view().metadata().size() - 1, queue);
alpaka::memcpy(queue, dstData.buffer(), srcData.buffer());
dstData.setNModulesDigis(srcData.nModules(), srcData.nDigis());
dstData.setNModules(srcData.nModules());
return dstData;
}
};
Expand Down
3 changes: 2 additions & 1 deletion DataFormats/SiPixelDigiSoA/src/classes_def.xml
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,8 @@
<class name="SiPixelDigisSoA"/>
<class name="SiPixelDigisSoA::View"/>
<class name="PortableHostCollection<SiPixelDigisSoA>"/>
<class name="SiPixelDigisHost" ClassVersion="3">
<class name="SiPixelDigisHost" ClassVersion="4">
<version ClassVersion="4" checksum="2247404879"/>
<version ClassVersion="3" checksum="3022474662"/>
</class>
<class name="edm::Wrapper<SiPixelDigisHost>" splitLevel="0"/>
Expand Down
9 changes: 7 additions & 2 deletions Geometry/CommonTopologies/interface/SimplePixelTopology.h
Original file line number Diff line number Diff line change
Expand Up @@ -380,8 +380,13 @@ namespace pixelTopology {

static constexpr uint16_t numberOfModules = 3892;

// 1024 bins, 10 bits
static constexpr uint16_t clusterBinning = 1024;
// The number of bins is 1000 (< 1024 bins, i.e. it fits in 10 bits). It must be:
// - < 32*32 (warpSize*warpSize, needed by the block prefix scan for CUDA)
// - > number of columns (y) in any module. This is because the
// pixel clustering takes for granted that each bin only contains
// pixels belonging to the same column.
// See RecoLocalTracker/SiPixelClusterizer/plugins/alpaka/PixelClustering.h#L325-L347
static constexpr uint16_t clusterBinning = 1000;
static constexpr uint16_t clusterBits = 10;

static constexpr uint16_t numberOfModulesInBarrel = 756;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,19 @@ namespace pixelClustering {
const uint32_t numElements) const {
constexpr int32_t maxNumClustersPerModules = TrackerTraits::maxNumClustersPerModules;

#ifdef GPU_DEBUG
if (cms::alpakatools::once_per_grid(acc)) {
printf("All digis before cut: \n");
for (uint32_t i = 0; i < numElements; i++)
printf("%d %d %d %d %d \n",
i,
digi_view[i].rawIdArr(),
digi_view[i].clus(),
digi_view[i].pdigi(),
digi_view[i].adc());
}
#endif

auto& charge = alpaka::declareSharedVar<int32_t[maxNumClustersPerModules], __COUNTER__>(acc);
auto& ok = alpaka::declareSharedVar<uint8_t[maxNumClustersPerModules], __COUNTER__>(acc);
auto& newclusId = alpaka::declareSharedVar<uint16_t[maxNumClustersPerModules], __COUNTER__>(acc);
Expand All @@ -37,6 +50,7 @@ namespace pixelClustering {
ALPAKA_ASSERT_ACC(startBPIX2 < TrackerTraits::numberOfModules);

auto endModule = clus_view[0].moduleStart();

for (auto module : cms::alpakatools::independent_groups(acc, endModule)) {
auto firstPixel = clus_view[1 + module].moduleStart();
auto thisModuleId = digi_view[firstPixel].moduleId();
Expand Down Expand Up @@ -112,8 +126,17 @@ namespace pixelClustering {
newclusId[i] = ok[i] = (charge[i] >= chargeCut) ? 1 : 0;
if (0 == ok[i])
good = false;
#ifdef GPU_DEBUG
printf("Cutting pix %d in module %d newId %d ok? %d charge %d cut %d -> good %d \n",
i,
thisModuleId,
newclusId[i],
ok[i],
charge[i],
chargeCut,
good);
#endif
}

// if all clusters are above threshold, do nothing
if (alpaka::syncBlockThreadsPredicate<alpaka::BlockAnd>(acc, good))
continue;
Expand All @@ -136,6 +159,7 @@ namespace pixelClustering {
alpaka::syncBlockThreads(acc);
}
}

ALPAKA_ASSERT_ACC(nclus >= newclusId[nclus - 1]);

clus_view[thisModuleId].clusInModule() = newclusId[nclus - 1];
Expand All @@ -154,6 +178,18 @@ namespace pixelClustering {

// done
alpaka::syncBlockThreads(acc);
#ifdef GPU_DEBUG
if (cms::alpakatools::once_per_grid(acc)) {
printf("All digis AFTER cut: \n");
for (uint32_t i = 0; i < numElements; i++)
printf("%d %d %d %d %d \n",
i,
digi_view[i].rawIdArr(),
digi_view[i].clus(),
digi_view[i].pdigi(),
digi_view[i].adc());
}
#endif
}
}
};
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -45,12 +45,10 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE {
const edm::EDGetTokenT<edm::DetSetVector<PixelDigi>> pixelDigiToken_;

device::EDPutToken<SiPixelDigisSoACollection> digiPutToken_;
device::EDPutToken<SiPixelDigiErrorsSoACollection> digiErrorPutToken_;
device::EDPutToken<SiPixelClustersSoACollection> clusterPutToken_;

Algo Algo_;

const bool includeErrors_;
const SiPixelClusterThresholds clusterThresholds_;
uint32_t nDigis_ = 0;

Expand All @@ -62,17 +60,12 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE {
pixelDigiToken_(consumes<edm::DetSetVector<PixelDigi>>(iConfig.getParameter<edm::InputTag>("InputDigis"))),
digiPutToken_(produces()),
clusterPutToken_(produces()),
includeErrors_(iConfig.getParameter<bool>("IncludeErrors")),
clusterThresholds_{iConfig.getParameter<int32_t>("clusterThreshold_layer1"),
iConfig.getParameter<int32_t>("clusterThreshold_otherLayers"),
static_cast<float>(iConfig.getParameter<double>("ElectronPerADCGain")),
static_cast<int8_t>(iConfig.getParameter<int>("Phase2ReadoutMode")),
static_cast<uint16_t>(iConfig.getParameter<uint32_t>("Phase2DigiBaseline")),
static_cast<uint8_t>(iConfig.getParameter<uint32_t>("Phase2KinkADC"))} {
if (includeErrors_) {
digiErrorPutToken_ = produces();
}
}
static_cast<uint8_t>(iConfig.getParameter<uint32_t>("Phase2KinkADC"))} {}

void SiPixelPhase2DigiToCluster::fillDescriptions(edm::ConfigurationDescriptions& descriptions) {
edm::ParameterSetDescription desc;
Expand Down Expand Up @@ -100,11 +93,11 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE {
nDigis += det.size();
}

if (nDigis_ == 0)
if (nDigis == 0)
return;

SiPixelDigisHost digis_h(nDigis, iEvent.queue());
nDigis_ = nDigis;
SiPixelDigisHost digis_h(nDigis_, iEvent.queue());

nDigis = 0;
for (const auto& det : input) {
Expand All @@ -119,6 +112,8 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE {
digis_h.view()[nDigis].yy() = uint16_t(px.column());
digis_h.view()[nDigis].adc() = uint16_t(px.adc());

digis_h.view()[nDigis].clus() = 0;

digis_h.view()[nDigis].pdigi() = uint32_t(px.packedData());

digis_h.view()[nDigis].rawIdArr() = uint32_t(detid);
Expand All @@ -130,27 +125,21 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE {
digis_d = SiPixelDigisSoACollection(nDigis, iEvent.queue());
alpaka::memcpy(iEvent.queue(), digis_d.buffer(), digis_h.buffer());

Algo_.makePhase2ClustersAsync(iEvent.queue(), clusterThresholds_, digis_d.view(), nDigis);
Algo_.makePhase2ClustersAsync(iEvent.queue(), clusterThresholds_, digis_d.view(), nDigis_);
}

void SiPixelPhase2DigiToCluster::produce(device::Event& iEvent, device::EventSetup const& iSetup) {
if (nDigis_ == 0) {
SiPixelClustersSoACollection clusters_d{pixelTopology::Phase1::numberOfModules, iEvent.queue()};
iEvent.emplace(digiPutToken_, std::move(digis_d));
SiPixelClustersSoACollection clusters_d{pixelTopology::Phase2::numberOfModules, iEvent.queue()};
SiPixelDigisSoACollection digis_d_zero{nDigis_, iEvent.queue()};
iEvent.emplace(digiPutToken_, std::move(digis_d_zero));
iEvent.emplace(clusterPutToken_, std::move(clusters_d));
if (includeErrors_) {
iEvent.emplace(digiErrorPutToken_, SiPixelDigiErrorsSoACollection());
}
return;
}

digis_d.setNModulesDigis(Algo_.nModules(), nDigis_);

digis_d.setNModules(Algo_.nModules());
iEvent.emplace(digiPutToken_, std::move(digis_d));
iEvent.emplace(clusterPutToken_, Algo_.getClusters());
if (includeErrors_) {
iEvent.emplace(digiErrorPutToken_, Algo_.getErrors());
}
}

} // namespace ALPAKA_ACCELERATOR_NAMESPACE
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -432,7 +432,15 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE {
struct FillHitsModuleStart {
template <typename TAcc>
ALPAKA_FN_ACC void operator()(const TAcc &acc, SiPixelClustersSoAView clus_view) const {
ALPAKA_ASSERT_ACC(TrackerTraits::numberOfModules < 2048); // easy to extend at least till 32*1024
constexpr bool isPhase2 = std::is_base_of<pixelTopology::Phase2, TrackerTraits>::value;

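// For Phase1 there are 1856 pixel modules
// For Phase2 there are 3872 pixel modules
// (NOTE(review): SimplePixelTopology.h declares numberOfModules = 3892 - confirm which figure is right)
// For any setup with more modules it would be
// easy to extend the limit at least up to 32*1024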

constexpr uint16_t prefixScanUpperLimit = isPhase2 ? 4096 : 2048;
ALPAKA_ASSERT_ACC(TrackerTraits::numberOfModules < prefixScanUpperLimit);

constexpr int numberOfModules = TrackerTraits::numberOfModules;
constexpr uint32_t maxHitsInModule = TrackerTraits::maxHitsInModule;
Expand All @@ -449,7 +457,6 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE {
clus_view[i + 1].clusModuleStart() = std::min(maxHitsInModule, clus_view[i].clusInModule());
}

constexpr bool isPhase2 = std::is_base_of<pixelTopology::Phase2, TrackerTraits>::value;
constexpr auto leftModules = isPhase2 ? 1024 : numberOfModules - 1024;

auto &&ws = alpaka::declareSharedVar<uint32_t[32], __COUNTER__>(acc);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -168,7 +168,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE {
const uint32_t numDigis);

SiPixelDigisSoACollection getDigis() {
digis_d->setNModulesDigis(nModules_Clusters_h[0], nDigis);
digis_d->setNModules(nModules_Clusters_h[0]);
return std::move(*digis_d);
}

Expand Down

0 comments on commit 8581266

Please sign in to comment.