Skip to content

Commit

Permalink
Add dynamic allocation of the PFRecHitFraction SoA
Browse files Browse the repository at this point in the history
Fixed to work with latest updates

Fix for case with 0 nRH

Fix bad allocation crash

Remove extra lines

Simplify memcpy to single int

Code checks and formatting
  • Loading branch information
jsamudio committed Sep 26, 2024
1 parent c312b85 commit 12be989
Show file tree
Hide file tree
Showing 3 changed files with 96 additions and 37 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -8,55 +8,87 @@
#include "FWCore/Utilities/interface/EDGetToken.h"
#include "FWCore/Utilities/interface/InputTag.h"
#include "FWCore/Utilities/interface/StreamID.h"
#include "HeterogeneousCore/AlpakaCore/interface/alpaka/stream/EDProducer.h"
#include "HeterogeneousCore/AlpakaCore/interface/alpaka/stream/SynchronizingEDProducer.h"
#include "HeterogeneousCore/CUDACore/interface/JobConfigurationGPURecord.h"
#include "RecoParticleFlow/PFClusterProducer/interface/PFCPositionCalculatorBase.h"
#include "RecoParticleFlow/PFClusterProducer/interface/alpaka/PFClusterParamsDeviceCollection.h"
#include "RecoParticleFlow/PFClusterProducer/plugins/alpaka/PFClusterSoAProducerKernel.h"
#include "RecoParticleFlow/PFRecHitProducer/interface/PFRecHitTopologyRecord.h"

namespace ALPAKA_ACCELERATOR_NAMESPACE {
class PFClusterSoAProducer : public stream::EDProducer<> {
class PFClusterSoAProducer : public stream::SynchronizingEDProducer<> {
public:
// Constructs the producer from its configuration: registers the two
// event-setup inputs and the event input, declares the two device outputs,
// creates the host buffer used to bring the rec-hit-fraction count back from
// the device, and caches the scalar options.
//
// Configuration parameters read here:
//   "pfClusterParams"            (edm::ESInputTag)
//   "topology"                   (edm::ESInputTag)
//   "pfRecHits"                  (edm::InputTag)
//   "synchronise"                (bool)
//   "pfRecHitFractionAllocation" (int)
PFClusterSoAProducer(edm::ParameterSet const& config)
: pfClusParamsToken(esConsumes(config.getParameter<edm::ESInputTag>("pfClusterParams"))),
topologyToken_(esConsumes(config.getParameter<edm::ESInputTag>("topology"))),
inputPFRecHitSoA_Token_{consumes(config.getParameter<edm::InputTag>("pfRecHits"))},
outputPFClusterSoA_Token_{produces()},
outputPFRHFractionSoA_Token_{produces()},
// Host-side uint32_t buffer: acquire() copies the device counter into it and
// produce() reads it to size the PFRecHitFraction collection.
num_rhf_{cms::alpakatools::make_host_buffer<uint32_t>()},
synchronise_(config.getParameter<bool>("synchronise")),
// NOTE(review): in the visible code this value is only used by the
// `nRH * pfRecHitFractionAllocation_` allocation, which looks like the
// pre-commit (removed) line — confirm whether the parameter is still needed.
pfRecHitFractionAllocation_(config.getParameter<int>("pfRecHitFractionAllocation")) {}

// NOTE(review): this listing is a rendered diff without +/- markers. The line
// directly below looks like the pre-commit produce() header that the new
// acquire() replaces — confirm against the repository before treating it as
// live code.
void produce(device::Event& event, device::EventSetup const& setup) override {
// First stage of the acquire()/produce() pair.
//
// Allocates the per-event clustering collections (sized from the number of
// input rec hits), runs PFClusterProducerKernel::step1 when there is at least
// one rec hit, and enqueues a copy of the device-side counter `size_d` into the
// host buffer `num_rhf_`. produce() later reads `num_rhf_` on the host to size
// the PFRecHitFraction collection.
//
// NOTE(review): produce() dereferences num_rhf_ on the host, so the copy
// enqueued here must have completed by then; this presumably relies on the
// SynchronizingEDProducer contract — confirm against the AlpakaCore docs.
void acquire(device::Event const& event, device::EventSetup const& setup) override {
const reco::PFClusterParamsDeviceCollection& params = setup.getData(pfClusParamsToken);
const reco::PFRecHitHCALTopologyDeviceCollection& topology = setup.getData(topologyToken_);
const reco::PFRecHitHostCollection& pfRecHits = event.get(inputPFRecHitSoA_Token_);
// nRH stays 0 when the collection's metadata reports size 0; size() is only
// queried otherwise.
int nRH = 0;
if (pfRecHits->metadata().size() != 0)
nRH = pfRecHits->size();

// NOTE(review): the next four stack-local declarations look like the removed
// side of the diff (they duplicate the member allocations that follow and
// would shadow those members) — confirm.
reco::PFClusteringVarsDeviceCollection pfClusteringVars{nRH, event.queue()};
reco::PFClusteringEdgeVarsDeviceCollection pfClusteringEdgeVars{(nRH * 8), event.queue()};
reco::PFClusterDeviceCollection pfClusters{nRH, event.queue()};
reco::PFRecHitFractionDeviceCollection pfrhFractions{nRH * pfRecHitFractionAllocation_, event.queue()};
// Collections are held in members so that produce() can keep using them.
// The edge-variable collection is sized at 8 entries per rec hit.
pfClusteringVars = std::make_unique<reco::PFClusteringVarsDeviceCollection>(nRH, event.queue());
pfClusteringEdgeVars = std::make_unique<reco::PFClusteringEdgeVarsDeviceCollection>(nRH * 8, event.queue());
pfClusters = std::make_unique<reco::PFClusterDeviceCollection>(nRH, event.queue());

// device buffer to store rhf size
// Zeroed first, so a count of 0 is copied back when step1 is skipped below.
auto size_d = cms::alpakatools::make_device_buffer<uint32_t>(event.queue());
alpaka::memset(event.queue(), size_d, 0x00);

if (nRH != 0) {
PFClusterProducerKernel kernel(event.queue(), pfRecHits);
kernel.step1(event.queue(),
params,
topology,
*pfClusteringVars,
*pfClusteringEdgeVars,
pfRecHits,
*pfClusters,
size_d.data());
}

// Wrap the device counter in a view and enqueue the device-to-host copy into
// num_rhf_ on the event's queue.
auto device_num_rhf = cms::alpakatools::make_device_view<uint32_t>(alpaka::getDev(event.queue()), *size_d.data());

alpaka::memcpy(event.queue(), num_rhf_, device_num_rhf);
}

// Second stage of the acquire()/produce() pair.
//
// Allocates the PFRecHitFraction collection with the element count that
// acquire() copied into `num_rhf_` (or with 0 elements when there are no rec
// hits), runs PFClusterProducerKernel::step2 on the collections allocated in
// acquire(), optionally blocks on the queue, and moves the cluster and
// rec-hit-fraction collections into the event.
//
// NOTE(review): pfClusteringVars and pfClusteringEdgeVars are not released
// here; in the visible code they are only replaced by the next acquire() call.
// Confirm whether holding them between events is intended.
void produce(device::Event& event, device::EventSetup const& setup) override {
const reco::PFClusterParamsDeviceCollection& params = setup.getData(pfClusParamsToken);
const reco::PFRecHitHCALTopologyDeviceCollection& topology = setup.getData(topologyToken_);
const reco::PFRecHitHostCollection& pfRecHits = event.get(inputPFRecHitSoA_Token_);
// Same rec-hit count logic as in acquire(): 0 when the metadata size is 0.
int nRH = 0;
if (pfRecHits->metadata().size() != 0)
nRH = pfRecHits->size();

if (nRH != 0) {
// Size the fraction collection from the host copy of the device counter.
pfrhFractions = std::make_unique<reco::PFRecHitFractionDeviceCollection>(*num_rhf_.data(), event.queue());
PFClusterProducerKernel kernel(event.queue(), pfRecHits);
// NOTE(review): the kernel.execute(...) call below looks like the removed side
// of the diff (it passes the unique_ptr members directly, while step2 below
// dereferences them) — confirm.
kernel.execute(event.queue(),
params,
topology,
pfClusteringVars,
pfClusteringEdgeVars,
pfRecHits,
pfClusters,
pfrhFractions);
kernel.step2(event.queue(),
params,
topology,
*pfClusteringVars,
*pfClusteringEdgeVars,
pfRecHits,
*pfClusters,
*pfrhFractions);
} else {
// No rec hits: still provide a (zero-sized) fraction collection for the output.
pfrhFractions = std::make_unique<reco::PFRecHitFractionDeviceCollection>(0, event.queue());
}

// Optional blocking wait on the queue, controlled by the "synchronise" parameter.
if (synchronise_)
alpaka::wait(event.queue());

// NOTE(review): the first pair of emplace calls (moving the unique_ptr objects
// themselves) looks like the removed side of the diff; the second pair moves
// the pointed-to collections into the event — confirm.
event.emplace(outputPFClusterSoA_Token_, std::move(pfClusters));
event.emplace(outputPFRHFractionSoA_Token_, std::move(pfrhFractions));
event.emplace(outputPFClusterSoA_Token_, std::move(*pfClusters));
event.emplace(outputPFRHFractionSoA_Token_, std::move(*pfrhFractions));
}

static void fillDescriptions(edm::ConfigurationDescriptions& descriptions) {
Expand All @@ -75,6 +107,11 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE {
// Token for the input PFRecHit host collection ("pfRecHits" parameter).
const edm::EDGetTokenT<reco::PFRecHitHostCollection> inputPFRecHitSoA_Token_;
// Put tokens for the two device collections written by produce().
const device::EDPutToken<reco::PFClusterDeviceCollection> outputPFClusterSoA_Token_;
const device::EDPutToken<reco::PFRecHitFractionDeviceCollection> outputPFRHFractionSoA_Token_;
// Host copy of the device-side rec-hit-fraction count; written by the memcpy
// enqueued in acquire(), read in produce() to size pfrhFractions.
cms::alpakatools::host_buffer<uint32_t> num_rhf_;
// Per-event device collections. The first three are allocated in acquire() and
// pfrhFractions in produce(); they are members so they survive between the
// two calls.
std::unique_ptr<reco::PFClusteringVarsDeviceCollection> pfClusteringVars;
std::unique_ptr<reco::PFClusteringEdgeVarsDeviceCollection> pfClusteringEdgeVars;
std::unique_ptr<reco::PFClusterDeviceCollection> pfClusters;
std::unique_ptr<reco::PFRecHitFractionDeviceCollection> pfrhFractions;
// When true, produce() calls alpaka::wait on the event queue before emplacing.
const bool synchronise_;
// Value of the "pfRecHitFractionAllocation" configuration parameter.
const int pfRecHitFractionAllocation_;
};
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1090,7 +1090,6 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE {
const reco::PFRecHitHCALTopologyDeviceCollection::ConstView topology,
const reco::PFRecHitHostCollection::ConstView pfRecHits,
reco::PFClusterDeviceCollection::View clusterView,
reco::PFRecHitFractionDeviceCollection::View fracView,
uint32_t* __restrict__ nSeeds) const {
const int nRH = pfRecHits.size();

Expand Down Expand Up @@ -1199,7 +1198,8 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE {
const reco::PFRecHitHostCollection::ConstView pfRecHits,
reco::PFClusteringVarsDeviceCollection::View pfClusteringVars,
reco::PFClusterDeviceCollection::View clusterView,
uint32_t* __restrict__ nSeeds) const {
uint32_t* __restrict__ nSeeds,
uint32_t* __restrict__ size_d) const {
const int nRH = pfRecHits.size();
int& totalSeedOffset = alpaka::declareSharedVar<int, __COUNTER__>(acc);
int& totalSeedFracOffset = alpaka::declareSharedVar<int, __COUNTER__>(acc);
Expand Down Expand Up @@ -1302,6 +1302,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE {
pfClusteringVars.pcrhFracSize() = totalSeedFracOffset;
pfClusteringVars.nRHFracs() = totalSeedFracOffset;
clusterView.nRHFracs() = totalSeedFracOffset;
*size_d = totalSeedFracOffset;
clusterView.nSeeds() = *nSeeds;
clusterView.nTopos() = pfClusteringVars.nTopos();

Expand Down Expand Up @@ -1467,14 +1468,14 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE {
alpaka::memset(queue, nSeeds, 0x00); // Reset nSeeds
}

void PFClusterProducerKernel::execute(Queue& queue,
const reco::PFClusterParamsDeviceCollection& params,
const reco::PFRecHitHCALTopologyDeviceCollection& topology,
reco::PFClusteringVarsDeviceCollection& pfClusteringVars,
reco::PFClusteringEdgeVarsDeviceCollection& pfClusteringEdgeVars,
const reco::PFRecHitHostCollection& pfRecHits,
reco::PFClusterDeviceCollection& pfClusters,
reco::PFRecHitFractionDeviceCollection& pfrhFractions) {
void PFClusterProducerKernel::step1(Queue& queue,
const reco::PFClusterParamsDeviceCollection& params,
const reco::PFRecHitHCALTopologyDeviceCollection& topology,
reco::PFClusteringVarsDeviceCollection& pfClusteringVars,
reco::PFClusteringEdgeVarsDeviceCollection& pfClusteringEdgeVars,
const reco::PFRecHitHostCollection& pfRecHits,
reco::PFClusterDeviceCollection& pfClusters,
uint32_t* __restrict__ size_d) {
const int nRH = pfRecHits->size();
const int threadsPerBlock = 256;
const int blocks = divide_up_by(nRH, threadsPerBlock);
Expand All @@ -1488,7 +1489,6 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE {
topology.view(),
pfRecHits.view(),
pfClusters.view(),
pfrhFractions.view(),
nSeeds.data());
// prepareTopoInputs
alpaka::exec<Acc1D>(queue,
Expand Down Expand Up @@ -1524,7 +1524,19 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE {
pfRecHits.view(),
pfClusteringVars.view(),
pfClusters.view(),
nSeeds.data());
nSeeds.data(),
size_d);
}

void PFClusterProducerKernel::step2(Queue& queue,
const reco::PFClusterParamsDeviceCollection& params,
const reco::PFRecHitHCALTopologyDeviceCollection& topology,
reco::PFClusteringVarsDeviceCollection& pfClusteringVars,
reco::PFClusteringEdgeVarsDeviceCollection& pfClusteringEdgeVars,
const reco::PFRecHitHostCollection& pfRecHits,
reco::PFClusterDeviceCollection& pfClusters,
reco::PFRecHitFractionDeviceCollection& pfrhFractions) {
const int nRH = pfRecHits->size();

// fillRhfIndex
alpaka::exec<Acc2D>(queue,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
#include "DataFormats/ParticleFlowReco/interface/alpaka/PFRecHitDeviceCollection.h"
#include "DataFormats/ParticleFlowReco/interface/PFRecHitHostCollection.h"
#include "DataFormats/ParticleFlowReco/interface/alpaka/PFClusterDeviceCollection.h"
#include "DataFormats/ParticleFlowReco/interface/PFClusterHostCollection.h"
#include "DataFormats/ParticleFlowReco/interface/alpaka/PFRecHitFractionDeviceCollection.h"
#include "RecoParticleFlow/PFClusterProducer/interface/alpaka/PFClusterParamsDeviceCollection.h"
#include "RecoParticleFlow/PFClusterProducer/interface/alpaka/PFClusteringVarsDeviceCollection.h"
Expand Down Expand Up @@ -39,14 +40,23 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE {
public:
PFClusterProducerKernel(Queue& queue, const reco::PFRecHitHostCollection& pfRecHits);

// NOTE(review): this listing is a rendered diff without +/- markers. This
// declaration looks like the pre-commit single-call entry point that the
// step1()/step2() pair below replaces — confirm against the repository.
void execute(Queue& queue,
const reco::PFClusterParamsDeviceCollection& params,
const reco::PFRecHitHCALTopologyDeviceCollection& topology,
reco::PFClusteringVarsDeviceCollection& pfClusteringVars,
reco::PFClusteringEdgeVarsDeviceCollection& pfClusteringEdgeVars,
const reco::PFRecHitHostCollection& pfRecHits,
reco::PFClusterDeviceCollection& pfClusters,
reco::PFRecHitFractionDeviceCollection& pfrhFractions);
// First half of the clustering chain; it takes no PFRecHitFraction collection.
//
// Enqueues kernels on `queue`. The last launch visible in the definition
// receives `size_d`, and the kernel-side code in this commit stores
// totalSeedFracOffset through that pointer. The producer copies that value to
// the host and uses it to size the PFRecHitFraction collection passed to
// step2().
//
// @param queue                 queue on which the kernels are launched
// @param params                clustering parameters (device collection)
// @param topology              HCAL rec-hit topology (device collection)
// @param pfClusteringVars      clustering work variables, passed to kernels as a view
// @param pfClusteringEdgeVars  clustering edge work variables
// @param pfRecHits             input rec hits
// @param pfClusters            output cluster collection, passed to kernels as a view
// @param size_d                pointer written by a kernel with the number of
//                              rec-hit fractions; the producer passes the
//                              data() of a device buffer
void step1(Queue& queue,
const reco::PFClusterParamsDeviceCollection& params,
const reco::PFRecHitHCALTopologyDeviceCollection& topology,
reco::PFClusteringVarsDeviceCollection& pfClusteringVars,
reco::PFClusteringEdgeVarsDeviceCollection& pfClusteringEdgeVars,
const reco::PFRecHitHostCollection& pfRecHits,
reco::PFClusterDeviceCollection& pfClusters,
uint32_t* __restrict__ size_d);

// Second half of the clustering chain; the visible part of its definition
// begins with the fillRhfIndex launch.
//
// Takes the same collections as step1() plus the PFRecHitFraction collection,
// which the producer allocates between the two steps using the count written
// through step1()'s `size_d`.
//
// @param queue                 queue on which the kernels are launched
// @param params                clustering parameters (device collection)
// @param topology              HCAL rec-hit topology (device collection)
// @param pfClusteringVars      clustering work variables, as passed to step1()
// @param pfClusteringEdgeVars  clustering edge work variables, as passed to step1()
// @param pfRecHits             input rec hits
// @param pfClusters            cluster collection, as passed to step1()
// @param pfrhFractions         rec-hit fraction collection
void step2(Queue& queue,
const reco::PFClusterParamsDeviceCollection& params,
const reco::PFRecHitHCALTopologyDeviceCollection& topology,
reco::PFClusteringVarsDeviceCollection& pfClusteringVars,
reco::PFClusteringEdgeVarsDeviceCollection& pfClusteringEdgeVars,
const reco::PFRecHitHostCollection& pfRecHits,
reco::PFClusterDeviceCollection& pfClusters,
reco::PFRecHitFractionDeviceCollection& pfrhFractions);

private:
cms::alpakatools::device_buffer<Device, uint32_t> nSeeds;
Expand Down

0 comments on commit 12be989

Please sign in to comment.