From 155d54029c8dfb84ce6849d6219cea23aad002b8 Mon Sep 17 00:00:00 2001 From: jsamudio Date: Thu, 8 Aug 2024 08:29:15 -0500 Subject: [PATCH] Add dynamic allocation of the PFRecHitFraction SoA. Fixed to work with latest updates. Fix for case with 0 nRH. Fix bad allocation crash. Remove extra lines. Simplify memcpy to single int. Code checks and formatting. --- .../plugins/alpaka/PFClusterSoAProducer.cc | 71 ++++++++++++++----- .../alpaka/PFClusterSoAProducerKernel.dev.cc | 36 ++++++---- .../alpaka/PFClusterSoAProducerKernel.h | 26 ++++--- 3 files changed, 96 insertions(+), 37 deletions(-) diff --git a/RecoParticleFlow/PFClusterProducer/plugins/alpaka/PFClusterSoAProducer.cc b/RecoParticleFlow/PFClusterProducer/plugins/alpaka/PFClusterSoAProducer.cc index 6017f539364ec..27371b3edd9c6 100644 --- a/RecoParticleFlow/PFClusterProducer/plugins/alpaka/PFClusterSoAProducer.cc +++ b/RecoParticleFlow/PFClusterProducer/plugins/alpaka/PFClusterSoAProducer.cc @@ -8,7 +8,7 @@ #include "FWCore/Utilities/interface/EDGetToken.h" #include "FWCore/Utilities/interface/InputTag.h" #include "FWCore/Utilities/interface/StreamID.h" -#include "HeterogeneousCore/AlpakaCore/interface/alpaka/stream/EDProducer.h" +#include "HeterogeneousCore/AlpakaCore/interface/alpaka/stream/SynchronizingEDProducer.h" #include "HeterogeneousCore/CUDACore/interface/JobConfigurationGPURecord.h" #include "RecoParticleFlow/PFClusterProducer/interface/PFCPositionCalculatorBase.h" #include "RecoParticleFlow/PFClusterProducer/interface/alpaka/PFClusterParamsDeviceCollection.h" @@ -16,7 +16,7 @@ #include "RecoParticleFlow/PFRecHitProducer/interface/PFRecHitTopologyRecord.h" namespace ALPAKA_ACCELERATOR_NAMESPACE { - class PFClusterSoAProducer : public stream::EDProducer<> { + class PFClusterSoAProducer : public stream::SynchronizingEDProducer<> { public: PFClusterSoAProducer(edm::ParameterSet const& config) : pfClusParamsToken(esConsumes(config.getParameter("pfClusterParams"))), @@ -24,10 +24,11 @@ namespace 
ALPAKA_ACCELERATOR_NAMESPACE { inputPFRecHitSoA_Token_{consumes(config.getParameter("pfRecHits"))}, outputPFClusterSoA_Token_{produces()}, outputPFRHFractionSoA_Token_{produces()}, + num_rhf_{cms::alpakatools::make_host_buffer()}, synchronise_(config.getParameter("synchronise")), pfRecHitFractionAllocation_(config.getParameter("pfRecHitFractionAllocation")) {} - void produce(device::Event& event, device::EventSetup const& setup) override { + void acquire(device::Event const& event, device::EventSetup const& setup) override { const reco::PFClusterParamsDeviceCollection& params = setup.getData(pfClusParamsToken); const reco::PFRecHitHCALTopologyDeviceCollection& topology = setup.getData(topologyToken_); const reco::PFRecHitHostCollection& pfRecHits = event.get(inputPFRecHitSoA_Token_); @@ -35,28 +36,59 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { if (pfRecHits->metadata().size() != 0) nRH = pfRecHits->size(); - reco::PFClusteringVarsDeviceCollection pfClusteringVars{nRH, event.queue()}; - reco::PFClusteringEdgeVarsDeviceCollection pfClusteringEdgeVars{(nRH * 8), event.queue()}; - reco::PFClusterDeviceCollection pfClusters{nRH, event.queue()}; - reco::PFRecHitFractionDeviceCollection pfrhFractions{nRH * pfRecHitFractionAllocation_, event.queue()}; + pfClusteringVars = std::make_unique(nRH, event.queue()); + pfClusteringEdgeVars = std::make_unique(nRH * 8, event.queue()); + pfClusters = std::make_unique(nRH, event.queue()); + + // device buffer to store rhf size + auto size_d = cms::alpakatools::make_device_buffer(event.queue()); + alpaka::memset(event.queue(), size_d, 0x00); + + if (nRH != 0) { + PFClusterProducerKernel kernel(event.queue(), pfRecHits); + kernel.step1(event.queue(), + params, + topology, + *pfClusteringVars, + *pfClusteringEdgeVars, + pfRecHits, + *pfClusters, + size_d.data()); + } + + auto device_num_rhf = cms::alpakatools::make_device_view(alpaka::getDev(event.queue()), *size_d.data()); + + alpaka::memcpy(event.queue(), num_rhf_, device_num_rhf); + } 
+ + void produce(device::Event& event, device::EventSetup const& setup) override { + const reco::PFClusterParamsDeviceCollection& params = setup.getData(pfClusParamsToken); + const reco::PFRecHitHCALTopologyDeviceCollection& topology = setup.getData(topologyToken_); + const reco::PFRecHitHostCollection& pfRecHits = event.get(inputPFRecHitSoA_Token_); + int nRH = 0; + if (pfRecHits->metadata().size() != 0) + nRH = pfRecHits->size(); if (nRH != 0) { + pfrhFractions = std::make_unique(*num_rhf_.data(), event.queue()); PFClusterProducerKernel kernel(event.queue(), pfRecHits); - kernel.execute(event.queue(), - params, - topology, - pfClusteringVars, - pfClusteringEdgeVars, - pfRecHits, - pfClusters, - pfrhFractions); + kernel.step2(event.queue(), + params, + topology, + *pfClusteringVars, + *pfClusteringEdgeVars, + pfRecHits, + *pfClusters, + *pfrhFractions); + } else { + pfrhFractions = std::make_unique(0, event.queue()); } if (synchronise_) alpaka::wait(event.queue()); - event.emplace(outputPFClusterSoA_Token_, std::move(pfClusters)); - event.emplace(outputPFRHFractionSoA_Token_, std::move(pfrhFractions)); + event.emplace(outputPFClusterSoA_Token_, std::move(*pfClusters)); + event.emplace(outputPFRHFractionSoA_Token_, std::move(*pfrhFractions)); } static void fillDescriptions(edm::ConfigurationDescriptions& descriptions) { @@ -75,6 +107,11 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { const edm::EDGetTokenT inputPFRecHitSoA_Token_; const device::EDPutToken outputPFClusterSoA_Token_; const device::EDPutToken outputPFRHFractionSoA_Token_; + cms::alpakatools::host_buffer num_rhf_; + std::unique_ptr pfClusteringVars; + std::unique_ptr pfClusteringEdgeVars; + std::unique_ptr pfClusters; + std::unique_ptr pfrhFractions; const bool synchronise_; const int pfRecHitFractionAllocation_; }; diff --git a/RecoParticleFlow/PFClusterProducer/plugins/alpaka/PFClusterSoAProducerKernel.dev.cc b/RecoParticleFlow/PFClusterProducer/plugins/alpaka/PFClusterSoAProducerKernel.dev.cc index 
53095381d951b..cb480175cfa7a 100644 --- a/RecoParticleFlow/PFClusterProducer/plugins/alpaka/PFClusterSoAProducerKernel.dev.cc +++ b/RecoParticleFlow/PFClusterProducer/plugins/alpaka/PFClusterSoAProducerKernel.dev.cc @@ -1090,7 +1090,6 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { const reco::PFRecHitHCALTopologyDeviceCollection::ConstView topology, const reco::PFRecHitHostCollection::ConstView pfRecHits, reco::PFClusterDeviceCollection::View clusterView, - reco::PFRecHitFractionDeviceCollection::View fracView, uint32_t* __restrict__ nSeeds) const { const int nRH = pfRecHits.size(); @@ -1199,7 +1198,8 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { const reco::PFRecHitHostCollection::ConstView pfRecHits, reco::PFClusteringVarsDeviceCollection::View pfClusteringVars, reco::PFClusterDeviceCollection::View clusterView, - uint32_t* __restrict__ nSeeds) const { + uint32_t* __restrict__ nSeeds, + uint32_t* __restrict__ size_d) const { const int nRH = pfRecHits.size(); int& totalSeedOffset = alpaka::declareSharedVar(acc); int& totalSeedFracOffset = alpaka::declareSharedVar(acc); @@ -1302,6 +1302,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { pfClusteringVars.pcrhFracSize() = totalSeedFracOffset; pfClusteringVars.nRHFracs() = totalSeedFracOffset; clusterView.nRHFracs() = totalSeedFracOffset; + *size_d = totalSeedFracOffset; clusterView.nSeeds() = *nSeeds; clusterView.nTopos() = pfClusteringVars.nTopos(); @@ -1467,14 +1468,14 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { alpaka::memset(queue, nSeeds, 0x00); // Reset nSeeds } - void PFClusterProducerKernel::execute(Queue& queue, - const reco::PFClusterParamsDeviceCollection& params, - const reco::PFRecHitHCALTopologyDeviceCollection& topology, - reco::PFClusteringVarsDeviceCollection& pfClusteringVars, - reco::PFClusteringEdgeVarsDeviceCollection& pfClusteringEdgeVars, - const reco::PFRecHitHostCollection& pfRecHits, - reco::PFClusterDeviceCollection& pfClusters, - reco::PFRecHitFractionDeviceCollection& pfrhFractions) { + void 
PFClusterProducerKernel::step1(Queue& queue, + const reco::PFClusterParamsDeviceCollection& params, + const reco::PFRecHitHCALTopologyDeviceCollection& topology, + reco::PFClusteringVarsDeviceCollection& pfClusteringVars, + reco::PFClusteringEdgeVarsDeviceCollection& pfClusteringEdgeVars, + const reco::PFRecHitHostCollection& pfRecHits, + reco::PFClusterDeviceCollection& pfClusters, + uint32_t* __restrict__ size_d) { const int nRH = pfRecHits->size(); const int threadsPerBlock = 256; const int blocks = divide_up_by(nRH, threadsPerBlock); @@ -1488,7 +1489,6 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { topology.view(), pfRecHits.view(), pfClusters.view(), - pfrhFractions.view(), nSeeds.data()); // prepareTopoInputs alpaka::exec(queue, @@ -1524,7 +1524,19 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { pfRecHits.view(), pfClusteringVars.view(), pfClusters.view(), - nSeeds.data()); + nSeeds.data(), + size_d); + } + + void PFClusterProducerKernel::step2(Queue& queue, + const reco::PFClusterParamsDeviceCollection& params, + const reco::PFRecHitHCALTopologyDeviceCollection& topology, + reco::PFClusteringVarsDeviceCollection& pfClusteringVars, + reco::PFClusteringEdgeVarsDeviceCollection& pfClusteringEdgeVars, + const reco::PFRecHitHostCollection& pfRecHits, + reco::PFClusterDeviceCollection& pfClusters, + reco::PFRecHitFractionDeviceCollection& pfrhFractions) { + const int nRH = pfRecHits->size(); // fillRhfIndex alpaka::exec(queue, diff --git a/RecoParticleFlow/PFClusterProducer/plugins/alpaka/PFClusterSoAProducerKernel.h b/RecoParticleFlow/PFClusterProducer/plugins/alpaka/PFClusterSoAProducerKernel.h index ed9541b1b9af4..b97bc63feeb7b 100644 --- a/RecoParticleFlow/PFClusterProducer/plugins/alpaka/PFClusterSoAProducerKernel.h +++ b/RecoParticleFlow/PFClusterProducer/plugins/alpaka/PFClusterSoAProducerKernel.h @@ -4,6 +4,7 @@ #include "DataFormats/ParticleFlowReco/interface/alpaka/PFRecHitDeviceCollection.h" #include 
"DataFormats/ParticleFlowReco/interface/PFRecHitHostCollection.h" #include "DataFormats/ParticleFlowReco/interface/alpaka/PFClusterDeviceCollection.h" +#include "DataFormats/ParticleFlowReco/interface/PFClusterHostCollection.h" #include "DataFormats/ParticleFlowReco/interface/alpaka/PFRecHitFractionDeviceCollection.h" #include "RecoParticleFlow/PFClusterProducer/interface/alpaka/PFClusterParamsDeviceCollection.h" #include "RecoParticleFlow/PFClusterProducer/interface/alpaka/PFClusteringVarsDeviceCollection.h" @@ -39,14 +40,23 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { public: PFClusterProducerKernel(Queue& queue, const reco::PFRecHitHostCollection& pfRecHits); - void execute(Queue& queue, - const reco::PFClusterParamsDeviceCollection& params, - const reco::PFRecHitHCALTopologyDeviceCollection& topology, - reco::PFClusteringVarsDeviceCollection& pfClusteringVars, - reco::PFClusteringEdgeVarsDeviceCollection& pfClusteringEdgeVars, - const reco::PFRecHitHostCollection& pfRecHits, - reco::PFClusterDeviceCollection& pfClusters, - reco::PFRecHitFractionDeviceCollection& pfrhFractions); + void step1(Queue& queue, + const reco::PFClusterParamsDeviceCollection& params, + const reco::PFRecHitHCALTopologyDeviceCollection& topology, + reco::PFClusteringVarsDeviceCollection& pfClusteringVars, + reco::PFClusteringEdgeVarsDeviceCollection& pfClusteringEdgeVars, + const reco::PFRecHitHostCollection& pfRecHits, + reco::PFClusterDeviceCollection& pfClusters, + uint32_t* __restrict__ size_d); + + void step2(Queue& queue, + const reco::PFClusterParamsDeviceCollection& params, + const reco::PFRecHitHCALTopologyDeviceCollection& topology, + reco::PFClusteringVarsDeviceCollection& pfClusteringVars, + reco::PFClusteringEdgeVarsDeviceCollection& pfClusteringEdgeVars, + const reco::PFRecHitHostCollection& pfRecHits, + reco::PFClusterDeviceCollection& pfClusters, + reco::PFRecHitFractionDeviceCollection& pfrhFractions); private: cms::alpakatools::device_buffer nSeeds;