From 462d0159aa4f439916b5411ea63b5b90d037c5b2 Mon Sep 17 00:00:00 2001 From: Vincenzo Innocente Date: Mon, 3 Oct 2022 13:51:46 +0200 Subject: [PATCH 1/3] Reduce ECAL memory usage --- RecoLocalCalo/EcalRecProducers/plugins/DeclsForKernels.h | 6 ++---- .../plugins/EcalUncalibRecHitProducerGPU.cc | 4 ++-- 2 files changed, 4 insertions(+), 6 deletions(-) diff --git a/RecoLocalCalo/EcalRecProducers/plugins/DeclsForKernels.h b/RecoLocalCalo/EcalRecProducers/plugins/DeclsForKernels.h index b1e1dafdb7496..e3a07dc6961c5 100644 --- a/RecoLocalCalo/EcalRecProducers/plugins/DeclsForKernels.h +++ b/RecoLocalCalo/EcalRecProducers/plugins/DeclsForKernels.h @@ -98,8 +98,7 @@ namespace ecal { struct EventOutputDataGPU { UncalibratedRecHit<::calo::common::DevStoragePolicy> recHitsEB, recHitsEE; - void allocate(ConfigurationParameters const& configParameters, cudaStream_t cudaStream) { - auto const sizeEB = configParameters.maxNumberHitsEB; + void allocate(int sizeEB, int sizeEE, ConfigurationParameters const& configParameters, cudaStream_t cudaStream) { recHitsEB.amplitudesAll = cms::cuda::make_device_unique( sizeEB * EcalDataFrame::MAXSAMPLES, cudaStream); recHitsEB.amplitude = cms::cuda::make_device_unique(sizeEB, cudaStream); @@ -114,7 +113,6 @@ namespace ecal { recHitsEB.did = cms::cuda::make_device_unique(sizeEB, cudaStream); recHitsEB.flags = cms::cuda::make_device_unique(sizeEB, cudaStream); - auto const sizeEE = configParameters.maxNumberHitsEE; recHitsEE.amplitudesAll = cms::cuda::make_device_unique( sizeEE * EcalDataFrame::MAXSAMPLES, cudaStream); recHitsEE.amplitude = cms::cuda::make_device_unique(sizeEE, cudaStream); @@ -165,7 +163,7 @@ namespace ecal { cms::cuda::device::unique_ptr timeMax, timeError; cms::cuda::device::unique_ptr tcState; - void allocate(ConfigurationParameters const& configParameters, cudaStream_t cudaStream) { + void allocate(int size, ConfigurationParameters const& configParameters, cudaStream_t cudaStream) { constexpr auto svlength = getLength(); 
constexpr auto sgvlength = getLength(); constexpr auto smlength = getLength(); diff --git a/RecoLocalCalo/EcalRecProducers/plugins/EcalUncalibRecHitProducerGPU.cc b/RecoLocalCalo/EcalRecProducers/plugins/EcalUncalibRecHitProducerGPU.cc index b05dc828d7ad7..db618c9cdadb1 100644 --- a/RecoLocalCalo/EcalRecProducers/plugins/EcalUncalibRecHitProducerGPU.cc +++ b/RecoLocalCalo/EcalRecProducers/plugins/EcalUncalibRecHitProducerGPU.cc @@ -247,11 +247,11 @@ void EcalUncalibRecHitProducerGPU::produce(edm::Event& event, edm::EventSetup co multifitParameters}; // dev mem - eventOutputDataGPU.allocate(configParameters_, ctx.stream()); + eventOutputDataGPU.allocate(neb_, nee_, configParameters_, ctx.stream()); // scratch mem ecal::multifit::EventDataForScratchGPU eventDataForScratchGPU; - eventDataForScratchGPU.allocate(configParameters_, ctx.stream()); + eventDataForScratchGPU.allocate(neb_ + nee_, configParameters_, ctx.stream()); // // schedule algorithms From eaa3636a4076ef0e95bccd35ffa784d213704e0c Mon Sep 17 00:00:00 2001 From: Andrea Bocci Date: Mon, 3 Oct 2022 13:52:16 +0200 Subject: [PATCH 2/3] Reduce the ECAL and HCAL GPU memory usage Allocate memory buffers based on the actual number of hits or channels in each event, instead of always allocating the maximum size.
--- .../plugins/DeclsForKernels.h | 24 ++++++++++-------- .../plugins/EcalRecHitProducerGPU.cc | 14 +---------- .../plugins/EcalUncalibRecHitProducerGPU.cc | 17 ++----------- .../python/ecalRecHitGPU_cfi.py | 3 --- .../HcalRecProducers/src/DeclsForKernels.h | 13 +++++----- .../src/HBHERecHitProducerGPU.cc | 25 +++++++------------ 6 files changed, 31 insertions(+), 65 deletions(-) diff --git a/RecoLocalCalo/EcalRecProducers/plugins/DeclsForKernels.h b/RecoLocalCalo/EcalRecProducers/plugins/DeclsForKernels.h index e3a07dc6961c5..cbd28df94eb42 100644 --- a/RecoLocalCalo/EcalRecProducers/plugins/DeclsForKernels.h +++ b/RecoLocalCalo/EcalRecProducers/plugins/DeclsForKernels.h @@ -90,15 +90,15 @@ namespace ecal { std::array kernelMinimizeThreads; bool shouldRunTimingComputation; - - uint32_t maxNumberHitsEB; - uint32_t maxNumberHitsEE; }; struct EventOutputDataGPU { UncalibratedRecHit<::calo::common::DevStoragePolicy> recHitsEB, recHitsEE; - void allocate(int sizeEB, int sizeEE, ConfigurationParameters const& configParameters, cudaStream_t cudaStream) { + void allocate(ConfigurationParameters const& configParameters, + uint32_t sizeEB, + uint32_t sizeEE, + cudaStream_t cudaStream) { recHitsEB.amplitudesAll = cms::cuda::make_device_unique( sizeEB * EcalDataFrame::MAXSAMPLES, cudaStream); recHitsEB.amplitude = cms::cuda::make_device_unique(sizeEB, cudaStream); @@ -163,13 +163,16 @@ namespace ecal { cms::cuda::device::unique_ptr timeMax, timeError; cms::cuda::device::unique_ptr tcState; - void allocate(int size, ConfigurationParameters const& configParameters, cudaStream_t cudaStream) { + void allocate(ConfigurationParameters const& configParameters, + uint32_t sizeEB, + uint32_t sizeEE, + cudaStream_t cudaStream) { constexpr auto svlength = getLength(); constexpr auto sgvlength = getLength(); constexpr auto smlength = getLength(); constexpr auto pmlength = getLength(); constexpr auto bxvlength = getLength(); - auto const size = configParameters.maxNumberHitsEB + 
configParameters.maxNumberHitsEE; + auto const size = sizeEB + sizeEE; auto alloc = [cudaStream](auto& var, uint32_t size) { using element_type = typename std::remove_reference_t::element_type; @@ -271,16 +274,16 @@ namespace ecal { uint32_t expanded_v_DB_reco_flagsSize; uint32_t flagmask; - uint32_t maxNumberHitsEB; - uint32_t maxNumberHitsEE; }; struct EventOutputDataGPU { RecHit<::calo::common::DevStoragePolicy> recHitsEB, recHitsEE; - void allocate(ConfigurationParameters const& configParameters, cudaStream_t cudaStream) { + void allocate(ConfigurationParameters const& configParameters, + uint32_t sizeEB, + uint32_t sizeEE, + cudaStream_t cudaStream) { //---- configParameters -> needed only to decide if to save the timing information or not - auto const sizeEB = configParameters.maxNumberHitsEB; recHitsEB.energy = cms::cuda::make_device_unique<::ecal::reco::StorageScalarType[]>(sizeEB, cudaStream); recHitsEB.time = cms::cuda::make_device_unique<::ecal::reco::StorageScalarType[]>(sizeEB, cudaStream); recHitsEB.chi2 = cms::cuda::make_device_unique<::ecal::reco::StorageScalarType[]>(sizeEB, cudaStream); @@ -288,7 +291,6 @@ namespace ecal { recHitsEB.extra = cms::cuda::make_device_unique(sizeEB, cudaStream); recHitsEB.did = cms::cuda::make_device_unique(sizeEB, cudaStream); - auto const sizeEE = configParameters.maxNumberHitsEE; recHitsEE.energy = cms::cuda::make_device_unique<::ecal::reco::StorageScalarType[]>(sizeEE, cudaStream); recHitsEE.time = cms::cuda::make_device_unique<::ecal::reco::StorageScalarType[]>(sizeEE, cudaStream); recHitsEE.chi2 = cms::cuda::make_device_unique<::ecal::reco::StorageScalarType[]>(sizeEE, cudaStream); diff --git a/RecoLocalCalo/EcalRecProducers/plugins/EcalRecHitProducerGPU.cc b/RecoLocalCalo/EcalRecProducers/plugins/EcalRecHitProducerGPU.cc index 130224547e8e7..c08d27c4ad196 100644 --- a/RecoLocalCalo/EcalRecProducers/plugins/EcalRecHitProducerGPU.cc +++ b/RecoLocalCalo/EcalRecProducers/plugins/EcalRecHitProducerGPU.cc @@ -103,9 
+103,6 @@ void EcalRecHitProducerGPU::fillDescriptions(edm::ConfigurationDescriptions& con desc.add("EELaserMIN", 0.01); desc.add("EBLaserMAX", 30.0); desc.add("EELaserMAX", 30.0); - - desc.add("maxNumberHitsEB", 61200); - desc.add("maxNumberHitsEE", 14648); } EcalRecHitProducerGPU::EcalRecHitProducerGPU(const edm::ParameterSet& ps) { @@ -125,10 +122,6 @@ EcalRecHitProducerGPU::EcalRecHitProducerGPU(const edm::ParameterSet& ps) { configParameters_.EBLaserMAX = ps.getParameter("EBLaserMAX"); configParameters_.EELaserMAX = ps.getParameter("EELaserMAX"); - // max number of digis to allocate for - configParameters_.maxNumberHitsEB = ps.getParameter("maxNumberHitsEB"); - configParameters_.maxNumberHitsEE = ps.getParameter("maxNumberHitsEE"); - flagmask_ = 0; flagmask_ |= 0x1 << EcalRecHit::kNeighboursRecovered; flagmask_ |= 0x1 << EcalRecHit::kTowerRecovered; @@ -182,11 +175,6 @@ void EcalRecHitProducerGPU::acquire(edm::Event const& event, if (neb_ + nee_ == 0) return; - if ((neb_ > configParameters_.maxNumberHitsEB) || (nee_ > configParameters_.maxNumberHitsEE)) { - edm::LogError("EcalRecHitProducerGPU") - << "max number of channels exceeded. 
See options 'maxNumberHitsEB and maxNumberHitsEE' "; - } - int nchannelsEB = ebUncalibRecHits.size; // --> offsetForInput, first EB and then EE // conditions @@ -227,7 +215,7 @@ void EcalRecHitProducerGPU::acquire(edm::Event const& event, IntercalibConstantsHandle_->getOffset()}; // dev mem - eventOutputDataGPU_.allocate(configParameters_, ctx.stream()); + eventOutputDataGPU_.allocate(configParameters_, neb_, nee_, ctx.stream()); // // schedule algorithms diff --git a/RecoLocalCalo/EcalRecProducers/plugins/EcalUncalibRecHitProducerGPU.cc b/RecoLocalCalo/EcalRecProducers/plugins/EcalUncalibRecHitProducerGPU.cc index db618c9cdadb1..d7b03339724ab 100644 --- a/RecoLocalCalo/EcalRecProducers/plugins/EcalUncalibRecHitProducerGPU.cc +++ b/RecoLocalCalo/EcalRecProducers/plugins/EcalUncalibRecHitProducerGPU.cc @@ -88,8 +88,6 @@ void EcalUncalibRecHitProducerGPU::fillDescriptions(edm::ConfigurationDescriptio desc.add("outOfTimeThresholdGain61mEE", 1000); desc.add("amplitudeThresholdEB", 10); desc.add("amplitudeThresholdEE", 10); - desc.add("maxNumberHitsEB", 61200); - desc.add("maxNumberHitsEE", 14648); desc.addUntracked>("kernelMinimizeThreads", {32, 1, 1}); desc.add("shouldRunTimingComputation", true); confDesc.addWithDefaultLabel(desc); @@ -132,10 +130,6 @@ EcalUncalibRecHitProducerGPU::EcalUncalibRecHitProducerGPU(const edm::ParameterS auto amplitudeThreshEB = ps.getParameter("amplitudeThresholdEB"); auto amplitudeThreshEE = ps.getParameter("amplitudeThresholdEE"); - // max number of digis to allocate for - configParameters_.maxNumberHitsEB = ps.getParameter("maxNumberHitsEB"); - configParameters_.maxNumberHitsEE = ps.getParameter("maxNumberHitsEE"); - // switch to run timing computation kernels configParameters_.shouldRunTimingComputation = ps.getParameter("shouldRunTimingComputation"); @@ -203,13 +197,6 @@ void EcalUncalibRecHitProducerGPU::produce(edm::Event& event, edm::EventSetup co // stop here if there are no digis if (neb + nee > 0) { - if ((neb > 
configParameters_.maxNumberHitsEB) || (nee > configParameters_.maxNumberHitsEE)) { - edm::LogError("EcalUncalibRecHitProducerGPU") - << "Max number of channels exceeded in barrel or endcap. Number of barrel channels: " << neb - << " with maxNumberHitsEB=" << configParameters_.maxNumberHitsEB << ", number of endcap channels: " << nee - << " with maxNumberHitsEE=" << configParameters_.maxNumberHitsEE; - } - // conditions auto const& timeCalibConstantsData = setup.getData(timeCalibConstantsToken_); auto const& sampleMaskData = setup.getData(sampleMaskToken_); @@ -247,11 +234,11 @@ void EcalUncalibRecHitProducerGPU::produce(edm::Event& event, edm::EventSetup co multifitParameters}; // dev mem - eventOutputDataGPU.allocate(neb_, nee_, configParameters_, ctx.stream()); + eventOutputDataGPU.allocate(configParameters_, neb, nee, ctx.stream()); // scratch mem ecal::multifit::EventDataForScratchGPU eventDataForScratchGPU; - eventDataForScratchGPU.allocate(neb_ + nee_, configParameters_, ctx.stream()); + eventDataForScratchGPU.allocate(configParameters_, neb, nee, ctx.stream()); // // schedule algorithms diff --git a/RecoLocalCalo/EcalRecProducers/python/ecalRecHitGPU_cfi.py b/RecoLocalCalo/EcalRecProducers/python/ecalRecHitGPU_cfi.py index a9b5599fd970f..3e60b9e46f248 100644 --- a/RecoLocalCalo/EcalRecProducers/python/ecalRecHitGPU_cfi.py +++ b/RecoLocalCalo/EcalRecProducers/python/ecalRecHitGPU_cfi.py @@ -11,9 +11,6 @@ recHitsLabelEB = cms.string("EcalRecHitsEB"), recHitsLabelEE = cms.string("EcalRecHitsEE"), - maxNumberHitsEB = cms.uint32(61200), - maxNumberHitsEE = cms.uint32(14648), - ## db statuses to be exluded from reconstruction (some will be recovered) ChannelStatusToBeExcluded = cms.vstring( 'kDAC', 'kNoisy', diff --git a/RecoLocalCalo/HcalRecProducers/src/DeclsForKernels.h b/RecoLocalCalo/HcalRecProducers/src/DeclsForKernels.h index aa7ab64553a94..debfdfe3cb49c 100644 --- a/RecoLocalCalo/HcalRecProducers/src/DeclsForKernels.h +++ 
b/RecoLocalCalo/HcalRecProducers/src/DeclsForKernels.h @@ -68,7 +68,6 @@ namespace hcal { }; struct ConfigParameters { - uint32_t maxChannels; uint32_t maxTimeSamples; uint32_t kprep1dChannelsPerBlock; int sipmQTSShift; @@ -93,12 +92,12 @@ namespace hcal { struct OutputDataGPU { RecHitCollection<::calo::common::DevStoragePolicy> recHits; - void allocate(ConfigParameters const& config, cudaStream_t cudaStream) { - recHits.energy = cms::cuda::make_device_unique(config.maxChannels, cudaStream); - recHits.chi2 = cms::cuda::make_device_unique(config.maxChannels, cudaStream); - recHits.energyM0 = cms::cuda::make_device_unique(config.maxChannels, cudaStream); - recHits.timeM0 = cms::cuda::make_device_unique(config.maxChannels, cudaStream); - recHits.did = cms::cuda::make_device_unique(config.maxChannels, cudaStream); + void allocate(ConfigParameters const& config, uint32_t size, cudaStream_t cudaStream) { + recHits.energy = cms::cuda::make_device_unique(size, cudaStream); + recHits.chi2 = cms::cuda::make_device_unique(size, cudaStream); + recHits.energyM0 = cms::cuda::make_device_unique(size, cudaStream); + recHits.timeM0 = cms::cuda::make_device_unique(size, cudaStream); + recHits.did = cms::cuda::make_device_unique(size, cudaStream); } }; diff --git a/RecoLocalCalo/HcalRecProducers/src/HBHERecHitProducerGPU.cc b/RecoLocalCalo/HcalRecProducers/src/HBHERecHitProducerGPU.cc index e86942457980c..b598947423adf 100644 --- a/RecoLocalCalo/HcalRecProducers/src/HBHERecHitProducerGPU.cc +++ b/RecoLocalCalo/HcalRecProducers/src/HBHERecHitProducerGPU.cc @@ -80,7 +80,6 @@ HBHERecHitProducerGPU::HBHERecHitProducerGPU(edm::ParameterSet const& ps) sipmCharacteristicsToken_{esConsumes()}, chQualProductToken_{esConsumes()}, pulseOffsetsToken_{esConsumes()} { - configParameters_.maxChannels = ps.getParameter("maxChannels"); configParameters_.maxTimeSamples = ps.getParameter("maxTimeSamples"); configParameters_.kprep1dChannelsPerBlock = ps.getParameter("kprep1dChannelsPerBlock"); 
configParameters_.sipmQTSShift = ps.getParameter("sipmQTSShift"); @@ -115,7 +114,6 @@ HBHERecHitProducerGPU::~HBHERecHitProducerGPU() {} void HBHERecHitProducerGPU::fillDescriptions(edm::ConfigurationDescriptions& cdesc) { edm::ParameterSetDescription desc; - desc.add("maxChannels", 10000u); desc.add("maxTimeSamples", 10); desc.add("kprep1dChannelsPerBlock", 32); desc.add("digisLabelF01HE", edm::InputTag{"hcalRawToDigiGPU", "f01HEDigisGPU"}); @@ -156,6 +154,7 @@ void HBHERecHitProducerGPU::acquire(edm::Event const& event, auto const& f01HEDigis = ctx.get(f01HEProduct); auto const& f5HBDigis = ctx.get(f5HBProduct); auto const& f3HBDigis = ctx.get(f3HBProduct); + auto const totalChannels = f01HEDigis.size + f5HBDigis.size + f3HBDigis.size; hcal::reconstruction::InputDataGPU inputGPU{f01HEDigis, f5HBDigis, f3HBDigis}; @@ -225,26 +224,20 @@ void HBHERecHitProducerGPU::acquire(edm::Event const& event, // scratch mem on device hcal::reconstruction::ScratchDataGPU scratchGPU = { - cms::cuda::make_device_unique(configParameters_.maxChannels * configParameters_.maxTimeSamples, - ctx.stream()), - cms::cuda::make_device_unique(configParameters_.maxChannels * configParameters_.maxTimeSamples, - ctx.stream()), - cms::cuda::make_device_unique(configParameters_.maxChannels * configParameters_.maxTimeSamples, - ctx.stream()), + cms::cuda::make_device_unique(totalChannels * configParameters_.maxTimeSamples, ctx.stream()), + cms::cuda::make_device_unique(totalChannels * configParameters_.maxTimeSamples, ctx.stream()), + cms::cuda::make_device_unique(totalChannels * configParameters_.maxTimeSamples, ctx.stream()), cms::cuda::make_device_unique( - configParameters_.maxChannels * configParameters_.maxTimeSamples * configParameters_.maxTimeSamples, - ctx.stream()), + totalChannels * configParameters_.maxTimeSamples * configParameters_.maxTimeSamples, ctx.stream()), cms::cuda::make_device_unique( - configParameters_.maxChannels * configParameters_.maxTimeSamples * 
configParameters_.maxTimeSamples, - ctx.stream()), + totalChannels * configParameters_.maxTimeSamples * configParameters_.maxTimeSamples, ctx.stream()), cms::cuda::make_device_unique( - configParameters_.maxChannels * configParameters_.maxTimeSamples * configParameters_.maxTimeSamples, - ctx.stream()), - cms::cuda::make_device_unique(configParameters_.maxChannels, ctx.stream()), + totalChannels * configParameters_.maxTimeSamples * configParameters_.maxTimeSamples, ctx.stream()), + cms::cuda::make_device_unique(totalChannels, ctx.stream()), }; // output dev mem - outputGPU_.allocate(configParameters_, ctx.stream()); + outputGPU_.allocate(configParameters_, totalChannels, ctx.stream()); hcal::reconstruction::entryPoint(inputGPU, outputGPU_, conditions, scratchGPU, configParameters_, ctx.stream()); From af7913f572141fbd8fab1cdea5b895f291d64a27 Mon Sep 17 00:00:00 2001 From: Andrea Bocci Date: Thu, 2 Jun 2022 17:27:47 +0200 Subject: [PATCH 3/3] Remove the obsolete ECAL and HCAL rechit parameters from the HLT menu --- .../python/customizeHLTforCMSSW.py | 23 +++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/HLTrigger/Configuration/python/customizeHLTforCMSSW.py b/HLTrigger/Configuration/python/customizeHLTforCMSSW.py index 53b007a50b775..4037114381ccd 100644 --- a/HLTrigger/Configuration/python/customizeHLTforCMSSW.py +++ b/HLTrigger/Configuration/python/customizeHLTforCMSSW.py @@ -211,6 +211,28 @@ def customiseForOffline(process): return process +# Reduce the ECAL and HCAL GPU memory usage (#39579) +# Remove the obsolete configuration parameters +def customizeHLTfor39579(process): + for producer in producers_by_type(process, "EcalUncalibRecHitProducerGPU"): + if hasattr(producer, "maxNumberHitsEB"): + delattr(producer, "maxNumberHitsEB") + if hasattr(producer, "maxNumberHitsEE"): + delattr(producer, "maxNumberHitsEE") + + for producer in producers_by_type(process, "EcalRecHitProducerGPU"): + if hasattr(producer, "maxNumberHitsEB"): + delattr(producer, 
"maxNumberHitsEB") + if hasattr(producer, "maxNumberHitsEE"): + delattr(producer, "maxNumberHitsEE") + + for producer in producers_by_type(process, "HBHERecHitProducerGPU"): + if hasattr(producer, "maxChannels"): + delattr(producer, "maxChannels") + + return process + + # CMSSW version specific customizations def customizeHLTforCMSSW(process, menuType="GRun"): @@ -218,5 +240,6 @@ def customizeHLTforCMSSW(process, menuType="GRun"): # add call to action function in proper order: newest last! # process = customiseFor12718(process) + process = customizeHLTfor39579(process) return process