Skip to content

Commit

Permalink
Merge pull request #39579 from fwyzard/reduce_ECAL_HCAL_GPU_memory_us…
Browse files Browse the repository at this point in the history
…age_125x

Reduce the ECAL and HCAL GPU memory usage [12.5.x]
  • Loading branch information
cmsbuild authored Oct 6, 2022
2 parents 980f9a3 + af7913f commit 96b4264
Show file tree
Hide file tree
Showing 7 changed files with 54 additions and 67 deletions.
23 changes: 23 additions & 0 deletions HLTrigger/Configuration/python/customizeHLTforCMSSW.py
Original file line number Diff line number Diff line change
Expand Up @@ -211,12 +211,35 @@ def customiseForOffline(process):
return process


# Reduce the ECAL and HCAL GPU memory usage (#39579)
# Remove the obsolete configuration parameters
def customizeHLTfor39579(process):
    """Remove the configuration parameters made obsolete by #39579.

    For each plugin type listed below, every producer of that type returned by
    ``producers_by_type`` has the named parameters deleted when present.
    Producers that do not carry a given parameter are left untouched, so the
    customisation can be applied to menus that were already cleaned up.

    Args:
        process: the process object handed to ``producers_by_type``.

    Returns:
        The ``process`` object that was passed in.
    """
    # (plugin type, parameters that plugin no longer accepts), applied in this order
    obsolete_parameters = (
        ("EcalUncalibRecHitProducerGPU", ("maxNumberHitsEB", "maxNumberHitsEE")),
        ("EcalRecHitProducerGPU", ("maxNumberHitsEB", "maxNumberHitsEE")),
        ("HBHERecHitProducerGPU", ("maxChannels",)),
    )

    for plugin_type, parameter_names in obsolete_parameters:
        for producer in producers_by_type(process, plugin_type):
            for parameter_name in parameter_names:
                # guard the delete: the parameter may already be absent
                if hasattr(producer, parameter_name):
                    delattr(producer, parameter_name)

    return process


# CMSSW version specific customizations
def customizeHLTforCMSSW(process, menuType="GRun"):
    """Apply the CMSSW-version-specific customisations to ``process``.

    Each customisation receives the process returned by the previous one.
    ``menuType`` is accepted but not used in this function.

    Returns:
        The process returned by the last customisation applied.
    """
    # add call to action function in proper order: newest last!
    # process = customiseFor12718(process)
    customisations = (
        customiseForOffline,
        customizeHLTfor39579,
    )

    for customise in customisations:
        process = customise(process)

    return process
26 changes: 13 additions & 13 deletions RecoLocalCalo/EcalRecProducers/plugins/DeclsForKernels.h
Original file line number Diff line number Diff line change
Expand Up @@ -90,16 +90,15 @@ namespace ecal {
std::array<uint32_t, 3> kernelMinimizeThreads;

bool shouldRunTimingComputation;

uint32_t maxNumberHitsEB;
uint32_t maxNumberHitsEE;
};

struct EventOutputDataGPU {
UncalibratedRecHit<::calo::common::DevStoragePolicy> recHitsEB, recHitsEE;

void allocate(ConfigurationParameters const& configParameters, cudaStream_t cudaStream) {
auto const sizeEB = configParameters.maxNumberHitsEB;
void allocate(ConfigurationParameters const& configParameters,
uint32_t sizeEB,
uint32_t sizeEE,
cudaStream_t cudaStream) {
recHitsEB.amplitudesAll = cms::cuda::make_device_unique<reco::ComputationScalarType[]>(
sizeEB * EcalDataFrame::MAXSAMPLES, cudaStream);
recHitsEB.amplitude = cms::cuda::make_device_unique<reco::StorageScalarType[]>(sizeEB, cudaStream);
Expand All @@ -114,7 +113,6 @@ namespace ecal {
recHitsEB.did = cms::cuda::make_device_unique<uint32_t[]>(sizeEB, cudaStream);
recHitsEB.flags = cms::cuda::make_device_unique<uint32_t[]>(sizeEB, cudaStream);

auto const sizeEE = configParameters.maxNumberHitsEE;
recHitsEE.amplitudesAll = cms::cuda::make_device_unique<reco::ComputationScalarType[]>(
sizeEE * EcalDataFrame::MAXSAMPLES, cudaStream);
recHitsEE.amplitude = cms::cuda::make_device_unique<reco::StorageScalarType[]>(sizeEE, cudaStream);
Expand Down Expand Up @@ -165,13 +163,16 @@ namespace ecal {
cms::cuda::device::unique_ptr<SVT[]> timeMax, timeError;
cms::cuda::device::unique_ptr<TimeComputationState[]> tcState;

void allocate(ConfigurationParameters const& configParameters, cudaStream_t cudaStream) {
void allocate(ConfigurationParameters const& configParameters,
uint32_t sizeEB,
uint32_t sizeEE,
cudaStream_t cudaStream) {
constexpr auto svlength = getLength<SampleVector>();
constexpr auto sgvlength = getLength<SampleGainVector>();
constexpr auto smlength = getLength<SampleMatrix>();
constexpr auto pmlength = getLength<PulseMatrixType>();
constexpr auto bxvlength = getLength<BXVectorType>();
auto const size = configParameters.maxNumberHitsEB + configParameters.maxNumberHitsEE;
auto const size = sizeEB + sizeEE;

auto alloc = [cudaStream](auto& var, uint32_t size) {
using element_type = typename std::remove_reference_t<decltype(var)>::element_type;
Expand Down Expand Up @@ -273,24 +274,23 @@ namespace ecal {
uint32_t expanded_v_DB_reco_flagsSize;

uint32_t flagmask;
uint32_t maxNumberHitsEB;
uint32_t maxNumberHitsEE;
};

struct EventOutputDataGPU {
RecHit<::calo::common::DevStoragePolicy> recHitsEB, recHitsEE;

void allocate(ConfigurationParameters const& configParameters, cudaStream_t cudaStream) {
void allocate(ConfigurationParameters const& configParameters,
uint32_t sizeEB,
uint32_t sizeEE,
cudaStream_t cudaStream) {
//---- configParameters -> needed only to decide whether to save the timing information or not
auto const sizeEB = configParameters.maxNumberHitsEB;
recHitsEB.energy = cms::cuda::make_device_unique<::ecal::reco::StorageScalarType[]>(sizeEB, cudaStream);
recHitsEB.time = cms::cuda::make_device_unique<::ecal::reco::StorageScalarType[]>(sizeEB, cudaStream);
recHitsEB.chi2 = cms::cuda::make_device_unique<::ecal::reco::StorageScalarType[]>(sizeEB, cudaStream);
recHitsEB.flagBits = cms::cuda::make_device_unique<uint32_t[]>(sizeEB, cudaStream);
recHitsEB.extra = cms::cuda::make_device_unique<uint32_t[]>(sizeEB, cudaStream);
recHitsEB.did = cms::cuda::make_device_unique<uint32_t[]>(sizeEB, cudaStream);

auto const sizeEE = configParameters.maxNumberHitsEE;
recHitsEE.energy = cms::cuda::make_device_unique<::ecal::reco::StorageScalarType[]>(sizeEE, cudaStream);
recHitsEE.time = cms::cuda::make_device_unique<::ecal::reco::StorageScalarType[]>(sizeEE, cudaStream);
recHitsEE.chi2 = cms::cuda::make_device_unique<::ecal::reco::StorageScalarType[]>(sizeEE, cudaStream);
Expand Down
14 changes: 1 addition & 13 deletions RecoLocalCalo/EcalRecProducers/plugins/EcalRecHitProducerGPU.cc
Original file line number Diff line number Diff line change
Expand Up @@ -103,9 +103,6 @@ void EcalRecHitProducerGPU::fillDescriptions(edm::ConfigurationDescriptions& con
desc.add<double>("EELaserMIN", 0.01);
desc.add<double>("EBLaserMAX", 30.0);
desc.add<double>("EELaserMAX", 30.0);

desc.add<uint32_t>("maxNumberHitsEB", 61200);
desc.add<uint32_t>("maxNumberHitsEE", 14648);
}

EcalRecHitProducerGPU::EcalRecHitProducerGPU(const edm::ParameterSet& ps) {
Expand All @@ -125,10 +122,6 @@ EcalRecHitProducerGPU::EcalRecHitProducerGPU(const edm::ParameterSet& ps) {
configParameters_.EBLaserMAX = ps.getParameter<double>("EBLaserMAX");
configParameters_.EELaserMAX = ps.getParameter<double>("EELaserMAX");

// max number of digis to allocate for
configParameters_.maxNumberHitsEB = ps.getParameter<uint32_t>("maxNumberHitsEB");
configParameters_.maxNumberHitsEE = ps.getParameter<uint32_t>("maxNumberHitsEE");

flagmask_ = 0;
flagmask_ |= 0x1 << EcalRecHit::kNeighboursRecovered;
flagmask_ |= 0x1 << EcalRecHit::kTowerRecovered;
Expand Down Expand Up @@ -182,11 +175,6 @@ void EcalRecHitProducerGPU::acquire(edm::Event const& event,
if (neb_ + nee_ == 0)
return;

if ((neb_ > configParameters_.maxNumberHitsEB) || (nee_ > configParameters_.maxNumberHitsEE)) {
edm::LogError("EcalRecHitProducerGPU")
<< "max number of channels exceeded. See options 'maxNumberHitsEB and maxNumberHitsEE' ";
}

int nchannelsEB = ebUncalibRecHits.size; // --> offsetForInput, first EB and then EE

// conditions
Expand Down Expand Up @@ -227,7 +215,7 @@ void EcalRecHitProducerGPU::acquire(edm::Event const& event,
IntercalibConstantsHandle_->getOffset()};

// dev mem
eventOutputDataGPU_.allocate(configParameters_, ctx.stream());
eventOutputDataGPU_.allocate(configParameters_, neb_, nee_, ctx.stream());

//
// schedule algorithms
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -88,8 +88,6 @@ void EcalUncalibRecHitProducerGPU::fillDescriptions(edm::ConfigurationDescriptio
desc.add<double>("outOfTimeThresholdGain61mEE", 1000);
desc.add<double>("amplitudeThresholdEB", 10);
desc.add<double>("amplitudeThresholdEE", 10);
desc.add<uint32_t>("maxNumberHitsEB", 61200);
desc.add<uint32_t>("maxNumberHitsEE", 14648);
desc.addUntracked<std::vector<uint32_t>>("kernelMinimizeThreads", {32, 1, 1});
desc.add<bool>("shouldRunTimingComputation", true);
confDesc.addWithDefaultLabel(desc);
Expand Down Expand Up @@ -132,10 +130,6 @@ EcalUncalibRecHitProducerGPU::EcalUncalibRecHitProducerGPU(const edm::ParameterS
auto amplitudeThreshEB = ps.getParameter<double>("amplitudeThresholdEB");
auto amplitudeThreshEE = ps.getParameter<double>("amplitudeThresholdEE");

// max number of digis to allocate for
configParameters_.maxNumberHitsEB = ps.getParameter<uint32_t>("maxNumberHitsEB");
configParameters_.maxNumberHitsEE = ps.getParameter<uint32_t>("maxNumberHitsEE");

// switch to run timing computation kernels
configParameters_.shouldRunTimingComputation = ps.getParameter<bool>("shouldRunTimingComputation");

Expand Down Expand Up @@ -203,13 +197,6 @@ void EcalUncalibRecHitProducerGPU::produce(edm::Event& event, edm::EventSetup co

// stop here if there are no digis
if (neb + nee > 0) {
if ((neb > configParameters_.maxNumberHitsEB) || (nee > configParameters_.maxNumberHitsEE)) {
edm::LogError("EcalUncalibRecHitProducerGPU")
<< "Max number of channels exceeded in barrel or endcap. Number of barrel channels: " << neb
<< " with maxNumberHitsEB=" << configParameters_.maxNumberHitsEB << ", number of endcap channels: " << nee
<< " with maxNumberHitsEE=" << configParameters_.maxNumberHitsEE;
}

// conditions
auto const& timeCalibConstantsData = setup.getData(timeCalibConstantsToken_);
auto const& sampleMaskData = setup.getData(sampleMaskToken_);
Expand Down Expand Up @@ -247,11 +234,11 @@ void EcalUncalibRecHitProducerGPU::produce(edm::Event& event, edm::EventSetup co
multifitParameters};

// dev mem
eventOutputDataGPU.allocate(configParameters_, ctx.stream());
eventOutputDataGPU.allocate(configParameters_, neb, nee, ctx.stream());

// scratch mem
ecal::multifit::EventDataForScratchGPU eventDataForScratchGPU;
eventDataForScratchGPU.allocate(configParameters_, ctx.stream());
eventDataForScratchGPU.allocate(configParameters_, neb, nee, ctx.stream());

//
// schedule algorithms
Expand Down
3 changes: 0 additions & 3 deletions RecoLocalCalo/EcalRecProducers/python/ecalRecHitGPU_cfi.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,9 +11,6 @@
recHitsLabelEB = cms.string("EcalRecHitsEB"),
recHitsLabelEE = cms.string("EcalRecHitsEE"),

maxNumberHitsEB = cms.uint32(61200),
maxNumberHitsEE = cms.uint32(14648),

## db statuses to be excluded from reconstruction (some will be recovered)
ChannelStatusToBeExcluded = cms.vstring( 'kDAC',
'kNoisy',
Expand Down
13 changes: 6 additions & 7 deletions RecoLocalCalo/HcalRecProducers/src/DeclsForKernels.h
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,6 @@ namespace hcal {
};

struct ConfigParameters {
uint32_t maxChannels;
uint32_t maxTimeSamples;
uint32_t kprep1dChannelsPerBlock;
int sipmQTSShift;
Expand All @@ -93,12 +92,12 @@ namespace hcal {
struct OutputDataGPU {
RecHitCollection<::calo::common::DevStoragePolicy> recHits;

void allocate(ConfigParameters const& config, cudaStream_t cudaStream) {
recHits.energy = cms::cuda::make_device_unique<float[]>(config.maxChannels, cudaStream);
recHits.chi2 = cms::cuda::make_device_unique<float[]>(config.maxChannels, cudaStream);
recHits.energyM0 = cms::cuda::make_device_unique<float[]>(config.maxChannels, cudaStream);
recHits.timeM0 = cms::cuda::make_device_unique<float[]>(config.maxChannels, cudaStream);
recHits.did = cms::cuda::make_device_unique<uint32_t[]>(config.maxChannels, cudaStream);
void allocate(ConfigParameters const& config, uint32_t size, cudaStream_t cudaStream) {
recHits.energy = cms::cuda::make_device_unique<float[]>(size, cudaStream);
recHits.chi2 = cms::cuda::make_device_unique<float[]>(size, cudaStream);
recHits.energyM0 = cms::cuda::make_device_unique<float[]>(size, cudaStream);
recHits.timeM0 = cms::cuda::make_device_unique<float[]>(size, cudaStream);
recHits.did = cms::cuda::make_device_unique<uint32_t[]>(size, cudaStream);
}
};

Expand Down
25 changes: 9 additions & 16 deletions RecoLocalCalo/HcalRecProducers/src/HBHERecHitProducerGPU.cc
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,6 @@ HBHERecHitProducerGPU::HBHERecHitProducerGPU(edm::ParameterSet const& ps)
sipmCharacteristicsToken_{esConsumes()},
chQualProductToken_{esConsumes()},
pulseOffsetsToken_{esConsumes()} {
configParameters_.maxChannels = ps.getParameter<uint32_t>("maxChannels");
configParameters_.maxTimeSamples = ps.getParameter<uint32_t>("maxTimeSamples");
configParameters_.kprep1dChannelsPerBlock = ps.getParameter<uint32_t>("kprep1dChannelsPerBlock");
configParameters_.sipmQTSShift = ps.getParameter<int>("sipmQTSShift");
Expand Down Expand Up @@ -115,7 +114,6 @@ HBHERecHitProducerGPU::~HBHERecHitProducerGPU() {}

void HBHERecHitProducerGPU::fillDescriptions(edm::ConfigurationDescriptions& cdesc) {
edm::ParameterSetDescription desc;
desc.add<uint32_t>("maxChannels", 10000u);
desc.add<uint32_t>("maxTimeSamples", 10);
desc.add<uint32_t>("kprep1dChannelsPerBlock", 32);
desc.add<edm::InputTag>("digisLabelF01HE", edm::InputTag{"hcalRawToDigiGPU", "f01HEDigisGPU"});
Expand Down Expand Up @@ -156,6 +154,7 @@ void HBHERecHitProducerGPU::acquire(edm::Event const& event,
auto const& f01HEDigis = ctx.get(f01HEProduct);
auto const& f5HBDigis = ctx.get(f5HBProduct);
auto const& f3HBDigis = ctx.get(f3HBProduct);
auto const totalChannels = f01HEDigis.size + f5HBDigis.size + f3HBDigis.size;

hcal::reconstruction::InputDataGPU inputGPU{f01HEDigis, f5HBDigis, f3HBDigis};

Expand Down Expand Up @@ -225,26 +224,20 @@ void HBHERecHitProducerGPU::acquire(edm::Event const& event,

// scratch mem on device
hcal::reconstruction::ScratchDataGPU scratchGPU = {
cms::cuda::make_device_unique<float[]>(configParameters_.maxChannels * configParameters_.maxTimeSamples,
ctx.stream()),
cms::cuda::make_device_unique<float[]>(configParameters_.maxChannels * configParameters_.maxTimeSamples,
ctx.stream()),
cms::cuda::make_device_unique<float[]>(configParameters_.maxChannels * configParameters_.maxTimeSamples,
ctx.stream()),
cms::cuda::make_device_unique<float[]>(totalChannels * configParameters_.maxTimeSamples, ctx.stream()),
cms::cuda::make_device_unique<float[]>(totalChannels * configParameters_.maxTimeSamples, ctx.stream()),
cms::cuda::make_device_unique<float[]>(totalChannels * configParameters_.maxTimeSamples, ctx.stream()),
cms::cuda::make_device_unique<float[]>(
configParameters_.maxChannels * configParameters_.maxTimeSamples * configParameters_.maxTimeSamples,
ctx.stream()),
totalChannels * configParameters_.maxTimeSamples * configParameters_.maxTimeSamples, ctx.stream()),
cms::cuda::make_device_unique<float[]>(
configParameters_.maxChannels * configParameters_.maxTimeSamples * configParameters_.maxTimeSamples,
ctx.stream()),
totalChannels * configParameters_.maxTimeSamples * configParameters_.maxTimeSamples, ctx.stream()),
cms::cuda::make_device_unique<float[]>(
configParameters_.maxChannels * configParameters_.maxTimeSamples * configParameters_.maxTimeSamples,
ctx.stream()),
cms::cuda::make_device_unique<int8_t[]>(configParameters_.maxChannels, ctx.stream()),
totalChannels * configParameters_.maxTimeSamples * configParameters_.maxTimeSamples, ctx.stream()),
cms::cuda::make_device_unique<int8_t[]>(totalChannels, ctx.stream()),
};

// output dev mem
outputGPU_.allocate(configParameters_, ctx.stream());
outputGPU_.allocate(configParameters_, totalChannels, ctx.stream());

hcal::reconstruction::entryPoint(inputGPU, outputGPU_, conditions, scratchGPU, configParameters_, ctx.stream());

Expand Down

0 comments on commit 96b4264

Please sign in to comment.