Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Reduce the ECAL and HCAL GPU memory usage [12.5.x] #39579

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 23 additions & 0 deletions HLTrigger/Configuration/python/customizeHLTforCMSSW.py
Original file line number Diff line number Diff line change
Expand Up @@ -211,12 +211,35 @@ def customiseForOffline(process):
return process


# Reduce the ECAL and HCAL GPU memory usage (#39579)
# Remove the obsolete configuration parameters
def customizeHLTfor39579(process):
    """Drop the configuration parameters made obsolete by #39579.

    For each producer type listed below, every module returned by
    ``producers_by_type(process, <type>)`` has the named attributes deleted
    when they are present; modules that do not carry them are left untouched.

    Args:
        process: the process object whose producers are customised in place.

    Returns:
        The same ``process`` object that was passed in.
    """
    # (producer type, attributes to strip) -- processed in this order.
    obsolete_parameters = (
        ("EcalUncalibRecHitProducerGPU", ("maxNumberHitsEB", "maxNumberHitsEE")),
        ("EcalRecHitProducerGPU", ("maxNumberHitsEB", "maxNumberHitsEE")),
        ("HBHERecHitProducerGPU", ("maxChannels",)),
    )

    for module_type, parameter_names in obsolete_parameters:
        for module in producers_by_type(process, module_type):
            for parameter_name in parameter_names:
                # Only delete what is actually there, so that configurations
                # which never set the parameter pass through unchanged.
                if hasattr(module, parameter_name):
                    delattr(module, parameter_name)

    return process


# CMSSW version specific customizations
def customizeHLTforCMSSW(process, menuType="GRun"):
    """Apply the CMSSW-version-specific HLT customisations to ``process``.

    The customisation functions are applied one after another, each receiving
    the process returned by the previous one.

    Args:
        process: the process object to customise.
        menuType: accepted for interface compatibility; it is not read by
            this function.

    Returns:
        The process returned by the last customisation applied.
    """
    # add call to action function in proper order: newest last!
    # e.g. customiseFor12718
    customisations = (
        customiseForOffline,
        customizeHLTfor39579,
    )

    for customise in customisations:
        process = customise(process)

    return process
26 changes: 13 additions & 13 deletions RecoLocalCalo/EcalRecProducers/plugins/DeclsForKernels.h
Original file line number Diff line number Diff line change
Expand Up @@ -90,16 +90,15 @@ namespace ecal {
std::array<uint32_t, 3> kernelMinimizeThreads;

bool shouldRunTimingComputation;

uint32_t maxNumberHitsEB;
uint32_t maxNumberHitsEE;
};

struct EventOutputDataGPU {
UncalibratedRecHit<::calo::common::DevStoragePolicy> recHitsEB, recHitsEE;

void allocate(ConfigurationParameters const& configParameters, cudaStream_t cudaStream) {
auto const sizeEB = configParameters.maxNumberHitsEB;
void allocate(ConfigurationParameters const& configParameters,
uint32_t sizeEB,
uint32_t sizeEE,
cudaStream_t cudaStream) {
recHitsEB.amplitudesAll = cms::cuda::make_device_unique<reco::ComputationScalarType[]>(
sizeEB * EcalDataFrame::MAXSAMPLES, cudaStream);
recHitsEB.amplitude = cms::cuda::make_device_unique<reco::StorageScalarType[]>(sizeEB, cudaStream);
Expand All @@ -114,7 +113,6 @@ namespace ecal {
recHitsEB.did = cms::cuda::make_device_unique<uint32_t[]>(sizeEB, cudaStream);
recHitsEB.flags = cms::cuda::make_device_unique<uint32_t[]>(sizeEB, cudaStream);

auto const sizeEE = configParameters.maxNumberHitsEE;
recHitsEE.amplitudesAll = cms::cuda::make_device_unique<reco::ComputationScalarType[]>(
sizeEE * EcalDataFrame::MAXSAMPLES, cudaStream);
recHitsEE.amplitude = cms::cuda::make_device_unique<reco::StorageScalarType[]>(sizeEE, cudaStream);
Expand Down Expand Up @@ -165,13 +163,16 @@ namespace ecal {
cms::cuda::device::unique_ptr<SVT[]> timeMax, timeError;
cms::cuda::device::unique_ptr<TimeComputationState[]> tcState;

void allocate(ConfigurationParameters const& configParameters, cudaStream_t cudaStream) {
void allocate(ConfigurationParameters const& configParameters,
uint32_t sizeEB,
uint32_t sizeEE,
cudaStream_t cudaStream) {
constexpr auto svlength = getLength<SampleVector>();
constexpr auto sgvlength = getLength<SampleGainVector>();
constexpr auto smlength = getLength<SampleMatrix>();
constexpr auto pmlength = getLength<PulseMatrixType>();
constexpr auto bxvlength = getLength<BXVectorType>();
auto const size = configParameters.maxNumberHitsEB + configParameters.maxNumberHitsEE;
auto const size = sizeEB + sizeEE;

auto alloc = [cudaStream](auto& var, uint32_t size) {
using element_type = typename std::remove_reference_t<decltype(var)>::element_type;
Expand Down Expand Up @@ -273,24 +274,23 @@ namespace ecal {
uint32_t expanded_v_DB_reco_flagsSize;

uint32_t flagmask;
uint32_t maxNumberHitsEB;
uint32_t maxNumberHitsEE;
};

struct EventOutputDataGPU {
RecHit<::calo::common::DevStoragePolicy> recHitsEB, recHitsEE;

void allocate(ConfigurationParameters const& configParameters, cudaStream_t cudaStream) {
void allocate(ConfigurationParameters const& configParameters,
uint32_t sizeEB,
uint32_t sizeEE,
cudaStream_t cudaStream) {
//---- configParameters -> needed only to decide if to save the timing information or not
auto const sizeEB = configParameters.maxNumberHitsEB;
recHitsEB.energy = cms::cuda::make_device_unique<::ecal::reco::StorageScalarType[]>(sizeEB, cudaStream);
recHitsEB.time = cms::cuda::make_device_unique<::ecal::reco::StorageScalarType[]>(sizeEB, cudaStream);
recHitsEB.chi2 = cms::cuda::make_device_unique<::ecal::reco::StorageScalarType[]>(sizeEB, cudaStream);
recHitsEB.flagBits = cms::cuda::make_device_unique<uint32_t[]>(sizeEB, cudaStream);
recHitsEB.extra = cms::cuda::make_device_unique<uint32_t[]>(sizeEB, cudaStream);
recHitsEB.did = cms::cuda::make_device_unique<uint32_t[]>(sizeEB, cudaStream);

auto const sizeEE = configParameters.maxNumberHitsEE;
recHitsEE.energy = cms::cuda::make_device_unique<::ecal::reco::StorageScalarType[]>(sizeEE, cudaStream);
recHitsEE.time = cms::cuda::make_device_unique<::ecal::reco::StorageScalarType[]>(sizeEE, cudaStream);
recHitsEE.chi2 = cms::cuda::make_device_unique<::ecal::reco::StorageScalarType[]>(sizeEE, cudaStream);
Expand Down
14 changes: 1 addition & 13 deletions RecoLocalCalo/EcalRecProducers/plugins/EcalRecHitProducerGPU.cc
Original file line number Diff line number Diff line change
Expand Up @@ -103,9 +103,6 @@ void EcalRecHitProducerGPU::fillDescriptions(edm::ConfigurationDescriptions& con
desc.add<double>("EELaserMIN", 0.01);
desc.add<double>("EBLaserMAX", 30.0);
desc.add<double>("EELaserMAX", 30.0);

desc.add<uint32_t>("maxNumberHitsEB", 61200);
desc.add<uint32_t>("maxNumberHitsEE", 14648);
}

EcalRecHitProducerGPU::EcalRecHitProducerGPU(const edm::ParameterSet& ps) {
Expand All @@ -125,10 +122,6 @@ EcalRecHitProducerGPU::EcalRecHitProducerGPU(const edm::ParameterSet& ps) {
configParameters_.EBLaserMAX = ps.getParameter<double>("EBLaserMAX");
configParameters_.EELaserMAX = ps.getParameter<double>("EELaserMAX");

// max number of digis to allocate for
configParameters_.maxNumberHitsEB = ps.getParameter<uint32_t>("maxNumberHitsEB");
configParameters_.maxNumberHitsEE = ps.getParameter<uint32_t>("maxNumberHitsEE");

flagmask_ = 0;
flagmask_ |= 0x1 << EcalRecHit::kNeighboursRecovered;
flagmask_ |= 0x1 << EcalRecHit::kTowerRecovered;
Expand Down Expand Up @@ -182,11 +175,6 @@ void EcalRecHitProducerGPU::acquire(edm::Event const& event,
if (neb_ + nee_ == 0)
return;

if ((neb_ > configParameters_.maxNumberHitsEB) || (nee_ > configParameters_.maxNumberHitsEE)) {
edm::LogError("EcalRecHitProducerGPU")
<< "max number of channels exceeded. See options 'maxNumberHitsEB and maxNumberHitsEE' ";
}

int nchannelsEB = ebUncalibRecHits.size; // --> offsetForInput, first EB and then EE

// conditions
Expand Down Expand Up @@ -227,7 +215,7 @@ void EcalRecHitProducerGPU::acquire(edm::Event const& event,
IntercalibConstantsHandle_->getOffset()};

// dev mem
eventOutputDataGPU_.allocate(configParameters_, ctx.stream());
eventOutputDataGPU_.allocate(configParameters_, neb_, nee_, ctx.stream());

//
// schedule algorithms
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -88,8 +88,6 @@ void EcalUncalibRecHitProducerGPU::fillDescriptions(edm::ConfigurationDescriptio
desc.add<double>("outOfTimeThresholdGain61mEE", 1000);
desc.add<double>("amplitudeThresholdEB", 10);
desc.add<double>("amplitudeThresholdEE", 10);
desc.add<uint32_t>("maxNumberHitsEB", 61200);
desc.add<uint32_t>("maxNumberHitsEE", 14648);
desc.addUntracked<std::vector<uint32_t>>("kernelMinimizeThreads", {32, 1, 1});
desc.add<bool>("shouldRunTimingComputation", true);
confDesc.addWithDefaultLabel(desc);
Expand Down Expand Up @@ -132,10 +130,6 @@ EcalUncalibRecHitProducerGPU::EcalUncalibRecHitProducerGPU(const edm::ParameterS
auto amplitudeThreshEB = ps.getParameter<double>("amplitudeThresholdEB");
auto amplitudeThreshEE = ps.getParameter<double>("amplitudeThresholdEE");

// max number of digis to allocate for
configParameters_.maxNumberHitsEB = ps.getParameter<uint32_t>("maxNumberHitsEB");
configParameters_.maxNumberHitsEE = ps.getParameter<uint32_t>("maxNumberHitsEE");

// switch to run timing computation kernels
configParameters_.shouldRunTimingComputation = ps.getParameter<bool>("shouldRunTimingComputation");

Expand Down Expand Up @@ -203,13 +197,6 @@ void EcalUncalibRecHitProducerGPU::produce(edm::Event& event, edm::EventSetup co

// stop here if there are no digis
if (neb + nee > 0) {
if ((neb > configParameters_.maxNumberHitsEB) || (nee > configParameters_.maxNumberHitsEE)) {
edm::LogError("EcalUncalibRecHitProducerGPU")
<< "Max number of channels exceeded in barrel or endcap. Number of barrel channels: " << neb
<< " with maxNumberHitsEB=" << configParameters_.maxNumberHitsEB << ", number of endcap channels: " << nee
<< " with maxNumberHitsEE=" << configParameters_.maxNumberHitsEE;
}

// conditions
auto const& timeCalibConstantsData = setup.getData(timeCalibConstantsToken_);
auto const& sampleMaskData = setup.getData(sampleMaskToken_);
Expand Down Expand Up @@ -247,11 +234,11 @@ void EcalUncalibRecHitProducerGPU::produce(edm::Event& event, edm::EventSetup co
multifitParameters};

// dev mem
eventOutputDataGPU.allocate(configParameters_, ctx.stream());
eventOutputDataGPU.allocate(configParameters_, neb, nee, ctx.stream());

// scratch mem
ecal::multifit::EventDataForScratchGPU eventDataForScratchGPU;
eventDataForScratchGPU.allocate(configParameters_, ctx.stream());
eventDataForScratchGPU.allocate(configParameters_, neb, nee, ctx.stream());

//
// schedule algorithms
Expand Down
3 changes: 0 additions & 3 deletions RecoLocalCalo/EcalRecProducers/python/ecalRecHitGPU_cfi.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,9 +11,6 @@
recHitsLabelEB = cms.string("EcalRecHitsEB"),
recHitsLabelEE = cms.string("EcalRecHitsEE"),

maxNumberHitsEB = cms.uint32(61200),
maxNumberHitsEE = cms.uint32(14648),

## db statuses to be excluded from reconstruction (some will be recovered)
ChannelStatusToBeExcluded = cms.vstring( 'kDAC',
'kNoisy',
Expand Down
13 changes: 6 additions & 7 deletions RecoLocalCalo/HcalRecProducers/src/DeclsForKernels.h
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,6 @@ namespace hcal {
};

struct ConfigParameters {
uint32_t maxChannels;
uint32_t maxTimeSamples;
uint32_t kprep1dChannelsPerBlock;
int sipmQTSShift;
Expand All @@ -93,12 +92,12 @@ namespace hcal {
struct OutputDataGPU {
RecHitCollection<::calo::common::DevStoragePolicy> recHits;

void allocate(ConfigParameters const& config, cudaStream_t cudaStream) {
recHits.energy = cms::cuda::make_device_unique<float[]>(config.maxChannels, cudaStream);
recHits.chi2 = cms::cuda::make_device_unique<float[]>(config.maxChannels, cudaStream);
recHits.energyM0 = cms::cuda::make_device_unique<float[]>(config.maxChannels, cudaStream);
recHits.timeM0 = cms::cuda::make_device_unique<float[]>(config.maxChannels, cudaStream);
recHits.did = cms::cuda::make_device_unique<uint32_t[]>(config.maxChannels, cudaStream);
void allocate(ConfigParameters const& config, uint32_t size, cudaStream_t cudaStream) {
recHits.energy = cms::cuda::make_device_unique<float[]>(size, cudaStream);
recHits.chi2 = cms::cuda::make_device_unique<float[]>(size, cudaStream);
recHits.energyM0 = cms::cuda::make_device_unique<float[]>(size, cudaStream);
recHits.timeM0 = cms::cuda::make_device_unique<float[]>(size, cudaStream);
recHits.did = cms::cuda::make_device_unique<uint32_t[]>(size, cudaStream);
}
};

Expand Down
25 changes: 9 additions & 16 deletions RecoLocalCalo/HcalRecProducers/src/HBHERecHitProducerGPU.cc
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,6 @@ HBHERecHitProducerGPU::HBHERecHitProducerGPU(edm::ParameterSet const& ps)
sipmCharacteristicsToken_{esConsumes()},
chQualProductToken_{esConsumes()},
pulseOffsetsToken_{esConsumes()} {
configParameters_.maxChannels = ps.getParameter<uint32_t>("maxChannels");
configParameters_.maxTimeSamples = ps.getParameter<uint32_t>("maxTimeSamples");
configParameters_.kprep1dChannelsPerBlock = ps.getParameter<uint32_t>("kprep1dChannelsPerBlock");
configParameters_.sipmQTSShift = ps.getParameter<int>("sipmQTSShift");
Expand Down Expand Up @@ -115,7 +114,6 @@ HBHERecHitProducerGPU::~HBHERecHitProducerGPU() {}

void HBHERecHitProducerGPU::fillDescriptions(edm::ConfigurationDescriptions& cdesc) {
edm::ParameterSetDescription desc;
desc.add<uint32_t>("maxChannels", 10000u);
desc.add<uint32_t>("maxTimeSamples", 10);
desc.add<uint32_t>("kprep1dChannelsPerBlock", 32);
desc.add<edm::InputTag>("digisLabelF01HE", edm::InputTag{"hcalRawToDigiGPU", "f01HEDigisGPU"});
Expand Down Expand Up @@ -156,6 +154,7 @@ void HBHERecHitProducerGPU::acquire(edm::Event const& event,
auto const& f01HEDigis = ctx.get(f01HEProduct);
auto const& f5HBDigis = ctx.get(f5HBProduct);
auto const& f3HBDigis = ctx.get(f3HBProduct);
auto const totalChannels = f01HEDigis.size + f5HBDigis.size + f3HBDigis.size;

hcal::reconstruction::InputDataGPU inputGPU{f01HEDigis, f5HBDigis, f3HBDigis};

Expand Down Expand Up @@ -225,26 +224,20 @@ void HBHERecHitProducerGPU::acquire(edm::Event const& event,

// scratch mem on device
hcal::reconstruction::ScratchDataGPU scratchGPU = {
cms::cuda::make_device_unique<float[]>(configParameters_.maxChannels * configParameters_.maxTimeSamples,
ctx.stream()),
cms::cuda::make_device_unique<float[]>(configParameters_.maxChannels * configParameters_.maxTimeSamples,
ctx.stream()),
cms::cuda::make_device_unique<float[]>(configParameters_.maxChannels * configParameters_.maxTimeSamples,
ctx.stream()),
cms::cuda::make_device_unique<float[]>(totalChannels * configParameters_.maxTimeSamples, ctx.stream()),
cms::cuda::make_device_unique<float[]>(totalChannels * configParameters_.maxTimeSamples, ctx.stream()),
cms::cuda::make_device_unique<float[]>(totalChannels * configParameters_.maxTimeSamples, ctx.stream()),
cms::cuda::make_device_unique<float[]>(
configParameters_.maxChannels * configParameters_.maxTimeSamples * configParameters_.maxTimeSamples,
ctx.stream()),
totalChannels * configParameters_.maxTimeSamples * configParameters_.maxTimeSamples, ctx.stream()),
cms::cuda::make_device_unique<float[]>(
configParameters_.maxChannels * configParameters_.maxTimeSamples * configParameters_.maxTimeSamples,
ctx.stream()),
totalChannels * configParameters_.maxTimeSamples * configParameters_.maxTimeSamples, ctx.stream()),
cms::cuda::make_device_unique<float[]>(
configParameters_.maxChannels * configParameters_.maxTimeSamples * configParameters_.maxTimeSamples,
ctx.stream()),
cms::cuda::make_device_unique<int8_t[]>(configParameters_.maxChannels, ctx.stream()),
totalChannels * configParameters_.maxTimeSamples * configParameters_.maxTimeSamples, ctx.stream()),
cms::cuda::make_device_unique<int8_t[]>(totalChannels, ctx.stream()),
};

// output dev mem
outputGPU_.allocate(configParameters_, ctx.stream());
outputGPU_.allocate(configParameters_, totalChannels, ctx.stream());

hcal::reconstruction::entryPoint(inputGPU, outputGPU_, conditions, scratchGPU, configParameters_, ctx.stream());

Expand Down