From 525a14a6457868fba16da28c08fbda4bb7fb01a4 Mon Sep 17 00:00:00 2001 From: Martin Kwok Date: Fri, 30 Jun 2023 18:38:47 +0200 Subject: [PATCH] HCAL local reconstruction in Alpaka - first implementation of MAHI in alpaka, running on CPU and GPU - implement and use atomicMaxPair function - implement HCAL portable conditions - implement HCAL digis portable SoA - update the HCAL offline reconstruction using the alpaka modifier - HLT customisation for alpaka-based HCAL reconstruction, skipping the legacy conversion for HCAL PF clusters - various fixes from the code review --- .../interface/HcalMahiConditionsRcd.h | 35 + .../DataRecord/src/HcalMahiConditionsRcd.cc | 4 + CondFormats/HcalObjects/BuildFile.xml | 5 + .../interface/HcalMahiConditionsHost.h | 11 + .../interface/HcalMahiConditionsSoA.h | 71 + .../interface/HcalMahiPulseOffsetsHost.h | 10 + .../interface/HcalMahiPulseOffsetsSoA.h | 13 + .../HcalRecoParamWithPulseShapeHost.h | 25 + .../HcalRecoParamWithPulseShapeSoA.h | 31 + .../interface/HcalRecoParamWithPulseShapeT.h | 57 + .../interface/HcalSiPMCharacteristicsHost.h | 12 + .../interface/HcalSiPMCharacteristicsSoA.h | 15 + .../alpaka/HcalMahiConditionsDevice.h | 20 + .../alpaka/HcalMahiPulseOffsetsDevice.h | 20 + .../HcalRecoParamWithPulseShapeDevice.h | 13 + .../alpaka/HcalSiPMCharacteristicsDevice.h | 19 + .../src/ES_HcalMahiConditionsHost.cc | 4 + .../src/ES_HcalMahiPulseOffsetsHost.cc | 4 + .../src/ES_HcalRecoParamWithPulseShapeHost.cc | 4 + .../src/ES_HcalSiPMCharacteristicsHost.cc | 4 + .../src/alpaka/ES_HcalMahiConditionDevice.cc | 4 + .../alpaka/ES_HcalMahiPulseOffsetsDevice.cc | 4 + .../ES_HcalRecoParamWithPulseShapeDevice.cc | 4 + .../ES_HcalSiPMCharacteristicsDevice.cc | 4 + .../python/upgradeWorkflowComponents.py | 2 +- .../python/OfflineSourceSequence_pp.py | 19 + DataFormats/Common/src/classes_def.xml | 1 + DataFormats/HcalDigi/BuildFile.xml | 4 + .../interface/HcalDigiHostCollection.h | 14 + DataFormats/HcalDigi/interface/HcalDigiSoA.h | 118 ++ .../alpaka/HcalDigiDeviceCollection.h | 23 + .../HcalDigi/src/alpaka/classes_cuda.h | 4 + .../HcalDigi/src/alpaka/classes_cuda_def.xml | 10 + .../HcalDigi/src/alpaka/classes_rocm.h | 4 + .../HcalDigi/src/alpaka/classes_rocm_def.xml | 10 + DataFormats/HcalDigi/src/classes.cc | 5 + DataFormats/HcalDigi/src/classes.h | 2 + DataFormats/HcalDigi/src/classes_def.xml | 12 + .../HcalRawToDigi/plugins/BuildFile.xml | 9 + .../plugins/alpaka/HcalDigisSoAProducer.cc | 198 +++ .../python/customizeHLTforAlpaka.py | 181 +++ .../AlpakaInterface/interface/atomicMaxPair.h | 38 + .../python/hcalGlobalReco_cff.py | 24 +- .../Configuration/python/hcalLocalReco_cff.py | 20 +- .../HcalRecAlgos/plugins/BuildFile.xml | 8 + ...HcalRecoParamsWithPulseShapesESProducer.cc | 113 ++ .../HcalRecProducers/plugins/BuildFile.xml | 25 + .../alpaka/HBHERecHitProducerPortable.cc | 167 ++ .../alpaka/HcalMahiConditionsESProducer.cc | 460 ++++++ .../alpaka/HcalMahiPulseOffsetsESProducer.cc | 44 + .../HcalSiPMCharacteristicsESProducer.cc | 64 + .../plugins/alpaka/Mahi.dev.cc | 1445 +++++++++++++++++ .../HcalRecProducers/plugins/alpaka/Mahi.h | 57 + .../hbheRecHitProducerPortableTask_cff.py | 57 + .../src/HcalRecHitSoAToLegacy.cc | 70 + 55 files changed, 3595 insertions(+), 6 deletions(-) create mode 100644 CondFormats/DataRecord/interface/HcalMahiConditionsRcd.h create mode 100644 CondFormats/DataRecord/src/HcalMahiConditionsRcd.cc create mode 100644 CondFormats/HcalObjects/interface/HcalMahiConditionsHost.h create mode 100644 CondFormats/HcalObjects/interface/HcalMahiConditionsSoA.h create mode 100644 CondFormats/HcalObjects/interface/HcalMahiPulseOffsetsHost.h create mode 100644 CondFormats/HcalObjects/interface/HcalMahiPulseOffsetsSoA.h create mode 100644 CondFormats/HcalObjects/interface/HcalRecoParamWithPulseShapeHost.h create mode 100644 CondFormats/HcalObjects/interface/HcalRecoParamWithPulseShapeSoA.h create mode 100644 CondFormats/HcalObjects/interface/HcalRecoParamWithPulseShapeT.h create mode 100644 CondFormats/HcalObjects/interface/HcalSiPMCharacteristicsHost.h create mode 100644 CondFormats/HcalObjects/interface/HcalSiPMCharacteristicsSoA.h create mode 100644 CondFormats/HcalObjects/interface/alpaka/HcalMahiConditionsDevice.h create mode 100644 CondFormats/HcalObjects/interface/alpaka/HcalMahiPulseOffsetsDevice.h create mode 100644 CondFormats/HcalObjects/interface/alpaka/HcalRecoParamWithPulseShapeDevice.h create mode 100644 CondFormats/HcalObjects/interface/alpaka/HcalSiPMCharacteristicsDevice.h create mode 100644 CondFormats/HcalObjects/src/ES_HcalMahiConditionsHost.cc create mode 100644 CondFormats/HcalObjects/src/ES_HcalMahiPulseOffsetsHost.cc create mode 100644 CondFormats/HcalObjects/src/ES_HcalRecoParamWithPulseShapeHost.cc create mode 100644 CondFormats/HcalObjects/src/ES_HcalSiPMCharacteristicsHost.cc create mode 100644 CondFormats/HcalObjects/src/alpaka/ES_HcalMahiConditionDevice.cc create mode 100644 CondFormats/HcalObjects/src/alpaka/ES_HcalMahiPulseOffsetsDevice.cc create mode 100644 CondFormats/HcalObjects/src/alpaka/ES_HcalRecoParamWithPulseShapeDevice.cc create mode 100644 CondFormats/HcalObjects/src/alpaka/ES_HcalSiPMCharacteristicsDevice.cc create mode 100644 DataFormats/HcalDigi/interface/HcalDigiHostCollection.h create mode 100644 DataFormats/HcalDigi/interface/HcalDigiSoA.h create mode 100644 DataFormats/HcalDigi/interface/alpaka/HcalDigiDeviceCollection.h create mode 100644 DataFormats/HcalDigi/src/alpaka/classes_cuda.h create mode 100644 DataFormats/HcalDigi/src/alpaka/classes_cuda_def.xml create mode 100644 DataFormats/HcalDigi/src/alpaka/classes_rocm.h create mode 100644 DataFormats/HcalDigi/src/alpaka/classes_rocm_def.xml create mode 100644 DataFormats/HcalDigi/src/classes.cc create mode 100644 EventFilter/HcalRawToDigi/plugins/alpaka/HcalDigisSoAProducer.cc create mode 100644 HeterogeneousCore/AlpakaInterface/interface/atomicMaxPair.h create mode 100644 RecoLocalCalo/HcalRecAlgos/plugins/alpaka/HcalRecoParamsWithPulseShapesESProducer.cc create mode 100644 RecoLocalCalo/HcalRecProducers/plugins/BuildFile.xml create mode 100644 RecoLocalCalo/HcalRecProducers/plugins/alpaka/HBHERecHitProducerPortable.cc create mode 100644 RecoLocalCalo/HcalRecProducers/plugins/alpaka/HcalMahiConditionsESProducer.cc create mode 100644 RecoLocalCalo/HcalRecProducers/plugins/alpaka/HcalMahiPulseOffsetsESProducer.cc create mode 100644 RecoLocalCalo/HcalRecProducers/plugins/alpaka/HcalSiPMCharacteristicsESProducer.cc create mode 100644 RecoLocalCalo/HcalRecProducers/plugins/alpaka/Mahi.dev.cc create mode 100644 RecoLocalCalo/HcalRecProducers/plugins/alpaka/Mahi.h create mode 100644 RecoLocalCalo/HcalRecProducers/python/hbheRecHitProducerPortableTask_cff.py create mode 100644 RecoLocalCalo/HcalRecProducers/src/HcalRecHitSoAToLegacy.cc diff --git a/CondFormats/DataRecord/interface/HcalMahiConditionsRcd.h b/CondFormats/DataRecord/interface/HcalMahiConditionsRcd.h new file mode 100644 index 0000000000000..714b0e6b2a5c7 --- /dev/null +++ b/CondFormats/DataRecord/interface/HcalMahiConditionsRcd.h @@ -0,0 +1,35 @@ +#ifndef CondFormats_DataRecord_HcalMahiConditionsRcd_h +#define CondFormats_DataRecord_HcalMahiConditionsRcd_h + +#include "FWCore/Framework/interface/DependentRecordImplementation.h" + +#include "CondFormats/DataRecord/interface/HcalRecoParamsRcd.h" +#include "CondFormats/DataRecord/interface/HcalPedestalsRcd.h" +#include "CondFormats/DataRecord/interface/HcalGainsRcd.h" +#include "CondFormats/DataRecord/interface/HcalLUTCorrsRcd.h" +#include "CondFormats/DataRecord/interface/HcalRespCorrsRcd.h" +#include "CondFormats/DataRecord/interface/HcalTimeCorrsRcd.h" +#include "CondFormats/DataRecord/interface/HcalPedestalWidthsRcd.h" +#include "CondFormats/DataRecord/interface/HcalGainWidthsRcd.h" +#include "CondFormats/DataRecord/interface/HcalChannelQualityRcd.h" +#include "CondFormats/DataRecord/interface/HcalQIETypesRcd.h" +#include "CondFormats/DataRecord/interface/HcalQIEDataRcd.h" +#include "CondFormats/DataRecord/interface/HcalSiPMParametersRcd.h" +#include "Geometry/Records/interface/HcalRecNumberingRecord.h" + +class HcalMahiConditionsRcd + : public edm::eventsetup::DependentRecordImplementation> {}; +#endif diff --git a/CondFormats/DataRecord/src/HcalMahiConditionsRcd.cc b/CondFormats/DataRecord/src/HcalMahiConditionsRcd.cc new file mode 100644 index 0000000000000..f8a3f2a02db7f --- /dev/null +++ b/CondFormats/DataRecord/src/HcalMahiConditionsRcd.cc @@ -0,0 +1,4 @@ +#include "CondFormats/DataRecord/interface/HcalMahiConditionsRcd.h" +#include "FWCore/Framework/interface/eventsetuprecord_registration_macro.h" + +EVENTSETUP_RECORD_REG(HcalMahiConditionsRcd); diff --git a/CondFormats/HcalObjects/BuildFile.xml b/CondFormats/HcalObjects/BuildFile.xml index f1e6c99fb4646..86637f594c35b 100644 --- a/CondFormats/HcalObjects/BuildFile.xml +++ b/CondFormats/HcalObjects/BuildFile.xml @@ -3,13 +3,18 @@ + + + + + diff --git a/CondFormats/HcalObjects/interface/HcalMahiConditionsHost.h b/CondFormats/HcalObjects/interface/HcalMahiConditionsHost.h new file mode 100644 index 0000000000000..50144c6322df4 --- /dev/null +++ b/CondFormats/HcalObjects/interface/HcalMahiConditionsHost.h @@ -0,0 +1,11 @@ +#ifndef CondFormats_HcalObjects_interface_HcalMahiConditionsHost_h +#define CondFormats_HcalObjects_interface_HcalMahiConditionsHost_h + +#include "CondFormats/HcalObjects/interface/HcalMahiConditionsSoA.h" +#include "DataFormats/Portable/interface/PortableHostCollection.h" + +namespace hcal { + using HcalMahiConditionsPortableHost = PortableHostCollection; +} + +#endif diff --git a/CondFormats/HcalObjects/interface/HcalMahiConditionsSoA.h b/CondFormats/HcalObjects/interface/HcalMahiConditionsSoA.h new file mode 100644 index 0000000000000..5d8d96968af7a --- /dev/null +++ b/CondFormats/HcalObjects/interface/HcalMahiConditionsSoA.h @@ -0,0 +1,71 @@ +#ifndef CondFormats_HcalObjects_HcalMahiConditionsSoA_h +#define CondFormats_HcalObjects_HcalMahiConditionsSoA_h + +#include "RecoLocalCalo/HcalRecAlgos/interface/HcalConstants.h" + +#include "DataFormats/SoATemplate/interface/SoACommon.h" +#include "DataFormats/SoATemplate/interface/SoALayout.h" +#include "DataFormats/SoATemplate/interface/SoAView.h" + +#include + +namespace hcal { + + static constexpr uint32_t numValuesPerChannel = 16; + using HcalPedestalArray = std::array; // 4 capIds + using HcalQIECodersArray = std::array; // QIEData + + GENERATE_SOA_LAYOUT(HcalMahiConditionsSoALayout, + SOA_COLUMN(HcalPedestalArray, pedestals_value), + SOA_COLUMN(HcalPedestalArray, pedestals_width), + SOA_COLUMN(HcalPedestalArray, gains_value), + SOA_COLUMN(HcalPedestalArray, effectivePedestals), + SOA_COLUMN(HcalPedestalArray, effectivePedestalWidths), + SOA_COLUMN(float, lutCorrs_values), + SOA_COLUMN(float, respCorrs_values), + SOA_COLUMN(float, timeCorrs_values), + // Use EIGEN_COLUMN for matrix? + SOA_COLUMN(float, pedestalWidths_sigma00), + SOA_COLUMN(float, pedestalWidths_sigma01), + SOA_COLUMN(float, pedestalWidths_sigma02), + SOA_COLUMN(float, pedestalWidths_sigma03), + SOA_COLUMN(float, pedestalWidths_sigma10), + SOA_COLUMN(float, pedestalWidths_sigma11), + SOA_COLUMN(float, pedestalWidths_sigma12), + SOA_COLUMN(float, pedestalWidths_sigma13), + SOA_COLUMN(float, pedestalWidths_sigma20), + SOA_COLUMN(float, pedestalWidths_sigma21), + SOA_COLUMN(float, pedestalWidths_sigma22), + SOA_COLUMN(float, pedestalWidths_sigma23), + SOA_COLUMN(float, pedestalWidths_sigma30), + SOA_COLUMN(float, pedestalWidths_sigma31), + SOA_COLUMN(float, pedestalWidths_sigma32), + SOA_COLUMN(float, pedestalWidths_sigma33), + SOA_COLUMN(float, gainWidths_value0), + SOA_COLUMN(float, gainWidths_value1), + SOA_COLUMN(float, gainWidths_value2), + SOA_COLUMN(float, gainWidths_value3), + SOA_COLUMN(uint32_t, channelQuality_status), + SOA_COLUMN(HcalQIECodersArray, qieCoders_offsets), + SOA_COLUMN(HcalQIECodersArray, qieCoders_slopes), + SOA_COLUMN(int, qieTypes_values), + SOA_COLUMN(int, sipmPar_type), + SOA_COLUMN(int, sipmPar_auxi1), + SOA_COLUMN(float, sipmPar_fcByPE), + SOA_COLUMN(float, sipmPar_darkCurrent), + SOA_COLUMN(float, sipmPar_auxi2), + SOA_SCALAR(int, maxDepthHB), + SOA_SCALAR(int, maxDepthHE), + SOA_SCALAR(int, maxPhiHE), + SOA_SCALAR(int, firstHBRing), + SOA_SCALAR(int, lastHBRing), + SOA_SCALAR(int, firstHERing), + SOA_SCALAR(int, lastHERing), + SOA_SCALAR(int, nEtaHB), + SOA_SCALAR(int, nEtaHE), + SOA_SCALAR(uint32_t, offsetForHashes)) + using HcalMahiConditionsSoA = HcalMahiConditionsSoALayout<>; + +} // namespace hcal + +#endif diff --git a/CondFormats/HcalObjects/interface/HcalMahiPulseOffsetsHost.h b/CondFormats/HcalObjects/interface/HcalMahiPulseOffsetsHost.h new file mode 100644 index 0000000000000..c1ca23ac6d897 --- /dev/null +++ b/CondFormats/HcalObjects/interface/HcalMahiPulseOffsetsHost.h @@ -0,0 +1,10 @@ +#ifndef CondFormats_HcalObjects_interface_HcalMahiPulseOffsetsPortable_h +#define CondFormats_HcalObjects_interface_HcalMahiPulseOffsetsPortable_h + +#include "CondFormats/HcalObjects/interface/HcalMahiPulseOffsetsSoA.h" +#include "DataFormats/Portable/interface/PortableHostCollection.h" + +namespace hcal { + using HcalMahiPulseOffsetsPortableHost = PortableHostCollection; +} +#endif diff --git a/CondFormats/HcalObjects/interface/HcalMahiPulseOffsetsSoA.h b/CondFormats/HcalObjects/interface/HcalMahiPulseOffsetsSoA.h new file mode 100644 index 0000000000000..33fecb3d1e27a --- /dev/null +++ b/CondFormats/HcalObjects/interface/HcalMahiPulseOffsetsSoA.h @@ -0,0 +1,13 @@ +#ifndef CondFormats_HcalObjects_HcalMahiPulseOffsetsSoA_h +#define CondFormats_HcalObjects_HcalMahiPulseOffsetsSoA_h + +#include "DataFormats/SoATemplate/interface/SoACommon.h" +#include "DataFormats/SoATemplate/interface/SoALayout.h" +#include "DataFormats/SoATemplate/interface/SoAView.h" + +namespace hcal { + GENERATE_SOA_LAYOUT(HcalMahiPulseOffsetsSoALayout, SOA_COLUMN(int, offsets)) + using HcalMahiPulseOffsetsSoA = HcalMahiPulseOffsetsSoALayout<>; + +} // namespace hcal +#endif diff --git a/CondFormats/HcalObjects/interface/HcalRecoParamWithPulseShapeHost.h b/CondFormats/HcalObjects/interface/HcalRecoParamWithPulseShapeHost.h new file mode 100644 index 0000000000000..70874003a5499 --- /dev/null +++ b/CondFormats/HcalObjects/interface/HcalRecoParamWithPulseShapeHost.h @@ -0,0 +1,25 @@ +#ifndef CondFormats_HcalObjects_interface_HcalRecoParamWithPulseShapeHost_h +#define CondFormats_HcalObjects_interface_HcalRecoParamWithPulseShapeHost_h + +#include "CondFormats/HcalObjects/interface/HcalRecoParamWithPulseShapeT.h" +#include "DataFormats/Portable/interface/PortableCollection.h" +#include "DataFormats/Portable/interface/PortableHostCollection.h" + +namespace hcal { + using HcalRecoParamWithPulseShapeHost = HcalRecoParamWithPulseShapeT; +} +namespace cms::alpakatools { + template <> + struct CopyToDevice<::hcal::HcalRecoParamWithPulseShapeHost> { + template + static auto copyAsync(TQueue& queue, ::hcal::HcalRecoParamWithPulseShapeHost const& hostProduct) { + using RecoParamCopy = CopyToDevice<::hcal::HcalRecoParamWithPulseShapeHost::RecoParamCollection>; + using PulseShapeCopy = CopyToDevice<::hcal::HcalRecoParamWithPulseShapeHost::PulseShapeCollection>; + using Device = alpaka::Dev; + return ::hcal::HcalRecoParamWithPulseShapeT(RecoParamCopy::copyAsync(queue, hostProduct.recoParam()), + PulseShapeCopy::copyAsync(queue, hostProduct.pulseShape())); + } + }; +} // namespace cms::alpakatools + +#endif diff --git a/CondFormats/HcalObjects/interface/HcalRecoParamWithPulseShapeSoA.h b/CondFormats/HcalObjects/interface/HcalRecoParamWithPulseShapeSoA.h new file mode 100644 index 0000000000000..532448e17aed7 --- /dev/null +++ b/CondFormats/HcalObjects/interface/HcalRecoParamWithPulseShapeSoA.h @@ -0,0 +1,31 @@ +#ifndef CondFormats_HcalObjects_HcalRecoParamWithPulseShapeSoA_h +#define CondFormats_HcalObjects_HcalRecoParamWithPulseShapeSoA_h + +#include "RecoLocalCalo/HcalRecAlgos/interface/HcalConstants.h" + +#include "DataFormats/SoATemplate/interface/SoACommon.h" +#include "DataFormats/SoATemplate/interface/SoALayout.h" +#include "DataFormats/SoATemplate/interface/SoAView.h" + +#include + +namespace hcal { + using HcalPSfunctorArray = std::array; // 256 + using HcalPSfunctorBXarray = std::array; // 25 + + GENERATE_SOA_LAYOUT(HcalRecoParamSoALayout, + SOA_COLUMN(uint32_t, param1), + SOA_COLUMN(uint32_t, param2), + SOA_COLUMN(uint32_t, ids)) + GENERATE_SOA_LAYOUT(HcalPulseShapeSoALayout, + SOA_COLUMN(HcalPSfunctorArray, acc25nsVec), + SOA_COLUMN(HcalPSfunctorArray, diff25nsItvlVec), + SOA_COLUMN(HcalPSfunctorBXarray, accVarLenIdxMinusOneVec), + SOA_COLUMN(HcalPSfunctorBXarray, diffVarItvlIdxMinusOneVec), + SOA_COLUMN(HcalPSfunctorBXarray, accVarLenIdxZEROVec), + SOA_COLUMN(HcalPSfunctorBXarray, diffVarItvlIdxZEROVec)) + + using HcalRecoParamSoA = HcalRecoParamSoALayout<>; + using HcalPulseShapeSoA = HcalPulseShapeSoALayout<>; +} // namespace hcal +#endif diff --git a/CondFormats/HcalObjects/interface/HcalRecoParamWithPulseShapeT.h b/CondFormats/HcalObjects/interface/HcalRecoParamWithPulseShapeT.h new file mode 100644 index 0000000000000..bfffd31092666 --- /dev/null +++ b/CondFormats/HcalObjects/interface/HcalRecoParamWithPulseShapeT.h @@ -0,0 +1,57 @@ +#ifndef CondFormats_HcalObjects_interface_HcalRecoParamWithPulseShapeHostT_h +#define CondFormats_HcalObjects_interface_HcalRecoParamWithPulseShapeHostT_h + +#include "CondFormats/HcalObjects/interface/HcalRecoParamWithPulseShapeSoA.h" +#include "DataFormats/Portable/interface/PortableCollection.h" +#include "DataFormats/Portable/interface/PortableHostCollection.h" + +namespace hcal { + template + class HcalRecoParamWithPulseShapeT { + public: + using RecoParamCollection = PortableCollection; + using PulseShapeCollection = PortableCollection; + + using PulseShapeConstElement = typename PulseShapeCollection::ConstView::const_element; + + class ConstView { + public: + using RecoParamConstView = typename RecoParamCollection::ConstView; + using PulseShapeConstView = typename PulseShapeCollection::ConstView; + constexpr ConstView(RecoParamConstView recoView, PulseShapeConstView psView) + : recoParamView_{recoView}, pulseShapeView_{psView} {}; + + ALPAKA_FN_ACC PulseShapeConstElement getPulseShape(uint32_t const hashedId) const { + auto const recoPulseShapeId = recoParamView_.ids()[hashedId]; + return pulseShapeView_[recoPulseShapeId]; + } + + constexpr typename RecoParamCollection::ConstView recoParamView() { return recoParamView_; } + + private: + typename RecoParamCollection::ConstView recoParamView_; + typename PulseShapeCollection::ConstView pulseShapeView_; + }; + + HcalRecoParamWithPulseShapeT(size_t recoSize, size_t pulseSize, TDev const& dev) + : recoParam_(recoSize, dev), pulseShape_(pulseSize, dev) {} + template >> + HcalRecoParamWithPulseShapeT(size_t recoSize, size_t pulseSize, TQueue const& queue) + : recoParam_(recoSize, queue), pulseShape_(pulseSize, queue) {} + HcalRecoParamWithPulseShapeT(RecoParamCollection reco, PulseShapeCollection pulse) + : recoParam_(std::move(reco)), pulseShape_(std::move(pulse)) {} + + const RecoParamCollection& recoParam() const { return recoParam_; } + const PulseShapeCollection& pulseShape() const { return pulseShape_; } + + typename RecoParamCollection::View recoParamView() { return recoParam_.view(); } + typename PulseShapeCollection::View pulseShapeView() { return pulseShape_.view(); } + + ConstView const_view() const { return ConstView(recoParam_.view(), pulseShape_.view()); } + + private: + RecoParamCollection recoParam_; + PulseShapeCollection pulseShape_; + }; +} // namespace hcal +#endif diff --git a/CondFormats/HcalObjects/interface/HcalSiPMCharacteristicsHost.h b/CondFormats/HcalObjects/interface/HcalSiPMCharacteristicsHost.h new file mode 100644 index 0000000000000..dad50f32a160a --- /dev/null +++ b/CondFormats/HcalObjects/interface/HcalSiPMCharacteristicsHost.h @@ -0,0 +1,12 @@ +#ifndef CondFormats_HcalObjects_interface_HcalSiPMCharacteristicsPortable_h +#define CondFormats_HcalObjects_interface_HcalSiPMCharacteristicsPortable_h + +#include "CondFormats/HcalObjects/interface/HcalSiPMCharacteristicsSoA.h" +#include "DataFormats/Portable/interface/PortableHostCollection.h" +#include "HeterogeneousCore/AlpakaInterface/interface/config.h" +#include "HeterogeneousCore/AlpakaInterface/interface/memory.h" + +namespace hcal { + using HcalSiPMCharacteristicsPortableHost = PortableHostCollection; +} +#endif diff --git a/CondFormats/HcalObjects/interface/HcalSiPMCharacteristicsSoA.h b/CondFormats/HcalObjects/interface/HcalSiPMCharacteristicsSoA.h new file mode 100644 index 0000000000000..fe423fbf085cf --- /dev/null +++ b/CondFormats/HcalObjects/interface/HcalSiPMCharacteristicsSoA.h @@ -0,0 +1,15 @@ +#ifndef CondFormats_HcalObjects_HcalSiPMCharacteristicsSoA_h +#define CondFormats_HcalObjects_HcalSiPMCharacteristicsSoA_h + +#include "CondFormats/HcalObjects/interface/HcalSiPMCharacteristics.h" + +#include "DataFormats/SoATemplate/interface/SoACommon.h" +#include "DataFormats/SoATemplate/interface/SoALayout.h" +#include "DataFormats/SoATemplate/interface/SoAView.h" + +namespace hcal { + GENERATE_SOA_LAYOUT(HcalSiPMCharacteristicsSoALayout, + SOA_COLUMN(HcalSiPMCharacteristics::PrecisionItem, precisionItem)) + using HcalSiPMCharacteristicsSoA = HcalSiPMCharacteristicsSoALayout<>; +} // namespace hcal +#endif diff --git a/CondFormats/HcalObjects/interface/alpaka/HcalMahiConditionsDevice.h b/CondFormats/HcalObjects/interface/alpaka/HcalMahiConditionsDevice.h new file mode 100644 index 0000000000000..d96d1ffde98ef --- /dev/null +++ b/CondFormats/HcalObjects/interface/alpaka/HcalMahiConditionsDevice.h @@ -0,0 +1,20 @@ +#ifndef CondFormats_HcalObjects_interface_alpaka_HcalMahiConditionsDevice_h +#define CondFormats_HcalObjects_interface_alpaka_HcalMahiConditionsDevice_h + +#include "CondFormats/HcalObjects/interface/HcalMahiConditionsHost.h" +#include "CondFormats/HcalObjects/interface/HcalMahiConditionsSoA.h" +#include "DataFormats/Portable/interface/alpaka/PortableCollection.h" +#include "HeterogeneousCore/AlpakaInterface/interface/config.h" +#include "HeterogeneousCore/AlpakaInterface/interface/memory.h" + +namespace ALPAKA_ACCELERATOR_NAMESPACE { + + namespace hcal { + + using ::hcal::HcalMahiConditionsPortableHost; + using HcalMahiConditionsPortableDevice = PortableCollection<::hcal::HcalMahiConditionsSoA>; + } // namespace hcal + +} // namespace ALPAKA_ACCELERATOR_NAMESPACE + +#endif diff --git a/CondFormats/HcalObjects/interface/alpaka/HcalMahiPulseOffsetsDevice.h b/CondFormats/HcalObjects/interface/alpaka/HcalMahiPulseOffsetsDevice.h new file mode 100644 index 0000000000000..1799141c80c91 --- /dev/null +++ b/CondFormats/HcalObjects/interface/alpaka/HcalMahiPulseOffsetsDevice.h @@ -0,0 +1,20 @@ +#ifndef CondFormats_HcalObjects_interface_alpaka_HcalMahiPulseOffsetsPortable_h +#define CondFormats_HcalObjects_interface_alpaka_HcalMahiPulseOffsetsPortable_h + +#include "CondFormats/HcalObjects/interface/HcalMahiPulseOffsetsHost.h" +#include "CondFormats/HcalObjects/interface/HcalMahiPulseOffsetsSoA.h" +#include "DataFormats/Portable/interface/alpaka/PortableCollection.h" +#include "HeterogeneousCore/AlpakaInterface/interface/config.h" +#include "HeterogeneousCore/AlpakaInterface/interface/memory.h" + +namespace ALPAKA_ACCELERATOR_NAMESPACE { + + namespace hcal { + using ::hcal::HcalMahiPulseOffsetsPortableHost; + using HcalMahiPulseOffsetsPortableDevice = PortableCollection<::hcal::HcalMahiPulseOffsetsSoA>; + + } // namespace hcal + +} // namespace ALPAKA_ACCELERATOR_NAMESPACE + +#endif diff --git a/CondFormats/HcalObjects/interface/alpaka/HcalRecoParamWithPulseShapeDevice.h b/CondFormats/HcalObjects/interface/alpaka/HcalRecoParamWithPulseShapeDevice.h new file mode 100644 index 0000000000000..726904399c36b --- /dev/null +++ b/CondFormats/HcalObjects/interface/alpaka/HcalRecoParamWithPulseShapeDevice.h @@ -0,0 +1,13 @@ +#ifndef CondFormats_HcalObjects_interface_alpaka_HcalRecoParamWithPulseShapeDevice_h +#define CondFormats_HcalObjects_interface_alpaka_HcalRecoParamWithPulseShapeDevice_h + +#include "CondFormats/HcalObjects/interface/HcalRecoParamWithPulseShapeT.h" +#include "DataFormats/Portable/interface/alpaka/PortableCollection.h" + +namespace ALPAKA_ACCELERATOR_NAMESPACE { + namespace hcal { + using HcalRecoParamWithPulseShapeDevice = ::hcal::HcalRecoParamWithPulseShapeT; + } +} // namespace ALPAKA_ACCELERATOR_NAMESPACE + +#endif diff --git a/CondFormats/HcalObjects/interface/alpaka/HcalSiPMCharacteristicsDevice.h b/CondFormats/HcalObjects/interface/alpaka/HcalSiPMCharacteristicsDevice.h new file mode 100644 index 0000000000000..bdc6d4ea0e7f5 --- /dev/null +++ b/CondFormats/HcalObjects/interface/alpaka/HcalSiPMCharacteristicsDevice.h @@ -0,0 +1,19 @@ +#ifndef CondFormats_HcalObjects_interface_alpaka_HcalSiPMCharacteristicsPortable_h +#define CondFormats_HcalObjects_interface_alpaka_HcalSiPMCharacteristicsPortable_h + +#include "CondFormats/HcalObjects/interface/HcalSiPMCharacteristicsHost.h" +#include "CondFormats/HcalObjects/interface/HcalSiPMCharacteristicsSoA.h" +#include "DataFormats/Portable/interface/alpaka/PortableCollection.h" +#include "HeterogeneousCore/AlpakaInterface/interface/config.h" +#include "HeterogeneousCore/AlpakaInterface/interface/memory.h" + +namespace ALPAKA_ACCELERATOR_NAMESPACE { + + namespace hcal { + using ::hcal::HcalSiPMCharacteristicsPortableHost; + using HcalSiPMCharacteristicsPortableDevice = PortableCollection<::hcal::HcalSiPMCharacteristicsSoA>; + } // namespace hcal + +} // namespace ALPAKA_ACCELERATOR_NAMESPACE + +#endif diff --git a/CondFormats/HcalObjects/src/ES_HcalMahiConditionsHost.cc b/CondFormats/HcalObjects/src/ES_HcalMahiConditionsHost.cc new file mode 100644 index 0000000000000..0c1cad2385a53 --- /dev/null +++ b/CondFormats/HcalObjects/src/ES_HcalMahiConditionsHost.cc @@ -0,0 +1,4 @@ +#include "CondFormats/HcalObjects/interface/HcalMahiConditionsHost.h" +#include "FWCore/Utilities/interface/typelookup.h" + +TYPELOOKUP_DATA_REG(hcal::HcalMahiConditionsPortableHost); diff --git a/CondFormats/HcalObjects/src/ES_HcalMahiPulseOffsetsHost.cc b/CondFormats/HcalObjects/src/ES_HcalMahiPulseOffsetsHost.cc new file mode 100644 index 0000000000000..1611ee29aa7d8 --- /dev/null +++ b/CondFormats/HcalObjects/src/ES_HcalMahiPulseOffsetsHost.cc @@ -0,0 +1,4 @@ +#include "CondFormats/HcalObjects/interface/HcalMahiPulseOffsetsHost.h" +#include "FWCore/Utilities/interface/typelookup.h" + +TYPELOOKUP_DATA_REG(hcal::HcalMahiPulseOffsetsPortableHost); diff --git a/CondFormats/HcalObjects/src/ES_HcalRecoParamWithPulseShapeHost.cc b/CondFormats/HcalObjects/src/ES_HcalRecoParamWithPulseShapeHost.cc new file mode 100644 index 0000000000000..58cb4fb40e12e --- /dev/null +++ b/CondFormats/HcalObjects/src/ES_HcalRecoParamWithPulseShapeHost.cc @@ -0,0 +1,4 @@ +#include "CondFormats/HcalObjects/interface/HcalRecoParamWithPulseShapeHost.h" +#include "FWCore/Utilities/interface/typelookup.h" + +TYPELOOKUP_DATA_REG(hcal::HcalRecoParamWithPulseShapeHost); diff --git a/CondFormats/HcalObjects/src/ES_HcalSiPMCharacteristicsHost.cc b/CondFormats/HcalObjects/src/ES_HcalSiPMCharacteristicsHost.cc new file mode 100644 index 0000000000000..b5ecde5f22b84 --- /dev/null +++ b/CondFormats/HcalObjects/src/ES_HcalSiPMCharacteristicsHost.cc @@ -0,0 +1,4 @@ +#include "CondFormats/HcalObjects/interface/HcalSiPMCharacteristicsHost.h" +#include "FWCore/Utilities/interface/typelookup.h" + +TYPELOOKUP_DATA_REG(hcal::HcalSiPMCharacteristicsPortableHost); diff --git a/CondFormats/HcalObjects/src/alpaka/ES_HcalMahiConditionDevice.cc b/CondFormats/HcalObjects/src/alpaka/ES_HcalMahiConditionDevice.cc new file mode 100644 index 0000000000000..cfec374296229 --- /dev/null +++ b/CondFormats/HcalObjects/src/alpaka/ES_HcalMahiConditionDevice.cc @@ -0,0 +1,4 @@ +#include "CondFormats/HcalObjects/interface/alpaka/HcalMahiConditionsDevice.h" +#include "HeterogeneousCore/AlpakaCore/interface/alpaka/typelookup.h" + +TYPELOOKUP_ALPAKA_DATA_REG(hcal::HcalMahiConditionsPortableDevice); diff --git a/CondFormats/HcalObjects/src/alpaka/ES_HcalMahiPulseOffsetsDevice.cc b/CondFormats/HcalObjects/src/alpaka/ES_HcalMahiPulseOffsetsDevice.cc new file mode 100644 index 0000000000000..32fa3bb0e3410 --- /dev/null +++ b/CondFormats/HcalObjects/src/alpaka/ES_HcalMahiPulseOffsetsDevice.cc @@ -0,0 +1,4 @@ +#include "CondFormats/HcalObjects/interface/alpaka/HcalMahiPulseOffsetsDevice.h" +#include "HeterogeneousCore/AlpakaCore/interface/alpaka/typelookup.h" + +TYPELOOKUP_ALPAKA_DATA_REG(hcal::HcalMahiPulseOffsetsPortableDevice); diff --git a/CondFormats/HcalObjects/src/alpaka/ES_HcalRecoParamWithPulseShapeDevice.cc b/CondFormats/HcalObjects/src/alpaka/ES_HcalRecoParamWithPulseShapeDevice.cc new file mode 100644 index 0000000000000..ab764f8e82e3a --- /dev/null +++ b/CondFormats/HcalObjects/src/alpaka/ES_HcalRecoParamWithPulseShapeDevice.cc @@ -0,0 +1,4 @@ +#include "CondFormats/HcalObjects/interface/alpaka/HcalRecoParamWithPulseShapeDevice.h" +#include "HeterogeneousCore/AlpakaCore/interface/alpaka/typelookup.h" + +TYPELOOKUP_ALPAKA_TEMPLATED_DATA_REG(hcal::HcalRecoParamWithPulseShapeT); diff --git a/CondFormats/HcalObjects/src/alpaka/ES_HcalSiPMCharacteristicsDevice.cc b/CondFormats/HcalObjects/src/alpaka/ES_HcalSiPMCharacteristicsDevice.cc new file mode 100644 index 0000000000000..8f9b06b1b37dd --- /dev/null +++ b/CondFormats/HcalObjects/src/alpaka/ES_HcalSiPMCharacteristicsDevice.cc @@ -0,0 +1,4 @@ +#include "CondFormats/HcalObjects/interface/alpaka/HcalSiPMCharacteristicsDevice.h" +#include "HeterogeneousCore/AlpakaCore/interface/alpaka/typelookup.h" + +TYPELOOKUP_ALPAKA_DATA_REG(hcal::HcalSiPMCharacteristicsPortableDevice); diff --git a/Configuration/PyReleaseValidation/python/upgradeWorkflowComponents.py b/Configuration/PyReleaseValidation/python/upgradeWorkflowComponents.py index 759c64985b477..bb1417dcbea77 100644 --- a/Configuration/PyReleaseValidation/python/upgradeWorkflowComponents.py +++ b/Configuration/PyReleaseValidation/python/upgradeWorkflowComponents.py @@ -1253,7 +1253,7 @@ def setup_(self, step, stepName, stepDict, k, properties): '--procModifiers': 'alpaka', # alpaka modifier activates customiseHLTForAlpaka }, reco = { - '-s': 'RAW2DIGI:RawToDigi_hcalOnly,RECO:reconstruction_hcalOnlyLegacy+reconstruction_hcalOnly,VALIDATION:@hcalOnlyValidation+pfClusterHBHEOnlyAlpakaComparisonSequence,DQM:@hcalOnly+@hcal2Only', + '-s' : 'RAW2DIGI:RawToDigi_hcalOnly,RECO:reconstruction_hcalOnlyLegacy+reconstruction_hcalOnly,VALIDATION:@hcalOnlyValidation+pfClusterHBHEOnlyAlpakaComparisonSequence,DQM:@hcalOnly+@hcal2Only+hcalOnlyOfflineSourceSequenceAlpaka', '--procModifiers': 'alpaka' }, harvest = { diff --git a/DQM/HcalTasks/python/OfflineSourceSequence_pp.py b/DQM/HcalTasks/python/OfflineSourceSequence_pp.py index 668543aca5d0d..e857dd95f1b45 100644 --- a/DQM/HcalTasks/python/OfflineSourceSequence_pp.py +++ b/DQM/HcalTasks/python/OfflineSourceSequence_pp.py @@ -20,6 +20,8 @@ recHitPreRecoTask.ptype = 1 hcalGPUComparisonTask.ptype = 1 +hcalAlpakaComparisonTask = hcalGPUComparisonTask.clone() + # set the label for Emulator TP Task tpTask.tagEmul = "valHcalTriggerPrimitiveDigis" @@ -40,6 +42,13 @@ rawTask + hcalGPUComparisonTask ) +hcalOnlyOfflineSourceSequenceAlpaka = cms.Sequence( + digiTask + + recHitTask + + rawTask + + hcalAlpakaComparisonTask +) + from Configuration.ProcessModifiers.gpuValidationHcal_cff import gpuValidationHcal gpuValidationHcal.toReplaceWith(hcalOnlyOfflineSourceSequence, hcalOnlyOfflineSourceSequenceGPU) @@ -54,6 +63,7 @@ ) from Configuration.Eras.Modifier_run3_HB_cff import run3_HB +from Configuration.ProcessModifiers.alpaka_cff import alpaka ### reverting the reco tag setting that inherited from run2 run3_HB.toModify(hcalGPUComparisonTask, tagHBHE_ref = "hbhereco@cpu", @@ -62,6 +72,15 @@ run3_HB.toModify(recHitTask, tagHBHE = "hbhereco" ) +(alpaka & run3_HB).toModify(hcalGPUComparisonTask, + tagHBHE_ref = "hbherecoSerial", + tagHBHE_target = "hbhereco" +) +run3_HB.toModify(hcalAlpakaComparisonTask, + tagHBHE_ref = "hbherecoLegacy", + tagHBHE_target = "hbhereco" +) + _phase1_hcalOnlyOfflineSourceSequence = hcalOnlyOfflineSourceSequence.copy() _phase1_hcalOnlyOfflineSourceSequence.replace(recHitPreRecoTask, recHitTask) run3_HB.toReplaceWith(hcalOnlyOfflineSourceSequence, _phase1_hcalOnlyOfflineSourceSequence) diff --git a/DataFormats/Common/src/classes_def.xml b/DataFormats/Common/src/classes_def.xml index c00a37238a4e7..2c963c6f49f1f 100644 --- a/DataFormats/Common/src/classes_def.xml +++ b/DataFormats/Common/src/classes_def.xml @@ -206,4 +206,5 @@ + diff --git a/DataFormats/HcalDigi/BuildFile.xml b/DataFormats/HcalDigi/BuildFile.xml index 5227530aaaa5e..de6b6ceb0a9ee 100644 --- a/DataFormats/HcalDigi/BuildFile.xml +++ b/DataFormats/HcalDigi/BuildFile.xml @@ -1,6 +1,10 @@ + + + + diff --git a/DataFormats/HcalDigi/interface/HcalDigiHostCollection.h b/DataFormats/HcalDigi/interface/HcalDigiHostCollection.h new file mode 100644 index 0000000000000..e7b7e08cfc577 --- /dev/null +++ b/DataFormats/HcalDigi/interface/HcalDigiHostCollection.h @@ -0,0 +1,14 @@ +#ifndef DataFormats_HcalDigi_HcalDigiHostCollection_h +#define DataFormats_HcalDigi_HcalDigiHostCollection_h + +#include "DataFormats/Portable/interface/PortableHostCollection.h" +#include "DataFormats/HcalDigi/interface/HcalDigiSoA.h" + +namespace hcal { + + // HcalDigiSoA in host memory + using Phase1DigiHostCollection = PortableHostCollection; + using Phase0DigiHostCollection = PortableHostCollection; +} // namespace hcal + +#endif diff --git a/DataFormats/HcalDigi/interface/HcalDigiSoA.h b/DataFormats/HcalDigi/interface/HcalDigiSoA.h new file mode 100644 index 0000000000000..e2424fb141296 --- /dev/null +++ b/DataFormats/HcalDigi/interface/HcalDigiSoA.h @@ -0,0 +1,118 @@ +#ifndef DataFormats_HcalDigi_HcalDigiSoA_h +#define DataFormats_HcalDigi_HcalDigiSoA_h + +//TODO: Use Eigen column for data(?) +//#include +//#include + +#include "DataFormats/Common/interface/StdArray.h" +#include "DataFormats/SoATemplate/interface/SoALayout.h" +#include "DataFormats/HcalDigi/interface/HcalDigiCollections.h" + +namespace hcal { + + // FLAVOR_HE_QIE11 = 1; Phase1 upgrade + struct Flavor1 { + static constexpr int WORDS_PER_SAMPLE = 1; + static constexpr int SAMPLES_PER_WORD = 1; + static constexpr int HEADER_WORDS = 1; + + static constexpr uint8_t adc(uint16_t const* const sample_start) { return (*sample_start & 0xff); } + static constexpr uint8_t tdc(uint16_t const* const sample_start) { return (*sample_start >> 8) & 0x3f; } + static constexpr uint8_t soibit(uint16_t const* const sample_start) { return (*sample_start >> 14) & 0x1; } + }; + + // FLAVOR_HB_QIE11 = 3; Phase1 upgrade + struct Flavor3 { + static constexpr int WORDS_PER_SAMPLE = 1; + static constexpr int SAMPLES_PER_WORD = 1; + static constexpr int HEADER_WORDS = 1; + + static constexpr uint8_t adc(uint16_t const* const sample_start) { return (*sample_start & 0xff); } + static constexpr uint8_t tdc(uint16_t const* const sample_start) { return ((*sample_start >> 8) & 0x3); } + static constexpr uint8_t soibit(uint16_t const* const sample_start) { return ((*sample_start >> 14) & 0x1); } + static constexpr uint8_t capid(uint16_t const* const sample_start) { return ((*sample_start >> 10) & 0x3); } + }; + + // FLAVOR_HB_QIE10 = 5; Phase0 + struct Flavor5 { + static constexpr float WORDS_PER_SAMPLE = 0.5; + static constexpr int SAMPLES_PER_WORD = 2; + static constexpr int HEADER_WORDS = 1; + + static constexpr uint8_t adc(uint16_t const* const sample_start, uint8_t const shifter) { + return ((*sample_start >> shifter * 8) & 0x7f); + } + }; + + template + constexpr uint8_t capid_for_sample(uint16_t const* const dfstart, uint32_t const sample) { + auto const capid_first = (*dfstart >> 8) & 0x3; + return (capid_first + sample) & 0x3; // same as % 4 + } + + template <> + constexpr uint8_t capid_for_sample(uint16_t const* const dfstart, uint32_t const sample) { + return Flavor3::capid(dfstart + Flavor3::HEADER_WORDS + sample * Flavor3::WORDS_PER_SAMPLE); + } + + template + constexpr uint8_t soibit_for_sample(uint16_t const* const dfstart, uint32_t const sample) { + return Flavor::soibit(dfstart + Flavor::HEADER_WORDS + sample * Flavor::WORDS_PER_SAMPLE); + } + + template + constexpr uint8_t adc_for_sample(uint16_t const* const dfstart, uint32_t const sample) { + return Flavor::adc(dfstart + Flavor::HEADER_WORDS + sample * Flavor::WORDS_PER_SAMPLE); + } + + template + constexpr uint8_t tdc_for_sample(uint16_t const* const dfstart, uint32_t const sample) { + return Flavor::tdc(dfstart + Flavor::HEADER_WORDS + sample * Flavor::WORDS_PER_SAMPLE); + } + + template <> + constexpr uint8_t adc_for_sample(uint16_t const* const dfstart, uint32_t const sample) { + // avoid using WORDS_PER_SAMPLE and simply shift + return Flavor5::adc(dfstart + Flavor5::HEADER_WORDS + (sample >> 1), sample % 2); + } + + template + constexpr uint32_t compute_stride(uint32_t const nsamples) { + return static_cast(nsamples * Flavor::WORDS_PER_SAMPLE) + Flavor::HEADER_WORDS; + } + + template + constexpr uint32_t compute_nsamples(uint32_t const nwords) { + if constexpr (Flavor::SAMPLES_PER_WORD >= 1) + return (nwords - Flavor::HEADER_WORDS) * Flavor::SAMPLES_PER_WORD; + else + return (nwords - Flavor::HEADER_WORDS) / Flavor::WORDS_PER_SAMPLE; + } + + using QIE11dataArray = edm::StdArray; + using QIE10dataArray = edm::StdArray; + + //using QIE11dataVector = Eigen::Matrix; + //using QIE10dataVector = Eigen::Matrix; + + GENERATE_SOA_LAYOUT(HcalPhase1DigiSoALayout, + SOA_COLUMN(uint32_t, ids), + //SOA_EIGEN_COLUMN(QIE11dataVector, data), + SOA_COLUMN(QIE11dataArray, data), + SOA_SCALAR(uint32_t, stride), + SOA_SCALAR(uint32_t, size)) + GENERATE_SOA_LAYOUT(HcalPhase0DigiSoALayout, + SOA_COLUMN(uint32_t, ids), + SOA_COLUMN(uint32_t, npresamples), + //SOA_EIGEN_COLUMN(QIE10dataVector, data), + SOA_COLUMN(QIE10dataArray, data), + SOA_SCALAR(uint32_t, stride), + SOA_SCALAR(uint32_t, size)) + + using HcalPhase1DigiSoA = HcalPhase1DigiSoALayout<>; + using HcalPhase0DigiSoA = HcalPhase0DigiSoALayout<>; + +} // namespace hcal + +#endif diff --git a/DataFormats/HcalDigi/interface/alpaka/HcalDigiDeviceCollection.h b/DataFormats/HcalDigi/interface/alpaka/HcalDigiDeviceCollection.h new file mode 100644 index 0000000000000..98791230330df --- /dev/null +++ b/DataFormats/HcalDigi/interface/alpaka/HcalDigiDeviceCollection.h @@ -0,0 +1,23 @@ +#ifndef DataFormats_HcalDigi_interface_alpaka_HcalDigiDeviceCollection_h +#define DataFormats_HcalDigi_interface_alpaka_HcalDigiDeviceCollection_h + +#include "DataFormats/Portable/interface/alpaka/PortableCollection.h" +#include "DataFormats/HcalDigi/interface/HcalDigiSoA.h" +#include "HeterogeneousCore/AlpakaInterface/interface/config.h" + +namespace ALPAKA_ACCELERATOR_NAMESPACE { + + namespace hcal { + + // make the names from the top-level hcal namespace visible for unqualified lookup + // inside the ALPAKA_ACCELERATOR_NAMESPACE::hcal namespace + using namespace ::hcal; + + // HcalDigiSoA in device global memory + using Phase1DigiDeviceCollection = PortableCollection; + using Phase0DigiDeviceCollection = PortableCollection; + + } // namespace hcal +} // namespace ALPAKA_ACCELERATOR_NAMESPACE + +#endif diff --git a/DataFormats/HcalDigi/src/alpaka/classes_cuda.h b/DataFormats/HcalDigi/src/alpaka/classes_cuda.h new file mode 100644 index 0000000000000..9133baede975b --- /dev/null +++ b/DataFormats/HcalDigi/src/alpaka/classes_cuda.h @@ -0,0 +1,4 @@ +#include "DataFormats/Common/interface/DeviceProduct.h" +#include "DataFormats/Common/interface/Wrapper.h" +#include "DataFormats/HcalDigi/interface/HcalDigiSoA.h" +#include "DataFormats/HcalDigi/interface/alpaka/HcalDigiDeviceCollection.h" diff --git a/DataFormats/HcalDigi/src/alpaka/classes_cuda_def.xml b/DataFormats/HcalDigi/src/alpaka/classes_cuda_def.xml new file mode 100644 index 0000000000000..e40162e514ed4 --- /dev/null +++ b/DataFormats/HcalDigi/src/alpaka/classes_cuda_def.xml @@ -0,0 +1,10 @@ + + + + + + + + + + diff --git a/DataFormats/HcalDigi/src/alpaka/classes_rocm.h b/DataFormats/HcalDigi/src/alpaka/classes_rocm.h new file mode 100644 index 0000000000000..9133baede975b --- /dev/null +++ b/DataFormats/HcalDigi/src/alpaka/classes_rocm.h @@ -0,0 +1,4 @@ +#include "DataFormats/Common/interface/DeviceProduct.h" +#include "DataFormats/Common/interface/Wrapper.h" +#include "DataFormats/HcalDigi/interface/HcalDigiSoA.h" +#include "DataFormats/HcalDigi/interface/alpaka/HcalDigiDeviceCollection.h" diff --git a/DataFormats/HcalDigi/src/alpaka/classes_rocm_def.xml b/DataFormats/HcalDigi/src/alpaka/classes_rocm_def.xml new file mode 100644 index 0000000000000..09a4687fe13f7 --- /dev/null +++ b/DataFormats/HcalDigi/src/alpaka/classes_rocm_def.xml @@ -0,0 +1,10 @@ + + + + + + + + + + diff --git a/DataFormats/HcalDigi/src/classes.cc b/DataFormats/HcalDigi/src/classes.cc new file mode 100644 index 0000000000000..e6135a6bb2ad3 --- /dev/null +++ b/DataFormats/HcalDigi/src/classes.cc @@ -0,0 +1,5 @@ +#include "DataFormats/HcalDigi/interface/HcalDigiHostCollection.h" +#include "DataFormats/Portable/interface/PortableHostCollectionReadRules.h" + +SET_PORTABLEHOSTCOLLECTION_READ_RULES(hcal::Phase1DigiHostCollection); +SET_PORTABLEHOSTCOLLECTION_READ_RULES(hcal::Phase0DigiHostCollection); diff --git a/DataFormats/HcalDigi/src/classes.h b/DataFormats/HcalDigi/src/classes.h index 0cc57f20a7257..1909bcee06312 100644 --- a/DataFormats/HcalDigi/src/classes.h +++ b/DataFormats/HcalDigi/src/classes.h @@ -6,6 +6,8 @@ #include "DataFormats/HcalDigi/interface/HODataFrame.h" #include "DataFormats/HcalDigi/interface/HcalHistogramDigi.h" #include "DataFormats/HcalDigi/interface/HcalDigiCollections.h" +#include "DataFormats/HcalDigi/interface/HcalDigiHostCollection.h" +#include "DataFormats/HcalDigi/interface/HcalDigiSoA.h" #include "DataFormats/HcalDigi/interface/HcalUnpackerReport.h" #include "DataFormats/HcalDigi/interface/HcalLaserDigi.h" #include "DataFormats/HcalDigi/interface/HcalTTPDigi.h" diff --git a/DataFormats/HcalDigi/src/classes_def.xml b/DataFormats/HcalDigi/src/classes_def.xml index db233bd7f7f0e..b6bb89cb7a7dd 100644 --- a/DataFormats/HcalDigi/src/classes_def.xml +++ b/DataFormats/HcalDigi/src/classes_def.xml @@ -113,4 +113,16 @@ + + + + + + + + + + + + diff --git a/EventFilter/HcalRawToDigi/plugins/BuildFile.xml b/EventFilter/HcalRawToDigi/plugins/BuildFile.xml index d4f72086ce85b..709745e08edaa 100644 --- a/EventFilter/HcalRawToDigi/plugins/BuildFile.xml +++ b/EventFilter/HcalRawToDigi/plugins/BuildFile.xml @@ -25,3 +25,12 @@ + + + + + + + + + diff --git a/EventFilter/HcalRawToDigi/plugins/alpaka/HcalDigisSoAProducer.cc b/EventFilter/HcalRawToDigi/plugins/alpaka/HcalDigisSoAProducer.cc new file mode 100644 index 0000000000000..3299b01a41ecf --- /dev/null +++ b/EventFilter/HcalRawToDigi/plugins/alpaka/HcalDigisSoAProducer.cc @@ -0,0 +1,198 @@ +#include "DataFormats/HcalDigi/interface/HcalDigiCollections.h" +#include "DataFormats/HcalDigi/interface/HcalDigiCollections.h" +#include "DataFormats/HcalDigi/interface/HcalDigiHostCollection.h" +#include "DataFormats/HcalDigi/interface/alpaka/HcalDigiDeviceCollection.h" + +#include "FWCore/Framework/interface/Event.h" +#include "FWCore/Framework/interface/EventSetup.h" +#include "FWCore/Framework/interface/MakerMacros.h" +#include "FWCore/Framework/interface/stream/EDProducer.h" +#include "FWCore/ParameterSet/interface/ParameterSet.h" + +#include "HeterogeneousCore/AlpakaCore/interface/alpaka/EDGetToken.h" +#include "HeterogeneousCore/AlpakaCore/interface/alpaka/EDPutToken.h" +#include "HeterogeneousCore/AlpakaCore/interface/alpaka/Event.h" +#include "HeterogeneousCore/AlpakaCore/interface/alpaka/EventSetup.h" +#include "HeterogeneousCore/AlpakaCore/interface/alpaka/stream/EDProducer.h" + +namespace ALPAKA_ACCELERATOR_NAMESPACE { + + class HcalDigisSoAProducer : public stream::EDProducer<> { + public: + explicit HcalDigisSoAProducer(edm::ParameterSet const& ps); + ~HcalDigisSoAProducer() override = default; + static void fillDescriptions(edm::ConfigurationDescriptions&); + + private: + void produce(device::Event&, device::EventSetup const&) override; + + private: + // input product tokens + edm::EDGetTokenT hbheDigiToken_; + edm::EDGetTokenT qie11DigiToken_; + + // type aliases + using HostCollectionPhase1 = hcal::Phase1DigiHostCollection; + using HostCollectionPhase0 = hcal::Phase0DigiHostCollection; + + using DeviceCollectionPhase1 = hcal::Phase1DigiDeviceCollection; + using DeviceCollectionPhase0 = hcal::Phase0DigiDeviceCollection; + + // output product tokens + device::EDPutToken digisF01HEToken_; + device::EDPutToken digisF5HBToken_; + device::EDPutToken digisF3HBToken_; + + struct ConfigParameters { + uint32_t maxChannelsF01HE, maxChannelsF5HB, maxChannelsF3HB; + }; + ConfigParameters config_; + }; + + void HcalDigisSoAProducer::fillDescriptions(edm::ConfigurationDescriptions& confDesc) { + edm::ParameterSetDescription desc; + + desc.add("hbheDigisLabel", edm::InputTag("hcalDigis")); + desc.add("qie11DigiLabel", edm::InputTag("hcalDigis")); + desc.add("digisLabelF01HE", std::string{"f01HEDigis"}); + desc.add("digisLabelF5HB", std::string{"f5HBDigis"}); + desc.add("digisLabelF3HB", std::string{"f3HBDigis"}); + desc.add("maxChannelsF01HE", 10000u); + desc.add("maxChannelsF5HB", 10000u); + desc.add("maxChannelsF3HB", 10000u); + + confDesc.addWithDefaultLabel(desc); + } + + HcalDigisSoAProducer::HcalDigisSoAProducer(const edm::ParameterSet& ps) + : hbheDigiToken_{consumes(ps.getParameter("hbheDigisLabel"))}, + qie11DigiToken_{consumes(ps.getParameter("qie11DigiLabel"))}, + digisF01HEToken_{produces(ps.getParameter("digisLabelF01HE"))}, + digisF5HBToken_{produces(ps.getParameter("digisLabelF5HB"))}, + digisF3HBToken_{produces(ps.getParameter("digisLabelF3HB"))} { + config_.maxChannelsF01HE = ps.getParameter("maxChannelsF01HE"); + config_.maxChannelsF5HB = ps.getParameter("maxChannelsF5HB"); + config_.maxChannelsF3HB = ps.getParameter("maxChannelsF3HB"); + } + + void HcalDigisSoAProducer::produce(device::Event& event, device::EventSetup const& setup) { + const auto& hbheDigis = event.get(hbheDigiToken_); + const auto& qie11Digis = event.get(qie11DigiToken_); + + //Get the number of samples in data from the first digi + const int stride = HBHEDataFrame::MAXSAMPLES / 2 + 1; + const int size = hbheDigis.size() * stride; // number of channels * stride + + // stack host memory in the queue + HostCollectionPhase0 hf5_(size, event.queue()); + + // device product + DeviceCollectionPhase0 df5_(size, event.queue()); + + // set SoA_Scalar; + hf5_.view().stride() = stride; + hf5_.view().size() = hbheDigis.size(); + + for (unsigned int i = 0; i < hbheDigis.size(); ++i) { + auto const hbhe = hbheDigis[i]; + auto const id = hbhe.id().rawId(); + auto const presamples = hbhe.presamples(); + uint16_t header_word = (1 << 15) | (0x5 << 12) | (0 << 10) | ((hbhe.sample(0).capid() & 0x3) << 8); + + auto hf5_vi = hf5_.view()[i]; + hf5_vi.ids() = id; + hf5_vi.npresamples() = presamples; + hf5_vi.data()[0] = header_word; + //TODO:: get HEADER_WORDS/WORDS_PER_SAMPLE from DataFormat + for (unsigned int i = 0; i < hf5_.view().stride() - 1; i++) { + uint16_t s0 = (0 << 7) | (static_cast(hbhe.sample(2 * i).adc()) & 0x7f); + uint16_t s1 = (0 << 7) | (static_cast(hbhe.sample(2 * i + 1).adc()) & 0x7f); + uint16_t sample = (s1 << 8) | s0; + hf5_vi.data()[i + 1] = sample; + } + } + // copy hf5 to df5 + alpaka::memcpy(event.queue(), df5_.buffer(), hf5_.const_buffer()); + event.emplace(digisF5HBToken_, std::move(df5_)); + + if (qie11Digis.empty()) { + event.emplace(digisF01HEToken_); + event.emplace(digisF3HBToken_); + + } else { + auto size_f1 = 0; + auto size_f3 = 0; + + // count the size of the SOA; + for (unsigned int i = 0; i < qie11Digis.size(); i++) { + auto const digi = QIE11DataFrame{qie11Digis[i]}; + + if (digi.flavor() == 0 or digi.flavor() == 1) { + if (digi.detid().subdetId() == HcalEndcap) { + size_f1++; + } + } else if (digi.flavor() == 3) { + if (digi.detid().subdetId() == HcalBarrel) { + size_f3++; + } + } + } + auto const nsamples = qie11Digis.samples(); + auto const stride01 = nsamples * QIE11DataFrame::WORDS_PER_SAMPLE + QIE11DataFrame::HEADER_WORDS; + + // stack host memory in the queue + HostCollectionPhase1 hf1_(size_f1, event.queue()); + HostCollectionPhase1 hf3_(size_f3, event.queue()); + + // device product + DeviceCollectionPhase1 df1_(size_f1, event.queue()); + DeviceCollectionPhase1 df3_(size_f3, event.queue()); + + // set SoA_Scalar; + hf1_.view().stride() = stride01; + hf3_.view().stride() = stride01; + + unsigned int i_f1 = 0; //counters for f1 digis + unsigned int i_f3 = 0; //counters for f3 digis + + for (unsigned int i = 0; i < qie11Digis.size(); i++) { + auto const digi = QIE11DataFrame{qie11Digis[i]}; + assert(digi.samples() == qie11Digis.samples() && "collection nsamples must equal per digi samples"); + + if (digi.flavor() == 0 or digi.flavor() == 1) { + if (digi.detid().subdetId() != HcalEndcap) + continue; + auto hf01_vi = hf1_.view()[i_f1]; + + hf01_vi.ids() = digi.detid().rawId(); + for (int hw = 0; hw < QIE11DataFrame::HEADER_WORDS + digi.samples(); hw++) { + hf01_vi.data()[hw] = (qie11Digis[i][hw]); + } + i_f1++; + } else if (digi.flavor() == 3) { + if (digi.detid().subdetId() != HcalBarrel) + continue; + auto hf03_vi = hf3_.view()[i_f3]; + + hf03_vi.ids() = digi.detid().rawId(); + + for (int hw = 0; hw < QIE11DataFrame::HEADER_WORDS + digi.samples(); hw++) { + hf03_vi.data()[hw] = (qie11Digis[i][hw]); + } + i_f3++; + } + } + + hf1_.view().size() = size_f1; + hf3_.view().size() = size_f3; + + alpaka::memcpy(event.queue(), df1_.buffer(), hf1_.const_buffer()); + alpaka::memcpy(event.queue(), df3_.buffer(), hf3_.const_buffer()); + + event.emplace(digisF01HEToken_, std::move(df1_)); + event.emplace(digisF3HBToken_, std::move(df3_)); + } + } +} // namespace ALPAKA_ACCELERATOR_NAMESPACE +#include "HeterogeneousCore/AlpakaCore/interface/alpaka/MakerMacros.h" +DEFINE_FWK_ALPAKA_MODULE(HcalDigisSoAProducer); diff --git a/HLTrigger/Configuration/python/customizeHLTforAlpaka.py b/HLTrigger/Configuration/python/customizeHLTforAlpaka.py index 1ed387fe11a6f..619f38cf27ebd 100644 --- a/HLTrigger/Configuration/python/customizeHLTforAlpaka.py +++ b/HLTrigger/Configuration/python/customizeHLTforAlpaka.py @@ -272,6 +272,16 @@ def replaceItemsInSequence(process, seqNames, itemsToReplace, replacingSequence) return process +def customizeHLTforAlpakaPFSoA(process): + + # avoid the conversion from SoA to legacy to SoA for the HCAL rechits + process.hltParticleFlowRecHitHBHESoA.producers[0].src = 'hltHbheRecoSoA' + process.hltParticleFlowRecHitHBHESoASerialSync.producers[0].src = 'hltHbheRecoSoASerialSync' + + del process.hltHbheRecHitSoA + del process.hltHbheRecHitSoASerialSync + + return process ## Pixel HLT in Alpaka def customizeHLTforDQMGPUvsCPUPixel(process): @@ -1090,6 +1100,175 @@ def customizeHLTforAlpakaEcalLocalReco(process): return process +def customizeHLTforAlpakaHcalLocalReco(process): + + ## failsafe for fake menus + if not hasattr(process, 'HLTDoLocalHcalSequence'): + return process + + ## do not re-apply the customization if the menu is already migrated to alpaka + for prod in producers_by_type(process, 'HcalDigisSoAProducer@alpaka'): + return process + + # EventSetup modules + process.load('RecoLocalCalo.HcalRecProducers.hcalMahiConditionsESProducer_cfi') + process.load('RecoLocalCalo.HcalRecProducers.hcalMahiPulseOffsetsESProducer_cfi') + process.load('RecoLocalCalo.HcalRecProducers.hcalSiPMCharacteristicsESProducer_cfi') + process.load('RecoLocalCalo.HcalRecAlgos.hcalRecoParamWithPulseShapeESProducer_cfi') + + # the JobConfigurationGPURecord is provided by the hltESSJobConfigurationGPURecord ESSource + + # convert the HCAL digis to SoA format + from EventFilter.HcalRawToDigi.hcalDigisSoAProducer_cfi import hcalDigisSoAProducer as _hcalDigisSoAProducer + + # convert the HCAL digis to SoA format, and copies them to the device + process.hltHcalDigisSoA = _hcalDigisSoAProducer.clone( + hbheDigisLabel = 'hltHcalDigis', + qie11DigiLabel = 'hltHcalDigis' + ) + + # convert the HCAL digis to SoA format, and copies them to the host + process.hltHcalDigisSoASerialSync = makeSerialClone(process.hltHcalDigisSoA) + + + # run the HCAL local reconstruction (MAHI) and produce the rechits in SoA format + from RecoLocalCalo.HcalRecProducers.hbheRecHitProducerPortable_cfi import hbheRecHitProducerPortable as _hbheRecHitProducerPortable + + # run the HCAL local reconstruction (MAHI) and produce the rechits in SoA format on the device, and optionally copy the rechits to the host + process.hltHbheRecoSoA = _hbheRecHitProducerPortable.clone( + digisLabelF01HE = ('hltHcalDigisSoA', 'f01HEDigis'), + digisLabelF5HB = ('hltHcalDigisSoA', 'f5HBDigis'), + digisLabelF3HB = ('hltHcalDigisSoA', 'f3HBDigis'), + recHitsLabelM0HBHE = '', + mahiPulseOffSets = 'hcalMahiPulseOffsetsESProducer:' + ) + + # run the HCAL local reconstruction (MAHI) and produce the rechits in SoA format on the host + process.hltHbheRecoSoASerialSync = makeSerialClone(process.hltHbheRecoSoA, + digisLabelF01HE = ('hltHcalDigisSoASerialSync', 'f01HEDigis'), + digisLabelF5HB = ('hltHcalDigisSoASerialSync', 'f5HBDigis'), + digisLabelF3HB = ('hltHcalDigisSoASerialSync', 'f3HBDigis'), + ) + + + # convert the rechits in SoA format on the host to legacy format + from RecoLocalCalo.HcalRecProducers.hcalRecHitSoAToLegacy_cfi import hcalRecHitSoAToLegacy as _hcalRecHitSoAToLegacy + + # convert the rechits in SoA format on the host to legacy format + process.hltHbhereco = _hcalRecHitSoAToLegacy.clone( + src = 'hltHbheRecoSoA' + ) + + # convert the rechits in SoA format on the host to legacy format + process.hltHbherecoSerialSync = process.hltHbhereco.clone( + src = 'hltHbheRecoSoASerialSync' + ) + + # update the label of the rechits produced on the host + process.hltTowerMakerForAllSerialSync.hbheInput = "hltHbherecoSerialSync" + process.hltAK4CaloJetsIDPassedSerialSync.JetIDParams.hbheRecHitsColl = "hltHbherecoSerialSync" + process.hltHbheRecHitSoASerialSync.src = "hltHbherecoSerialSync" + process.hltMuonsSerialSync.TrackAssociatorParameters.HBHERecHitCollectionLabel = "hltHbherecoSerialSync" + process.hltMuonsSerialSync.CaloExtractorPSet.TrackAssociatorParameters.HBHERecHitCollectionLabel = "hltHbherecoSerialSync" + process.hltMuonsSerialSync.JetExtractorPSet.TrackAssociatorParameters.HBHERecHitCollectionLabel = "hltHbherecoSerialSync" + + # run the HCAL local reconstruction, potentially offloading the MHI step to the device + process.HLTDoLocalHcalSequence = cms.Sequence( + process.hltHcalDigis + + process.hltHcalDigisSoA + + process.hltHbheRecoSoA + + process.hltHbhereco + + process.hltHfprereco + + process.hltHfreco + + process.hltHoreco ) + + # run the HCAL local reconstruction on the host + process.HLTDoLocalHcalSequenceSerialSync = cms.Sequence( + process.hltHcalDigis + + process.hltHcalDigisSoASerialSync + + process.hltHbheRecoSoASerialSync + + process.hltHbherecoSerialSync + + process.hltHfprereco + + process.hltHfreco + + process.hltHoreco ) + + process.HLTDoCaloSequenceSerialSync = cms.Sequence( + process.HLTDoFullUnpackingEgammaEcalWithoutPreshowerSequenceSerialSync + + process.HLTDoLocalHcalSequenceSerialSync + + process.hltTowerMakerForAllSerialSync ) + + process.HLTDoCaloSequencePFSerialSync = cms.Sequence( + process.HLTDoFullUnpackingEgammaEcalWithoutPreshowerSequenceSerialSync + + process.HLTDoLocalHcalSequenceSerialSync + + process.hltTowerMakerForAllSerialSync ) + + # run the HBHE local reconstruction, potentially offloading the MHI step to the device + process.HLTStoppedHSCPLocalHcalReco = cms.Sequence( + process.hltHcalDigis + + process.hltHcalDigisSoA + + process.hltHbheRecoSoA + + process.hltHbhereco ) + + # PFJet reconstruction running on the host + process.HLTPFHcalClusteringSerialSync = cms.Sequence( + process.hltHbheRecHitSoASerialSync + + process.hltParticleFlowRecHitHBHESoASerialSync + + process.hltParticleFlowRecHitHBHESerialSync + + process.hltParticleFlowClusterHBHESoASerialSync + + process.hltParticleFlowClusterHBHESerialSync + + process.hltParticleFlowClusterHCALSerialSync ) + + # compare the HCAL local reconstruction running on the device and on the host + for pathNameMatch in ['DQM_HcalReconstruction_v', 'DQM_HIHcalReconstruction_v']: + dqmHcalRecoPathName = None + for pathName in process.paths_(): + if pathName.startswith(pathNameMatch): + dqmHcalRecoPath = getattr(process, pathName) + dqmHcalRecoPath.insert(dqmHcalRecoPath.index(process.HLTPFHcalClustering), getattr(process, 'HLTDoLocalHcalSequenceSerialSync')) + for delmod in ['hltHcalConsumerCPU', 'hltHcalConsumerGPU']: + if hasattr(process, delmod): + process.__delattr__(delmod) + + # modify EventContent of *DQMGPUvsCPU streams + for hltOutModMatch in ['hltOutputDQMGPUvsCPU', 'hltOutputHIDQMGPUvsCPU']: + if hasattr(process, hltOutModMatch): + outMod = getattr(process, hltOutModMatch) + outCmds_new = [foo for foo in outMod.outputCommands if 'Hbhe' not in foo] + outCmds_new += [ + 'keep *_hltHbhereco_*_*', + 'keep *_hltHbherecoSerialSync_*_*', + ] + outMod.outputCommands = outCmds_new[:] + + # delete the obsolete modules and tasks + del process.hcalMahiPulseOffsetsGPUESProducer + del process.hcalChannelQualityGPUESProducer + del process.hcalConvertedEffectivePedestalWidthsGPUESProducer + del process.hcalConvertedEffectivePedestalsGPUESProducer + del process.hcalConvertedPedestalWidthsGPUESProducer + del process.hcalConvertedPedestalsGPUESProducer + del process.hcalElectronicsMappingGPUESProducer + del process.hcalGainWidthsGPUESProducer + del process.hcalGainsGPUESProducer + del process.hcalLUTCorrsGPUESProducer + del process.hcalQIECodersGPUESProducer + del process.hcalQIETypesGPUESProducer + del process.hcalRecoParamsWithPulseShapesGPUESProducer + del process.hcalRespCorrsGPUESProducer + del process.hcalSiPMCharacteristicsGPUESProducer + del process.hcalSiPMParametersGPUESProducer + del process.hcalTimeCorrsGPUESProducer + + del process.hltHbherecoLegacy + del process.hltHcalDigisGPU + del process.hltHbherecoGPU + del process.hltHbherecoFromGPU + + del process.HLTDoLocalHcalTask + del process.HLTStoppedHSCPLocalHcalRecoTask + + return process + def customizeHLTforAlpakaStatus(process): if not hasattr(process, 'statusOnGPU'): @@ -1209,7 +1388,9 @@ def customizeHLTforAlpaka(process): process = customizeHLTforAlpakaStatus(process) process = customizeHLTforAlpakaPixelReco(process) process = customizeHLTforAlpakaEcalLocalReco(process) + process = customizeHLTforAlpakaHcalLocalReco(process) process = customizeHLTforAlpakaParticleFlowClustering(process) + process = customizeHLTforAlpakaPFSoA(process) process = customizeHLTforAlpakaRename(process) return process diff --git a/HeterogeneousCore/AlpakaInterface/interface/atomicMaxPair.h b/HeterogeneousCore/AlpakaInterface/interface/atomicMaxPair.h new file mode 100644 index 0000000000000..7daac00750cf6 --- /dev/null +++ b/HeterogeneousCore/AlpakaInterface/interface/atomicMaxPair.h @@ -0,0 +1,38 @@ +#ifndef HeterogeneousCore_AlpakaCore_interface_atomicMaxPair_h +#define HeterogeneousCore_AlpakaCore_interface_atomicMaxPair_h +#include + +#include "FWCore/Utilities/interface/bit_cast.h" +#include "HeterogeneousCore/AlpakaInterface/interface/config.h" + +// Note: Does not compile with ALPAKA_FN_ACC on ROCm +template >, typename F> +ALPAKA_FN_HOST_ACC ALPAKA_FN_INLINE void atomicMaxPair(const TAcc& acc, + unsigned long long int* address, + std::pair value, + F comparator) { +#if defined(__CUDA_ARCH__) or defined(__HIP_DEVICE_COMPILE__) + unsigned long long int val = (static_cast(value.first) << 32) + __float_as_uint(value.second); + unsigned long long int ret = *address; + while (comparator(value, + std::pair{static_cast(ret >> 32 & 0xffffffff), + __uint_as_float(ret & 0xffffffff)})) { + unsigned long long int old = ret; + if ((ret = atomicCAS(address, old, val)) == old) + break; + } +#else + unsigned long long int val = + (static_cast(value.first) << 32) + edm::bit_cast(value.second); + unsigned long long int ret = *address; + while (comparator(value, + std::pair{static_cast(ret >> 32 & 0xffffffff), + edm::bit_cast(static_cast(ret & 0xffffffff))})) { + unsigned long long int old = ret; + if ((ret = alpaka::atomicCas(acc, address, old, val)) == old) + break; + } +#endif // __CUDA_ARCH__ or __HIP_DEVICE_COMPILE__ +} + +#endif // HeterogeneousCore_AlpakaCore_interface_atomicMaxPair_h diff --git a/RecoLocalCalo/Configuration/python/hcalGlobalReco_cff.py b/RecoLocalCalo/Configuration/python/hcalGlobalReco_cff.py index a665bb5c1c296..078e8f2804f60 100644 --- a/RecoLocalCalo/Configuration/python/hcalGlobalReco_cff.py +++ b/RecoLocalCalo/Configuration/python/hcalGlobalReco_cff.py @@ -6,24 +6,31 @@ hbhereco = SwitchProducerCUDA( cpu = _phase0_hbhereco.clone() ) +hbherecoLegacy = _phase0_hbhereco.clone() + + hcalGlobalRecoTask = cms.Task(hbhereco) hcalGlobalRecoSequence = cms.Sequence(hcalGlobalRecoTask) hcalOnlyGlobalRecoTask = cms.Task() hcalOnlyGlobalRecoSequence = cms.Sequence(hcalOnlyGlobalRecoTask) +#-- Legacy HCAL Only Task +hcalOnlyLegacyGlobalRecoTask = cms.Task() + #--- for Run 3 and later from Configuration.Eras.Modifier_run3_HB_cff import run3_HB from RecoLocalCalo.HcalRecProducers.HBHEPhase1Reconstructor_cfi import hbheprereco as _phase1_hbheprereco run3_HB.toReplaceWith(hbhereco.cpu, _phase1_hbheprereco) run3_HB.toReplaceWith(hcalOnlyGlobalRecoTask, cms.Task(hbhereco)) +run3_HB.toReplaceWith(hbherecoLegacy, _phase1_hbheprereco) +run3_HB.toReplaceWith(hcalOnlyLegacyGlobalRecoTask, cms.Task(hbherecoLegacy)) -#-- Legacy HCAL Only Task -hcalOnlyLegacyGlobalRecoTask = hcalOnlyGlobalRecoTask.copy() #--- for Run 3 on GPU from Configuration.ProcessModifiers.gpu_cff import gpu +from Configuration.ProcessModifiers.alpaka_cff import alpaka from RecoLocalCalo.HcalRecProducers.hcalCPURecHitsProducer_cfi import hcalCPURecHitsProducer as _hbherecoFromCUDA (run3_HB & gpu).toModify(hbhereco, @@ -31,3 +38,16 @@ produceSoA = False ) ) + +from RecoLocalCalo.HcalRecProducers.hcalRecHitSoAToLegacy_cfi import hcalRecHitSoAToLegacy +(alpaka & run3_HB).toModify(hbhereco, + cpu = hcalRecHitSoAToLegacy.clone( + src = ("hbheRecHitProducerPortable","") + ) +) + +hbherecoSerial = hcalRecHitSoAToLegacy.clone( + src = ("hbheRecHitProducerSerial","") +) +alpaka.toReplaceWith(hcalGlobalRecoTask, hcalGlobalRecoTask.copyAndAdd(hbherecoSerial)) +alpaka.toReplaceWith(hcalOnlyGlobalRecoTask, hcalOnlyGlobalRecoTask.copyAndAdd(hbherecoSerial)) diff --git a/RecoLocalCalo/Configuration/python/hcalLocalReco_cff.py b/RecoLocalCalo/Configuration/python/hcalLocalReco_cff.py index 87b829c77a12b..feffe2e21b3fe 100644 --- a/RecoLocalCalo/Configuration/python/hcalLocalReco_cff.py +++ b/RecoLocalCalo/Configuration/python/hcalLocalReco_cff.py @@ -52,15 +52,17 @@ from Configuration.ProcessModifiers.run2_HECollapse_2018_cff import run2_HECollapse_2018 run2_HECollapse_2018.toReplaceWith(hcalLocalRecoTask, _collapse_hcalLocalRecoTask) +#--- Legacy HCAL Only Task +hbheprerecoLegacy = hbheprereco.cpu.clone() +hcalOnlyLegacyLocalRecoTask = hcalLocalRecoTask.copyAndExclude([zdcreco,hbheprereco]) +hcalOnlyLegacyLocalRecoTask.add(hbheprerecoLegacy) + #--- for Run 3 and later _run3_hcalLocalRecoTask = _phase1_hcalLocalRecoTask.copy() _run3_hcalLocalRecoTask.remove(hbheprereco) from Configuration.Eras.Modifier_run3_HB_cff import run3_HB run3_HB.toReplaceWith(hcalLocalRecoTask, _run3_hcalLocalRecoTask) -#--- Legacy HCAL Only Task -hcalOnlyLegacyLocalRecoTask = hcalLocalRecoTask.copyAndExclude([zdcreco]) - #--- for Run 3 on GPU from Configuration.ProcessModifiers.gpu_cff import gpu @@ -69,6 +71,13 @@ _run3_hcalLocalRecoGPUTask.add(hbheRecHitProducerGPUTask) gpu.toReplaceWith(hcalLocalRecoTask, _run3_hcalLocalRecoGPUTask) +#--- for alpaka +from Configuration.ProcessModifiers.alpaka_cff import alpaka +from RecoLocalCalo.HcalRecProducers.hbheRecHitProducerPortableTask_cff import * +_run3_hcalLocalRecoPortableTask = hcalLocalRecoTask.copy() +_run3_hcalLocalRecoPortableTask.add(hbheRecHitProducerPortableTask) +alpaka.toReplaceWith(hcalLocalRecoTask, _run3_hcalLocalRecoPortableTask) + #--- HCAL-only workflow hcalOnlyLocalRecoTask = hcalLocalRecoTask.copyAndExclude([zdcreco]) @@ -79,6 +88,11 @@ produceSoA = False ) ) +#--- HCAL-only workflow for Run 2 on GPU +from RecoLocalCalo.HcalRecProducers.hcalRecHitSoAToLegacy_cfi import hcalRecHitSoAToLegacy +(alpaka & ~run3_HB).toModify(hbheprereco, + cpu = hcalRecHitSoAToLegacy.clone() +) #--- for FastSim _fastSim_hcalLocalRecoTask = hcalLocalRecoTask.copyAndExclude([zdcreco]) diff --git a/RecoLocalCalo/HcalRecAlgos/plugins/BuildFile.xml b/RecoLocalCalo/HcalRecAlgos/plugins/BuildFile.xml index dcf7f83ae8db1..5d9d2b9f969cc 100644 --- a/RecoLocalCalo/HcalRecAlgos/plugins/BuildFile.xml +++ b/RecoLocalCalo/HcalRecAlgos/plugins/BuildFile.xml @@ -5,3 +5,11 @@ + + + + + + + + diff --git a/RecoLocalCalo/HcalRecAlgos/plugins/alpaka/HcalRecoParamsWithPulseShapesESProducer.cc b/RecoLocalCalo/HcalRecAlgos/plugins/alpaka/HcalRecoParamsWithPulseShapesESProducer.cc new file mode 100644 index 0000000000000..49df0ad42749d --- /dev/null +++ b/RecoLocalCalo/HcalRecAlgos/plugins/alpaka/HcalRecoParamsWithPulseShapesESProducer.cc @@ -0,0 +1,113 @@ +#include "FWCore/Framework/interface/ESTransientHandle.h" +#include "FWCore/ParameterSet/interface/ParameterSet.h" + +#include "CondFormats/HcalObjects/interface/HcalRecoParamWithPulseShapeHost.h" +#include "CondFormats/HcalObjects/interface/HcalRecoParamWithPulseShapeSoA.h" +#include "CondFormats/DataRecord/interface/HcalRecoParamsRcd.h" + +#include "CalibCalorimetry/HcalAlgos/interface/HcalPulseShapes.h" +#include "RecoLocalCalo/HcalRecAlgos/interface/PulseShapeFunctor.h" +#include "RecoLocalCalo/HcalRecAlgos/interface/HcalConstants.h" + +#include "HeterogeneousCore/AlpakaCore/interface/alpaka/ESGetToken.h" +#include "HeterogeneousCore/AlpakaCore/interface/alpaka/ESProducer.h" +#include "HeterogeneousCore/AlpakaCore/interface/alpaka/ModuleFactory.h" +#include "HeterogeneousCore/AlpakaInterface/interface/config.h" +#include "HeterogeneousCore/AlpakaInterface/interface/host.h" +#include "HeterogeneousCore/AlpakaInterface/interface/memory.h" + +namespace ALPAKA_ACCELERATOR_NAMESPACE { + class HcalRecoParamWithPulseShapeESProducer : public ESProducer { + public: + HcalRecoParamWithPulseShapeESProducer(edm::ParameterSet const& iConfig) : ESProducer(iConfig) { + auto cc = setWhatProduced(this); + recoParamsToken_ = cc.consumes(); + } + + static void fillDescriptions(edm::ConfigurationDescriptions& descriptions) { + edm::ParameterSetDescription desc; + descriptions.addWithDefaultLabel(desc); + } + + std::unique_ptr produce(HcalRecoParamsRcd const& iRecord) { + auto const& recoParams = iRecord.get(recoParamsToken_); + + auto const containers = recoParams.getAllContainers(); + size_t const totalChannels = + recoParams.getAllContainers()[0].second.size() + recoParams.getAllContainers()[1].second.size(); + + //Get unique ids + HcalPulseShapes pulseShapes; + std::unordered_map idCache; // + + auto const& barrelValues = containers[0].second; + for (uint64_t i = 0; i < barrelValues.size(); ++i) { + auto const pulseShapeId = barrelValues[i].pulseShapeID(); + if (pulseShapeId == 0) + continue; + if (auto const iter = idCache.find(pulseShapeId); iter == idCache.end()) { + idCache[pulseShapeId] = idCache.size(); + } + } + auto const& endcapValues = containers[1].second; + for (uint64_t i = 0; i < endcapValues.size(); ++i) { + auto const pulseShapeId = endcapValues[i].pulseShapeID(); + if (pulseShapeId == 0) + continue; + if (auto const iter = idCache.find(pulseShapeId); iter == idCache.end()) { + idCache[pulseShapeId] = idCache.size(); + } + } + + // Fill products + auto product = std::make_unique( + totalChannels, idCache.size(), cms::alpakatools::host()); + auto recoView = product->recoParamView(); + auto pulseShapeView = product->pulseShapeView(); + for (uint64_t i = 0; i < barrelValues.size(); ++i) { + auto vi = recoView[i]; + vi.param1() = barrelValues[i].param1(); + vi.param2() = barrelValues[i].param2(); + vi.ids() = (barrelValues[i].pulseShapeID() == 0) + ? 0 + : idCache.at(barrelValues[i].pulseShapeID()); //idx of the pulseShape of channel i + } + // fill in endcap + auto const offset = barrelValues.size(); + for (uint64_t i = 0; i < endcapValues.size(); ++i) { + auto vi = recoView[i + offset]; + vi.param1() = endcapValues[i].param1(); + vi.param2() = endcapValues[i].param2(); + vi.ids() = (endcapValues[i].pulseShapeID() == 0) + ? 0 + : idCache.at(endcapValues[i].pulseShapeID()); //idx of the pulseShape of channel i + } + + //fill pulseShape views + for (auto& it : idCache) { + auto const pulseShapeId = it.first; + auto const arrId = it.second; + auto const& pulseShape = pulseShapes.getShape(pulseShapeId); + FitterFuncs::PulseShapeFunctor functor{pulseShape, false, false, false, 1, 0, 0, hcal::constants::maxSamples}; + + for (int i = 0; i < hcal::constants::maxPSshapeBin; i++) { + pulseShapeView[arrId].acc25nsVec()[i] = functor.acc25nsVec()[i]; + pulseShapeView[arrId].diff25nsItvlVec()[i] = functor.diff25nsItvlVec()[i]; + } + for (int i = 0; i < hcal::constants::nsPerBX; i++) { + pulseShapeView[arrId].accVarLenIdxMinusOneVec()[i] = functor.accVarLenIdxMinusOneVec()[i]; + pulseShapeView[arrId].diffVarItvlIdxMinusOneVec()[i] = functor.diffVarItvlIdxMinusOneVec()[i]; + pulseShapeView[arrId].accVarLenIdxZEROVec()[i] = functor.accVarLenIdxZEROVec()[i]; + pulseShapeView[arrId].diffVarItvlIdxZEROVec()[i] = functor.diffVarItvlIdxZEROVec()[i]; + } + } + + return product; + } + + private: + edm::ESGetToken recoParamsToken_; + }; +} // namespace ALPAKA_ACCELERATOR_NAMESPACE + +DEFINE_FWK_EVENTSETUP_ALPAKA_MODULE(HcalRecoParamWithPulseShapeESProducer); diff --git a/RecoLocalCalo/HcalRecProducers/plugins/BuildFile.xml b/RecoLocalCalo/HcalRecProducers/plugins/BuildFile.xml new file mode 100644 index 0000000000000..351449b0d24e1 --- /dev/null +++ b/RecoLocalCalo/HcalRecProducers/plugins/BuildFile.xml @@ -0,0 +1,25 @@ + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/RecoLocalCalo/HcalRecProducers/plugins/alpaka/HBHERecHitProducerPortable.cc b/RecoLocalCalo/HcalRecProducers/plugins/alpaka/HBHERecHitProducerPortable.cc new file mode 100644 index 0000000000000..6575b6d95964d --- /dev/null +++ b/RecoLocalCalo/HcalRecProducers/plugins/alpaka/HBHERecHitProducerPortable.cc @@ -0,0 +1,167 @@ +#include "FWCore/Framework/interface/MakerMacros.h" +#include "FWCore/ParameterSet/interface/ParameterSet.h" + +#include "CondFormats/DataRecord/interface/HcalSiPMCharacteristicsRcd.h" +#include "CondFormats/HcalObjects/interface/alpaka/HcalSiPMCharacteristicsDevice.h" +#include "CondFormats/DataRecord/interface/HcalMahiConditionsRcd.h" +#include "CondFormats/HcalObjects/interface/alpaka/HcalMahiConditionsDevice.h" +#include "CondFormats/DataRecord/interface/HcalRecoParamsRcd.h" +#include "CondFormats/HcalObjects/interface/alpaka/HcalRecoParamWithPulseShapeDevice.h" +#include "CondFormats/HcalObjects/interface/alpaka/HcalMahiPulseOffsetsDevice.h" + +#include "DataFormats/HcalDigi/interface/alpaka/HcalDigiDeviceCollection.h" +#include "DataFormats/HcalRecHit/interface/alpaka/HcalRecHitDeviceCollection.h" + +#include "HeterogeneousCore/AlpakaInterface/interface/config.h" +#include "HeterogeneousCore/AlpakaCore/interface/alpaka/Event.h" +#include "HeterogeneousCore/AlpakaCore/interface/alpaka/EventSetup.h" +#include "HeterogeneousCore/AlpakaCore/interface/alpaka/EDGetToken.h" +#include "HeterogeneousCore/AlpakaCore/interface/alpaka/EDPutToken.h" +#include "HeterogeneousCore/AlpakaCore/interface/alpaka/stream/EDProducer.h" +#include "HeterogeneousCore/CUDACore/interface/JobConfigurationGPURecord.h" + +#include "Mahi.h" +#include "CalibCalorimetry/HcalAlgos/interface/HcalTimeSlew.h" + +namespace ALPAKA_ACCELERATOR_NAMESPACE { + + class HBHERecHitProducerPortable : public stream::EDProducer<> { + public: + explicit HBHERecHitProducerPortable(edm::ParameterSet const&); + ~HBHERecHitProducerPortable() override = default; + static void fillDescriptions(edm::ConfigurationDescriptions&); + + private: + void produce(device::Event&, device::EventSetup const&) override; + + using IProductTypef01 = hcal::Phase1DigiDeviceCollection; + const device::EDGetToken digisTokenF01HE_; + + using IProductTypef5 = hcal::Phase0DigiDeviceCollection; + const device::EDGetToken digisTokenF5HB_; + + using IProductTypef3 = hcal::Phase1DigiDeviceCollection; + const device::EDGetToken digisTokenF3HB_; + + using OProductType = hcal::RecHitDeviceCollection; + const device::EDPutToken rechitsM0Token_; + + const device::ESGetToken mahiConditionsToken_; + const device::ESGetToken + sipmCharacteristicsToken_; + const device::ESGetToken recoParamsToken_; + const device::ESGetToken mahiPulseOffsetsToken_; + // + + hcal::reconstruction::ConfigParameters configParameters_; + }; + + HBHERecHitProducerPortable::HBHERecHitProducerPortable(edm::ParameterSet const& ps) + : digisTokenF01HE_{consumes(ps.getParameter("digisLabelF01HE"))}, + digisTokenF5HB_{consumes(ps.getParameter("digisLabelF5HB"))}, + digisTokenF3HB_{consumes(ps.getParameter("digisLabelF3HB"))}, + rechitsM0Token_{produces()}, + mahiConditionsToken_{esConsumes()}, + sipmCharacteristicsToken_{esConsumes()}, + recoParamsToken_{esConsumes()}, + mahiPulseOffsetsToken_{esConsumes(ps.getParameter("mahiPulseOffSets"))} { + configParameters_.maxTimeSamples = ps.getParameter("maxTimeSamples"); + configParameters_.kprep1dChannelsPerBlock = ps.getParameter("kprep1dChannelsPerBlock"); + configParameters_.sipmQTSShift = ps.getParameter("sipmQTSShift"); + configParameters_.sipmQNTStoSum = ps.getParameter("sipmQNTStoSum"); + configParameters_.firstSampleShift = ps.getParameter("firstSampleShift"); + //TODO: produce only pedestals_width or convertedPedestalWidths depending on this bool + configParameters_.useEffectivePedestals = ps.getParameter("useEffectivePedestals"); + + configParameters_.meanTime = ps.getParameter("meanTime"); + configParameters_.timeSigmaSiPM = ps.getParameter("timeSigmaSiPM"); + configParameters_.timeSigmaHPD = ps.getParameter("timeSigmaHPD"); + configParameters_.ts4Thresh = ps.getParameter("ts4Thresh"); + + configParameters_.applyTimeSlew = ps.getParameter("applyTimeSlew"); + auto const tzeroValues = ps.getParameter>("tzeroTimeSlewParameters"); + auto const slopeValues = ps.getParameter>("slopeTimeSlewParameters"); + auto const tmaxValues = ps.getParameter>("tmaxTimeSlewParameters"); + + configParameters_.tzeroTimeSlew = tzeroValues[HcalTimeSlew::Medium]; + configParameters_.slopeTimeSlew = slopeValues[HcalTimeSlew::Medium]; + configParameters_.tmaxTimeSlew = tmaxValues[HcalTimeSlew::Medium]; + + auto threadsMinimize = ps.getParameter>("kernelMinimizeThreads"); + configParameters_.kernelMinimizeThreads[0] = threadsMinimize[0]; + configParameters_.kernelMinimizeThreads[1] = threadsMinimize[1]; + configParameters_.kernelMinimizeThreads[2] = threadsMinimize[2]; + } + + void HBHERecHitProducerPortable::fillDescriptions(edm::ConfigurationDescriptions& cdesc) { + edm::ParameterSetDescription desc; + desc.add("mahiPulseOffSets", edm::ESInputTag("")); + desc.add("maxTimeSamples", 10); + desc.add("kprep1dChannelsPerBlock", 32); + desc.add("digisLabelF01HE", edm::InputTag{"hcalRawToDigiGPU", "f01HEDigisGPU"}); + desc.add("digisLabelF5HB", edm::InputTag{"hcalRawToDigiGPU", "f5HBDigisGPU"}); + desc.add("digisLabelF3HB", edm::InputTag{"hcalRawToDigiGPU", "f3HBDigisGPU"}); + desc.add("recHitsLabelM0HBHE", "recHitsM0HBHE"); + desc.add("sipmQTSShift", 0); + desc.add("sipmQNTStoSum", 3); + desc.add("firstSampleShift", 0); + desc.add("useEffectivePedestals", true); + + desc.add("meanTime", 0.f); + desc.add("timeSigmaSiPM", 2.5f); + desc.add("timeSigmaHPD", 5.0f); + desc.add("ts4Thresh", 0.0); + + desc.add("applyTimeSlew", true); + desc.add>("tzeroTimeSlewParameters", {23.960177, 11.977461, 9.109694}); + desc.add>("slopeTimeSlewParameters", {-3.178648, -1.5610227, -1.075824}); + desc.add>("tmaxTimeSlewParameters", {16.00, 10.00, 6.25}); + desc.add>("kernelMinimizeThreads", {16, 1, 1}); + + cdesc.addWithDefaultLabel(desc); + } + + void HBHERecHitProducerPortable::produce(device::Event& event, device::EventSetup const& setup) { + auto& queue = event.queue(); + + // get device collections from event + auto const& f01HEDigisDev = event.get(digisTokenF01HE_); + auto const& f5HBDigisDev = event.get(digisTokenF5HB_); + auto const& f3HBDigisDev = event.get(digisTokenF3HB_); + + auto const f01DigisSize = f01HEDigisDev->metadata().size(); + auto const f5DigisSize = f5HBDigisDev->metadata().size(); + auto const f3DigisSize = f3HBDigisDev->metadata().size(); + + auto const totalChannels = f01DigisSize + f5DigisSize + f3DigisSize; + OProductType outputGPU_{totalChannels, queue}; + + if (totalChannels > 0) { + // conditions + auto const& mahiConditionsDev = setup.getData(mahiConditionsToken_); + auto const& sipmCharacteristicsDev = setup.getData(sipmCharacteristicsToken_); + auto const& recoParamsWithPulseShapeDev = setup.getData(recoParamsToken_); + auto const& mahiPulseOffsetsDev = setup.getData(mahiPulseOffsetsToken_); + + // + // schedule algorithms + // + hcal::reconstruction::runMahiAsync(queue, + f01HEDigisDev.const_view(), + f5HBDigisDev.const_view(), + f3HBDigisDev.const_view(), + outputGPU_.view(), + mahiConditionsDev.const_view(), + sipmCharacteristicsDev.const_view(), + recoParamsWithPulseShapeDev.const_view(), + mahiPulseOffsetsDev.const_view(), + configParameters_); + } + //put into the event + event.emplace(rechitsM0Token_, std::move(outputGPU_)); + } + +} // namespace ALPAKA_ACCELERATOR_NAMESPACE + +#include "HeterogeneousCore/AlpakaCore/interface/alpaka/MakerMacros.h" +DEFINE_FWK_ALPAKA_MODULE(HBHERecHitProducerPortable); diff --git a/RecoLocalCalo/HcalRecProducers/plugins/alpaka/HcalMahiConditionsESProducer.cc b/RecoLocalCalo/HcalRecProducers/plugins/alpaka/HcalMahiConditionsESProducer.cc new file mode 100644 index 0000000000000..7a5d0e0291061 --- /dev/null +++ b/RecoLocalCalo/HcalRecProducers/plugins/alpaka/HcalMahiConditionsESProducer.cc @@ -0,0 +1,460 @@ +#include "FWCore/ParameterSet/interface/ParameterSet.h" +#include "FWCore/Utilities/interface/ESGetToken.h" + +#include "CondFormats/DataRecord/interface/HcalRecoParamsRcd.h" +#include "CondFormats/DataRecord/interface/HcalPedestalsRcd.h" +#include "CondFormats/DataRecord/interface/HcalGainsRcd.h" +#include "CondFormats/DataRecord/interface/HcalLUTCorrsRcd.h" +#include "CondFormats/DataRecord/interface/HcalRespCorrsRcd.h" +#include "CondFormats/DataRecord/interface/HcalTimeCorrsRcd.h" +#include "CondFormats/DataRecord/interface/HcalPedestalWidthsRcd.h" +#include "CondFormats/DataRecord/interface/HcalGainWidthsRcd.h" +#include "CondFormats/DataRecord/interface/HcalChannelQualityRcd.h" +#include "CondFormats/DataRecord/interface/HcalQIETypesRcd.h" +#include "CondFormats/DataRecord/interface/HcalQIEDataRcd.h" +#include "CondFormats/DataRecord/interface/HcalSiPMParametersRcd.h" +#include "CondFormats/HcalObjects/interface/HcalRecoParams.h" +#include "CondFormats/HcalObjects/interface/HcalPedestals.h" +#include "CondFormats/HcalObjects/interface/HcalGains.h" +#include "CondFormats/HcalObjects/interface/HcalLUTCorrs.h" +#include "CondFormats/HcalObjects/interface/HcalRespCorrs.h" +#include "CondFormats/HcalObjects/interface/HcalTimeCorrs.h" +#include "CondFormats/HcalObjects/interface/HcalPedestalWidths.h" +#include "CondFormats/HcalObjects/interface/HcalGainWidths.h" +#include "CondFormats/HcalObjects/interface/HcalChannelQuality.h" +#include "CondFormats/HcalObjects/interface/HcalQIETypes.h" +#include "CondFormats/HcalObjects/interface/HcalQIEData.h" +#include "CondFormats/HcalObjects/interface/HcalSiPMParameters.h" + +#include "Geometry/CaloTopology/interface/HcalTopology.h" +#include "Geometry/HcalCommonData/interface/HcalDDDRecConstants.h" + +#include "CondFormats/HcalObjects/interface/alpaka/HcalMahiConditionsDevice.h" +#include "CondFormats/HcalObjects/interface/HcalMahiConditionsSoA.h" +#include "CondFormats/DataRecord/interface/HcalMahiConditionsRcd.h" + +#include "HeterogeneousCore/AlpakaCore/interface/alpaka/ESProducer.h" +#include "HeterogeneousCore/AlpakaCore/interface/alpaka/ModuleFactory.h" +#include "HeterogeneousCore/AlpakaInterface/interface/config.h" +#include "HeterogeneousCore/AlpakaInterface/interface/host.h" +#include "HeterogeneousCore/AlpakaInterface/interface/memory.h" + +namespace ALPAKA_ACCELERATOR_NAMESPACE { + namespace { + float convertPedWidths( + float const value, float const width, int const i, HcalQIECoder const& coder, HcalQIEShape const& shape) { + float const y = value; + float const x = width; + unsigned const x1 = static_cast(std::floor(y)); + unsigned const x2 = static_cast(std::floor(y + 1.)); + unsigned iun = static_cast(i); + float const y1 = coder.charge(shape, x1, iun); + float const y2 = coder.charge(shape, x2, iun); + return (y2 - y1) * x; + } + float convertPed(float const x, int const i, HcalQIECoder const& coder, HcalQIEShape const& shape) { + int const x1 = static_cast(std::floor(x)); + int const x2 = static_cast(std::floor(x + 1)); + float const y2 = coder.charge(shape, x2, i); + float const y1 = coder.charge(shape, x1, i); + return (y2 - y1) * (x - x1) + y1; + } + } // namespace + + class HcalMahiConditionsESProducer : public ESProducer { + public: + HcalMahiConditionsESProducer(edm::ParameterSet const& iConfig) : ESProducer(iConfig) { + auto cc = setWhatProduced(this); + recoParamsToken_ = cc.consumes(); + pedestalsToken_ = cc.consumes(); + effectivePedestalsToken_ = cc.consumes(edm::ESInputTag{"", "withTopoEff"}); + gainsToken_ = cc.consumes(); + lutCorrsToken_ = cc.consumes(); + respCorrsToken_ = cc.consumes(); + timeCorrsToken_ = cc.consumes(); + pedestalWidthsToken_ = cc.consumes(); + effectivePedestalWidthsToken_ = cc.consumes(edm::ESInputTag{"", "withTopoEff"}); + gainWidthsToken_ = cc.consumes(); + channelQualityToken_ = cc.consumes(); + qieTypesToken_ = cc.consumes(); + qieDataToken_ = cc.consumes(); + sipmParametersToken_ = cc.consumes(); + topologyToken_ = cc.consumes(); + recConstantsToken_ = cc.consumes(); + } + + static void fillDescriptions(edm::ConfigurationDescriptions& descriptions) { + edm::ParameterSetDescription desc; + descriptions.addWithDefaultLabel(desc); + } + + std::unique_ptr produce(HcalMahiConditionsRcd const& iRecord) { + auto const& recoParams = iRecord.get(recoParamsToken_); + auto const& pedestals = iRecord.get(pedestalsToken_); + auto const& effectivePedestals = iRecord.get(effectivePedestalsToken_); + auto const& gains = iRecord.get(gainsToken_); + auto const& lutCorrs = iRecord.get(lutCorrsToken_); + auto const& respCorrs = iRecord.get(respCorrsToken_); + auto const& timeCorrs = iRecord.get(timeCorrsToken_); + auto const& pedestalWidths = iRecord.get(pedestalWidthsToken_); + auto const& effectivePedestalWidths = iRecord.get(effectivePedestalWidthsToken_); + auto const& gainWidths = iRecord.get(gainWidthsToken_); + auto const& channelQuality = iRecord.get(channelQualityToken_); + auto const& qieTypes = iRecord.get(qieTypesToken_); + auto const& qieData = iRecord.get(qieDataToken_); + auto const& sipmParameters = iRecord.get(sipmParametersToken_); + auto const& topology = iRecord.get(topologyToken_); + auto const& recConstants = iRecord.get(recConstantsToken_); + + size_t const totalChannels = + pedestals.getAllContainers()[0].second.size() + pedestals.getAllContainers()[1].second.size(); + + auto product = std::make_unique(totalChannels, cms::alpakatools::host()); + + auto view = product->view(); + + // convert pedestals + auto const ped_unitIsADC = pedestals.isADC(); + auto const effPed_unitIsADC = effectivePedestals.isADC(); + + // fill HB channels + auto const recoParams_containers = recoParams.getAllContainers(); + auto const pedestals_containers = pedestals.getAllContainers(); + auto const effectivePedestals_containers = effectivePedestals.getAllContainers(); + auto const gains_containers = gains.getAllContainers(); + auto const lutCorrs_containers = lutCorrs.getAllContainers(); + auto const respCorrs_containers = respCorrs.getAllContainers(); + auto const timeCorrs_containers = timeCorrs.getAllContainers(); + auto const pedestalWidths_containers = pedestalWidths.getAllContainers(); + auto const effectivePedestalWidths_containers = effectivePedestalWidths.getAllContainers(); + auto const gainWidths_containers = gainWidths.getAllContainers(); + auto const channelQuality_containers = channelQuality.getAllContainers(); + auto const qieTypes_containers = qieTypes.getAllContainers(); + auto const qieData_containers = qieData.getAllContainers(); + auto const sipmParameters_containers = sipmParameters.getAllContainers(); + + auto const& pedestals_barrel = pedestals_containers[0].second; + auto const& effectivePedestals_barrel = effectivePedestals_containers[0].second; + auto const& gains_barrel = gains_containers[0].second; + auto const& lutCorrs_barrel = lutCorrs_containers[0].second; + auto const& respCorrs_barrel = respCorrs_containers[0].second; + auto const& timeCorrs_barrel = timeCorrs_containers[0].second; + auto const& pedestalWidths_barrel = pedestalWidths_containers[0].second; + auto const& effectivePedestalWidths_barrel = effectivePedestalWidths_containers[0].second; + auto const& gainWidths_barrel = gainWidths_containers[0].second; + auto const& channelQuality_barrel = channelQuality_containers[0].second; + auto const& qieTypes_barrel = qieTypes_containers[0].second; + auto const& qieData_barrel = qieData_containers[0].second; + auto const& sipmParameters_barrel = sipmParameters_containers[0].second; + + for (uint64_t i = 0; i < pedestals_barrel.size(); ++i) { + auto vi = view[i]; + + // convert pedestals + auto const& qieCoder = qieData_barrel[i]; + auto const qieType = qieTypes_barrel[i].getValue() > 1 ? 1 : 0; + auto const& qieShape = qieData.getShape(qieType); + + // covert pedestal values if unit is ADC + vi.pedestals_value()[0] = ped_unitIsADC ? convertPed(pedestals_barrel[i].getValue(0), 0, qieCoder, qieShape) + : pedestals_barrel[i].getValue(0); + vi.pedestals_value()[1] = ped_unitIsADC ? convertPed(pedestals_barrel[i].getValue(1), 1, qieCoder, qieShape) + : pedestals_barrel[i].getValue(1); + vi.pedestals_value()[2] = ped_unitIsADC ? convertPed(pedestals_barrel[i].getValue(2), 2, qieCoder, qieShape) + : pedestals_barrel[i].getValue(2); + vi.pedestals_value()[3] = ped_unitIsADC ? convertPed(pedestals_barrel[i].getValue(3), 3, qieCoder, qieShape) + : pedestals_barrel[i].getValue(3); + + vi.pedestals_width()[0] = + ped_unitIsADC + ? convertPedWidths( + pedestals_barrel[i].getValue(0), pedestalWidths_barrel[i].getWidth(0), 0, qieCoder, qieShape) + : pedestalWidths_barrel[i].getWidth(0); + vi.pedestals_width()[1] = + ped_unitIsADC + ? convertPedWidths( + pedestals_barrel[i].getValue(1), pedestalWidths_barrel[i].getWidth(1), 1, qieCoder, qieShape) + : pedestalWidths_barrel[i].getWidth(1); + vi.pedestals_width()[2] = + ped_unitIsADC + ? convertPedWidths( + pedestals_barrel[i].getValue(2), pedestalWidths_barrel[i].getWidth(2), 2, qieCoder, qieShape) + : pedestalWidths_barrel[i].getWidth(2); + vi.pedestals_width()[3] = + ped_unitIsADC + ? convertPedWidths( + pedestals_barrel[i].getValue(3), pedestalWidths_barrel[i].getWidth(3), 3, qieCoder, qieShape) + : pedestalWidths_barrel[i].getWidth(3); + + vi.effectivePedestals()[0] = effPed_unitIsADC + ? convertPed(effectivePedestals_barrel[i].getValue(0), 0, qieCoder, qieShape) + : effectivePedestals_barrel[i].getValue(0); + vi.effectivePedestals()[1] = effPed_unitIsADC + ? convertPed(effectivePedestals_barrel[i].getValue(1), 1, qieCoder, qieShape) + : effectivePedestals_barrel[i].getValue(1); + vi.effectivePedestals()[2] = effPed_unitIsADC + ? convertPed(effectivePedestals_barrel[i].getValue(2), 2, qieCoder, qieShape) + : effectivePedestals_barrel[i].getValue(2); + vi.effectivePedestals()[3] = effPed_unitIsADC + ? convertPed(effectivePedestals_barrel[i].getValue(3), 3, qieCoder, qieShape) + : effectivePedestals_barrel[i].getValue(3); + + vi.effectivePedestalWidths()[0] = effPed_unitIsADC + ? convertPedWidths(effectivePedestals_barrel[i].getValue(0), + effectivePedestalWidths_barrel[i].getWidth(0), + 0, + qieCoder, + qieShape) + : effectivePedestalWidths_barrel[i].getWidth(0); + vi.effectivePedestalWidths()[1] = effPed_unitIsADC + ? convertPedWidths(effectivePedestals_barrel[i].getValue(1), + effectivePedestalWidths_barrel[i].getWidth(1), + 1, + qieCoder, + qieShape) + : effectivePedestalWidths_barrel[i].getWidth(1); + vi.effectivePedestalWidths()[2] = effPed_unitIsADC + ? convertPedWidths(effectivePedestals_barrel[i].getValue(2), + effectivePedestalWidths_barrel[i].getWidth(2), + 2, + qieCoder, + qieShape) + : effectivePedestalWidths_barrel[i].getWidth(2); + vi.effectivePedestalWidths()[3] = effPed_unitIsADC + ? convertPedWidths(effectivePedestals_barrel[i].getValue(3), + effectivePedestalWidths_barrel[i].getWidth(3), + 3, + qieCoder, + qieShape) + : effectivePedestalWidths_barrel[i].getWidth(3); + + vi.gains_value()[0] = gains_barrel[i].getValue(0); + vi.gains_value()[1] = gains_barrel[i].getValue(1); + vi.gains_value()[2] = gains_barrel[i].getValue(2); + vi.gains_value()[3] = gains_barrel[i].getValue(3); + + vi.lutCorrs_values() = lutCorrs_barrel[i].getValue(); + vi.respCorrs_values() = respCorrs_barrel[i].getValue(); + vi.timeCorrs_values() = timeCorrs_barrel[i].getValue(); + + vi.pedestalWidths_sigma00() = *(pedestalWidths_barrel[i].getValues()); + vi.pedestalWidths_sigma01() = *(pedestalWidths_barrel[i].getValues() + 1); + vi.pedestalWidths_sigma02() = *(pedestalWidths_barrel[i].getValues() + 2); + vi.pedestalWidths_sigma03() = *(pedestalWidths_barrel[i].getValues() + 3); + vi.pedestalWidths_sigma10() = *(pedestalWidths_barrel[i].getValues() + 4); + vi.pedestalWidths_sigma11() = *(pedestalWidths_barrel[i].getValues() + 5); + vi.pedestalWidths_sigma12() = *(pedestalWidths_barrel[i].getValues() + 6); + vi.pedestalWidths_sigma13() = *(pedestalWidths_barrel[i].getValues() + 7); + vi.pedestalWidths_sigma20() = *(pedestalWidths_barrel[i].getValues() + 8); + vi.pedestalWidths_sigma21() = *(pedestalWidths_barrel[i].getValues() + 9); + vi.pedestalWidths_sigma22() = *(pedestalWidths_barrel[i].getValues() + 10); + vi.pedestalWidths_sigma23() = *(pedestalWidths_barrel[i].getValues() + 11); + vi.pedestalWidths_sigma30() = *(pedestalWidths_barrel[i].getValues() + 12); + vi.pedestalWidths_sigma31() = *(pedestalWidths_barrel[i].getValues() + 13); + vi.pedestalWidths_sigma32() = *(pedestalWidths_barrel[i].getValues() + 14); + vi.pedestalWidths_sigma33() = *(pedestalWidths_barrel[i].getValues() + 15); + + vi.gainWidths_value0() = gainWidths_barrel[i].getValue(0); + vi.gainWidths_value1() = gainWidths_barrel[i].getValue(1); + vi.gainWidths_value2() = gainWidths_barrel[i].getValue(2); + vi.gainWidths_value3() = gainWidths_barrel[i].getValue(3); + + vi.channelQuality_status() = channelQuality_barrel[i].getValue(); + vi.qieTypes_values() = qieTypes_barrel[i].getValue(); + + for (uint32_t k = 0; k < 4; k++) + for (uint32_t l = 0; l < 4; l++) { + auto const linear = k * 4 + l; + vi.qieCoders_offsets()[linear] = qieData_barrel[i].offset(k, l); + vi.qieCoders_slopes()[linear] = qieData_barrel[i].slope(k, l); + } + + vi.sipmPar_type() = sipmParameters_barrel[i].getType(); + vi.sipmPar_auxi1() = sipmParameters_barrel[i].getauxi1(); + vi.sipmPar_fcByPE() = sipmParameters_barrel[i].getFCByPE(); + vi.sipmPar_darkCurrent() = sipmParameters_barrel[i].getDarkCurrent(); + vi.sipmPar_auxi2() = sipmParameters_barrel[i].getauxi2(); + } + + // fill HE channels + auto const& pedestals_endcaps = pedestals_containers[1].second; + auto const& effectivePedestals_endcaps = effectivePedestals_containers[1].second; + auto const& gains_endcaps = gains_containers[1].second; + auto const& lutCorrs_endcaps = lutCorrs_containers[1].second; + auto const& respCorrs_endcaps = respCorrs_containers[1].second; + auto const& timeCorrs_endcaps = timeCorrs_containers[1].second; + auto const& pedestalWidths_endcaps = pedestalWidths_containers[1].second; + auto const& effectivePedestalWidths_endcaps = effectivePedestalWidths_containers[1].second; + auto const& gainWidths_endcaps = gainWidths_containers[1].second; + auto const& channelQuality_endcaps = channelQuality_containers[1].second; + auto const& qieTypes_endcaps = qieTypes_containers[1].second; + auto const& qieData_endcaps = qieData_containers[1].second; + auto const& sipmParameters_endcaps = sipmParameters_containers[1].second; + + auto const offset = pedestals_barrel.size(); + + for (uint64_t i = 0; i < pedestals_endcaps.size(); ++i) { + auto const& qieCoder = qieData_endcaps[i]; + auto const qieType = qieTypes_endcaps[i].getValue() > 1 ? 1 : 0; + auto const& qieShape = qieData.getShape(qieType); + + auto vi = view[offset + i]; + + vi.pedestals_value()[0] = ped_unitIsADC ? convertPed(pedestals_endcaps[i].getValue(0), 0, qieCoder, qieShape) + : pedestals_endcaps[i].getValue(0); + vi.pedestals_value()[1] = ped_unitIsADC ? convertPed(pedestals_endcaps[i].getValue(1), 1, qieCoder, qieShape) + : pedestals_endcaps[i].getValue(1); + vi.pedestals_value()[2] = ped_unitIsADC ? convertPed(pedestals_endcaps[i].getValue(2), 2, qieCoder, qieShape) + : pedestals_endcaps[i].getValue(2); + vi.pedestals_value()[3] = ped_unitIsADC ? convertPed(pedestals_endcaps[i].getValue(3), 3, qieCoder, qieShape) + : pedestals_endcaps[i].getValue(3); + + vi.pedestals_width()[0] = + ped_unitIsADC + ? convertPedWidths( + pedestals_endcaps[i].getValue(0), pedestalWidths_endcaps[i].getWidth(0), 0, qieCoder, qieShape) + : pedestalWidths_endcaps[i].getWidth(0); + vi.pedestals_width()[1] = + ped_unitIsADC + ? convertPedWidths( + pedestals_endcaps[i].getValue(1), pedestalWidths_endcaps[i].getWidth(1), 1, qieCoder, qieShape) + : pedestalWidths_endcaps[i].getWidth(1); + vi.pedestals_width()[2] = + ped_unitIsADC + ? convertPedWidths( + pedestals_endcaps[i].getValue(2), pedestalWidths_endcaps[i].getWidth(2), 2, qieCoder, qieShape) + : pedestalWidths_endcaps[i].getWidth(2); + vi.pedestals_width()[3] = + ped_unitIsADC + ? convertPedWidths( + pedestals_endcaps[i].getValue(3), pedestalWidths_endcaps[i].getWidth(3), 3, qieCoder, qieShape) + : pedestalWidths_endcaps[i].getWidth(3); + + vi.effectivePedestals()[0] = effPed_unitIsADC + ? convertPed(effectivePedestals_endcaps[i].getValue(0), 0, qieCoder, qieShape) + : effectivePedestals_endcaps[i].getValue(0); + vi.effectivePedestals()[1] = effPed_unitIsADC + ? convertPed(effectivePedestals_endcaps[i].getValue(1), 1, qieCoder, qieShape) + : effectivePedestals_endcaps[i].getValue(1); + vi.effectivePedestals()[2] = effPed_unitIsADC + ? convertPed(effectivePedestals_endcaps[i].getValue(2), 2, qieCoder, qieShape) + : effectivePedestals_endcaps[i].getValue(2); + vi.effectivePedestals()[3] = effPed_unitIsADC + ? convertPed(effectivePedestals_endcaps[i].getValue(3), 3, qieCoder, qieShape) + : effectivePedestals_endcaps[i].getValue(3); + + vi.effectivePedestalWidths()[0] = effPed_unitIsADC + ? convertPedWidths(effectivePedestals_endcaps[i].getValue(0), + effectivePedestalWidths_endcaps[i].getWidth(0), + 0, + qieCoder, + qieShape) + : effectivePedestalWidths_endcaps[i].getWidth(0); + vi.effectivePedestalWidths()[1] = effPed_unitIsADC + ? convertPedWidths(effectivePedestals_endcaps[i].getValue(1), + effectivePedestalWidths_endcaps[i].getWidth(1), + 1, + qieCoder, + qieShape) + : effectivePedestalWidths_endcaps[i].getWidth(1); + vi.effectivePedestalWidths()[2] = effPed_unitIsADC + ? convertPedWidths(effectivePedestals_endcaps[i].getValue(2), + effectivePedestalWidths_endcaps[i].getWidth(2), + 2, + qieCoder, + qieShape) + : effectivePedestalWidths_endcaps[i].getWidth(2); + vi.effectivePedestalWidths()[3] = effPed_unitIsADC + ? convertPedWidths(effectivePedestals_endcaps[i].getValue(3), + effectivePedestalWidths_endcaps[i].getWidth(3), + 3, + qieCoder, + qieShape) + : effectivePedestalWidths_endcaps[i].getWidth(3); + + vi.gains_value()[0] = gains_endcaps[i].getValue(0); + vi.gains_value()[1] = gains_endcaps[i].getValue(1); + vi.gains_value()[2] = gains_endcaps[i].getValue(2); + vi.gains_value()[3] = gains_endcaps[i].getValue(3); + + vi.lutCorrs_values() = lutCorrs_endcaps[i].getValue(); + vi.respCorrs_values() = respCorrs_endcaps[i].getValue(); + vi.timeCorrs_values() = timeCorrs_endcaps[i].getValue(); + + vi.pedestalWidths_sigma00() = *(pedestalWidths_endcaps[i].getValues()); + vi.pedestalWidths_sigma01() = *(pedestalWidths_endcaps[i].getValues() + 1); + vi.pedestalWidths_sigma02() = *(pedestalWidths_endcaps[i].getValues() + 2); + vi.pedestalWidths_sigma03() = *(pedestalWidths_endcaps[i].getValues() + 3); + vi.pedestalWidths_sigma10() = *(pedestalWidths_endcaps[i].getValues() + 4); + vi.pedestalWidths_sigma11() = *(pedestalWidths_endcaps[i].getValues() + 5); + vi.pedestalWidths_sigma12() = *(pedestalWidths_endcaps[i].getValues() + 6); + vi.pedestalWidths_sigma13() = *(pedestalWidths_endcaps[i].getValues() + 7); + vi.pedestalWidths_sigma20() = *(pedestalWidths_endcaps[i].getValues() + 8); + vi.pedestalWidths_sigma21() = *(pedestalWidths_endcaps[i].getValues() + 9); + vi.pedestalWidths_sigma22() = *(pedestalWidths_endcaps[i].getValues() + 10); + vi.pedestalWidths_sigma23() = *(pedestalWidths_endcaps[i].getValues() + 11); + vi.pedestalWidths_sigma30() = *(pedestalWidths_endcaps[i].getValues() + 12); + vi.pedestalWidths_sigma31() = *(pedestalWidths_endcaps[i].getValues() + 13); + vi.pedestalWidths_sigma32() = *(pedestalWidths_endcaps[i].getValues() + 14); + vi.pedestalWidths_sigma33() = *(pedestalWidths_endcaps[i].getValues() + 15); + + vi.gainWidths_value0() = gainWidths_endcaps[i].getValue(0); + vi.gainWidths_value1() = gainWidths_endcaps[i].getValue(1); + vi.gainWidths_value2() = gainWidths_endcaps[i].getValue(2); + vi.gainWidths_value3() = gainWidths_endcaps[i].getValue(3); + + vi.channelQuality_status() = channelQuality_endcaps[i].getValue(); + vi.qieTypes_values() = qieTypes_endcaps[i].getValue(); + + for (uint32_t k = 0; k < 4; k++) + for (uint32_t l = 0; l < 4; l++) { + auto const linear = k * 4u + l; + vi.qieCoders_offsets()[linear] = qieData_endcaps[i].offset(k, l); + vi.qieCoders_slopes()[linear] = qieData_endcaps[i].slope(k, l); + } + + vi.sipmPar_type() = sipmParameters_endcaps[i].getType(); + vi.sipmPar_auxi1() = sipmParameters_endcaps[i].getauxi1(); + vi.sipmPar_fcByPE() = sipmParameters_endcaps[i].getFCByPE(); + vi.sipmPar_darkCurrent() = sipmParameters_endcaps[i].getDarkCurrent(); + vi.sipmPar_auxi2() = sipmParameters_endcaps[i].getauxi2(); + } + //fill the scalars + static const int IPHI_MAX = 72; // private member of topology + + view.maxDepthHB() = topology.maxDepthHB(); + view.maxDepthHE() = topology.maxDepthHE(); + view.maxPhiHE() = recConstants.getNPhi(1) > IPHI_MAX ? recConstants.getNPhi(1) : IPHI_MAX; + view.firstHBRing() = topology.firstHBRing(); + view.lastHBRing() = topology.lastHBRing(); + view.firstHERing() = topology.firstHERing(); + view.lastHERing() = topology.lastHERing(); + view.nEtaHB() = recConstants.getEtaRange(0).second - recConstants.getEtaRange(0).first + 1; + view.nEtaHE() = + topology.firstHERing() > topology.lastHERing() ? 0 : (topology.lastHERing() - topology.firstHERing() + 1); + view.offsetForHashes() = offset; + + return product; + } + + private: + edm::ESGetToken recoParamsToken_; + edm::ESGetToken pedestalsToken_; + edm::ESGetToken effectivePedestalsToken_; + edm::ESGetToken gainsToken_; + edm::ESGetToken lutCorrsToken_; + edm::ESGetToken respCorrsToken_; + edm::ESGetToken timeCorrsToken_; + edm::ESGetToken pedestalWidthsToken_; + edm::ESGetToken effectivePedestalWidthsToken_; + edm::ESGetToken gainWidthsToken_; + edm::ESGetToken channelQualityToken_; + edm::ESGetToken qieTypesToken_; + edm::ESGetToken qieDataToken_; + edm::ESGetToken sipmParametersToken_; + edm::ESGetToken topologyToken_; + edm::ESGetToken recConstantsToken_; + }; +} // namespace ALPAKA_ACCELERATOR_NAMESPACE + +DEFINE_FWK_EVENTSETUP_ALPAKA_MODULE(HcalMahiConditionsESProducer); diff --git a/RecoLocalCalo/HcalRecProducers/plugins/alpaka/HcalMahiPulseOffsetsESProducer.cc b/RecoLocalCalo/HcalRecProducers/plugins/alpaka/HcalMahiPulseOffsetsESProducer.cc new file mode 100644 index 0000000000000..483cff8d6ec3b --- /dev/null +++ b/RecoLocalCalo/HcalRecProducers/plugins/alpaka/HcalMahiPulseOffsetsESProducer.cc @@ -0,0 +1,44 @@ +#include "FWCore/ParameterSet/interface/ParameterSet.h" + +#include "CondFormats/HcalObjects/interface/alpaka/HcalMahiPulseOffsetsDevice.h" +#include "CondFormats/HcalObjects/interface/HcalMahiPulseOffsetsSoA.h" +#include "HeterogeneousCore/CUDACore/interface/JobConfigurationGPURecord.h" + +#include "HeterogeneousCore/AlpakaCore/interface/alpaka/ESProducer.h" +#include "HeterogeneousCore/AlpakaCore/interface/alpaka/ModuleFactory.h" +#include "HeterogeneousCore/AlpakaInterface/interface/config.h" +#include "HeterogeneousCore/AlpakaInterface/interface/host.h" +#include "HeterogeneousCore/AlpakaInterface/interface/memory.h" + +namespace ALPAKA_ACCELERATOR_NAMESPACE { + class HcalMahiPulseOffsetsESProducer : public ESProducer { + public: + HcalMahiPulseOffsetsESProducer(edm::ParameterSet const& iConfig) : ESProducer(iConfig) { + std::vector offsets = iConfig.getParameter>("pulseOffsets"); + + product = std::make_unique(offsets.size(), cms::alpakatools::host()); + + auto view = product->view(); + + for (uint32_t i = 0; i < offsets.size(); i++) { + view[i] = offsets[i]; + } + setWhatProduced(this); + } + + static void fillDescriptions(edm::ConfigurationDescriptions& descriptions) { + edm::ParameterSetDescription desc; + desc.add>("pulseOffsets", {-3, -2, -1, 0, 1, 2, 3, 4}); + descriptions.addWithDefaultLabel(desc); + } + + std::shared_ptr produce(JobConfigurationGPURecord const& iRecord) { + return product; + } + + private: + std::shared_ptr product; + }; +} // namespace ALPAKA_ACCELERATOR_NAMESPACE + +DEFINE_FWK_EVENTSETUP_ALPAKA_MODULE(HcalMahiPulseOffsetsESProducer); diff --git a/RecoLocalCalo/HcalRecProducers/plugins/alpaka/HcalSiPMCharacteristicsESProducer.cc b/RecoLocalCalo/HcalRecProducers/plugins/alpaka/HcalSiPMCharacteristicsESProducer.cc new file mode 100644 index 0000000000000..9bfcea18a0edd --- /dev/null +++ b/RecoLocalCalo/HcalRecProducers/plugins/alpaka/HcalSiPMCharacteristicsESProducer.cc @@ -0,0 +1,64 @@ +#include "FWCore/ParameterSet/interface/ParameterSet.h" +#include "FWCore/Utilities/interface/ESGetToken.h" + +#include "CondFormats/HcalObjects/interface/alpaka/HcalSiPMCharacteristicsDevice.h" +#include "CondFormats/HcalObjects/interface/HcalSiPMCharacteristicsSoA.h" +#include "CondFormats/DataRecord/interface/HcalSiPMCharacteristicsRcd.h" + +#include "HeterogeneousCore/AlpakaCore/interface/alpaka/ESProducer.h" +#include "HeterogeneousCore/AlpakaCore/interface/alpaka/ModuleFactory.h" +#include "HeterogeneousCore/AlpakaInterface/interface/config.h" +#include "HeterogeneousCore/AlpakaInterface/interface/host.h" +#include "HeterogeneousCore/AlpakaInterface/interface/memory.h" + +namespace ALPAKA_ACCELERATOR_NAMESPACE { + class HcalSiPMCharacteristicsESProducer : public ESProducer { + public: + HcalSiPMCharacteristicsESProducer(edm::ParameterSet const& iConfig) : ESProducer(iConfig) { + auto cc = setWhatProduced(this); + sipmCharacteristicsToken_ = cc.consumes(); + } + + static void fillDescriptions(edm::ConfigurationDescriptions& descriptions) { + edm::ParameterSetDescription desc; + descriptions.addWithDefaultLabel(desc); + } + + std::unique_ptr produce(HcalSiPMCharacteristicsRcd const& iRecord) { + auto const& sipmCharacteristics = iRecord.get(sipmCharacteristicsToken_); + + size_t const totalItems = sipmCharacteristics.getTypes(); + + auto product = std::make_unique(totalItems, cms::alpakatools::host()); + + auto view = product->view(); + + for (uint32_t i = 0; i < sipmCharacteristics.getTypes(); i++) { + auto vi = view[i]; + auto const type = sipmCharacteristics.getType(i); + + // type index starts with 1 .. 6 + if (static_cast(type) != i + 1) + throw cms::Exception("HcalSiPMCharacteristics") + << "Wrong assumption for HcalSiPMcharacteristics type values, " + << "should be type value <- type index + 1" << std::endl + << "Observed type value = " << type << " and index = " << i << std::endl; + + vi.precisionItem() = HcalSiPMCharacteristics::PrecisionItem(type, + sipmCharacteristics.getPixels(type), + sipmCharacteristics.getNonLinearities(type)[0], + sipmCharacteristics.getNonLinearities(type)[1], + sipmCharacteristics.getNonLinearities(type)[2], + sipmCharacteristics.getCrossTalk(type), + sipmCharacteristics.getAuxi1(type), + sipmCharacteristics.getAuxi2(type)); + } + return product; + } + + private: + edm::ESGetToken sipmCharacteristicsToken_; + }; +} // namespace ALPAKA_ACCELERATOR_NAMESPACE + +DEFINE_FWK_EVENTSETUP_ALPAKA_MODULE(HcalSiPMCharacteristicsESProducer); diff --git a/RecoLocalCalo/HcalRecProducers/plugins/alpaka/Mahi.dev.cc b/RecoLocalCalo/HcalRecProducers/plugins/alpaka/Mahi.dev.cc new file mode 100644 index 0000000000000..54f9e29e4a7a7 --- /dev/null +++ b/RecoLocalCalo/HcalRecProducers/plugins/alpaka/Mahi.dev.cc @@ -0,0 +1,1445 @@ +#include +#include +#include + +#include "HeterogeneousCore/AlpakaInterface/interface/workdivision.h" +#include "HeterogeneousCore/AlpakaInterface/interface/atomicMaxPair.h" + +#include "DataFormats/CaloRecHit/interface/MultifitComputations.h" +// needed to compile with USER_CXXFLAGS="-DCOMPUTE_TDC_TIME" +#include "DataFormats/HcalRecHit/interface/HcalSpecialTimes.h" +#include "FWCore/Utilities/interface/CMSUnrollLoop.h" +// TODO reuse some of the HCAL constats from +//#include "RecoLocalCalo/HcalRecAlgos/interface/HcalConstants.h" + +#include "Mahi.h" + +#ifdef HCAL_MAHI_GPUDEBUG +#define DETID_TO_DEBUG 1125647428 +#endif + +namespace ALPAKA_ACCELERATOR_NAMESPACE { + + using namespace cms::alpakatools; + namespace hcal::reconstruction { + namespace mahi { + + ALPAKA_FN_HOST_ACC ALPAKA_FN_INLINE float uint_as_float(uint32_t val) { +#if defined(__CUDA_ARCH__) or defined(__HIP_DEVICE_COMPILE__) + return __uint_as_float(val); +#else + return edm::bit_cast(val); +#endif + } + + ALPAKA_FN_HOST_ACC ALPAKA_FN_INLINE uint32_t float_as_uint(float val) { +#if defined(__CUDA_ARCH__) or defined(__HIP_DEVICE_COMPILE__) + return __float_as_uint(val); +#else + return edm::bit_cast(val); +#endif + } + + ALPAKA_FN_ACC ALPAKA_FN_INLINE float compute_time_slew_delay(float const fC, + float const tzero, + float const slope, + float const tmax) { + auto const rawDelay = tzero + slope * std::log(fC); + return rawDelay < 0 ? 0 : (rawDelay > tmax ? tmax : rawDelay); + } + + // HcalQIEShapes are hardcoded in HcalQIEData.cc basically + // + some logic to generate 128 and 256 value arrays... + ALPAKA_STATIC_ACC_MEM_CONSTANT float const qie8shape[129] = { + -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 16, + 18, 20, 22, 24, 26, 28, 31, 34, 37, 40, 44, 48, 52, 57, 62, 57, 62, + 67, 72, 77, 82, 87, 92, 97, 102, 107, 112, 117, 122, 127, 132, 142, 152, 162, + 172, 182, 192, 202, 217, 232, 247, 262, 282, 302, 322, 347, 372, 347, 372, 397, 422, + 447, 472, 497, 522, 547, 572, 597, 622, 647, 672, 697, 722, 772, 822, 872, 922, 972, + 1022, 1072, 1147, 1222, 1297, 1372, 1472, 1572, 1672, 1797, 1922, 1797, 1922, 2047, 2172, 2297, 2422, + 2547, 2672, 2797, 2922, 3047, 3172, 3297, 3422, 3547, 3672, 3922, 4172, 4422, 4672, 4922, 5172, 5422, + 5797, 6172, 6547, 6922, 7422, 7922, 8422, 9047, 9672, 10297}; + + ALPAKA_STATIC_ACC_MEM_CONSTANT float const qie11shape[257] = { + -0.5, 0.5, 1.5, 2.5, 3.5, 4.5, 5.5, 6.5, 7.5, 8.5, 9.5, 10.5, + 11.5, 12.5, 13.5, 14.5, 15.5, 17.5, 19.5, 21.5, 23.5, 25.5, 27.5, 29.5, + 31.5, 33.5, 35.5, 37.5, 39.5, 41.5, 43.5, 45.5, 47.5, 49.5, 51.5, 53.5, + 55.5, 59.5, 63.5, 67.5, 71.5, 75.5, 79.5, 83.5, 87.5, 91.5, 95.5, 99.5, + 103.5, 107.5, 111.5, 115.5, 119.5, 123.5, 127.5, 131.5, 135.5, 139.5, 147.5, 155.5, + 163.5, 171.5, 179.5, 187.5, 171.5, 179.5, 187.5, 195.5, 203.5, 211.5, 219.5, 227.5, + 235.5, 243.5, 251.5, 259.5, 267.5, 275.5, 283.5, 291.5, 299.5, 315.5, 331.5, 347.5, + 363.5, 379.5, 395.5, 411.5, 427.5, 443.5, 459.5, 475.5, 491.5, 507.5, 523.5, 539.5, + 555.5, 571.5, 587.5, 603.5, 619.5, 651.5, 683.5, 715.5, 747.5, 779.5, 811.5, 843.5, + 875.5, 907.5, 939.5, 971.5, 1003.5, 1035.5, 1067.5, 1099.5, 1131.5, 1163.5, 1195.5, 1227.5, + 1259.5, 1291.5, 1355.5, 1419.5, 1483.5, 1547.5, 1611.5, 1675.5, 1547.5, 1611.5, 1675.5, 1739.5, + 1803.5, 1867.5, 1931.5, 1995.5, 2059.5, 2123.5, 2187.5, 2251.5, 2315.5, 2379.5, 2443.5, 2507.5, + 2571.5, 2699.5, 2827.5, 2955.5, 3083.5, 3211.5, 3339.5, 3467.5, 3595.5, 3723.5, 3851.5, 3979.5, + 4107.5, 4235.5, 4363.5, 4491.5, 4619.5, 4747.5, 4875.5, 5003.5, 5131.5, 5387.5, 5643.5, 5899.5, + 6155.5, 6411.5, 6667.5, 6923.5, 7179.5, 7435.5, 7691.5, 7947.5, 8203.5, 8459.5, 8715.5, 8971.5, + 9227.5, 9483.5, 9739.5, 9995.5, 10251.5, 10507.5, 11019.5, 11531.5, 12043.5, 12555.5, 13067.5, 13579.5, + 12555.5, 13067.5, 13579.5, 14091.5, 14603.5, 15115.5, 15627.5, 16139.5, 16651.5, 17163.5, 17675.5, 18187.5, + 18699.5, 19211.5, 19723.5, 20235.5, 20747.5, 21771.5, 22795.5, 23819.5, 24843.5, 25867.5, 26891.5, 27915.5, + 28939.5, 29963.5, 30987.5, 32011.5, 33035.5, 34059.5, 35083.5, 36107.5, 37131.5, 38155.5, 39179.5, 40203.5, + 41227.5, 43275.5, 45323.5, 47371.5, 49419.5, 51467.5, 53515.5, 55563.5, 57611.5, 59659.5, 61707.5, 63755.5, + 65803.5, 67851.5, 69899.5, 71947.5, 73995.5, 76043.5, 78091.5, 80139.5, 82187.5, 84235.5, 88331.5, 92427.5, + 96523.5, 100620, 104716, 108812, 112908}; + + constexpr int32_t IPHI_MAX = 72; + + ALPAKA_FN_ACC ALPAKA_FN_INLINE uint32_t did2linearIndexHB( + uint32_t const didraw, int const maxDepthHB, int const firstHBRing, int const lastHBRing, int const nEtaHB) { + HcalDetId did{didraw}; + uint32_t const value = (did.depth() - 1) + maxDepthHB * (did.iphi() - 1); + return did.ieta() > 0 + ? value + maxDepthHB * hcal::reconstruction::mahi::IPHI_MAX * (did.ieta() - firstHBRing) + : value + maxDepthHB * hcal::reconstruction::mahi::IPHI_MAX * (did.ieta() + lastHBRing + nEtaHB); + } + ALPAKA_FN_ACC ALPAKA_FN_INLINE uint32_t did2linearIndexHE(uint32_t const didraw, + int const maxDepthHE, + int const maxPhiHE, + int const firstHERing, + int const lastHERing, + int const nEtaHE) { + HcalDetId did{didraw}; + uint32_t const value = (did.depth() - 1) + maxDepthHE * (did.iphi() - 1); + return did.ieta() > 0 ? value + maxDepthHE * maxPhiHE * (did.ieta() - firstHERing) + : value + maxDepthHE * maxPhiHE * (did.ieta() + lastHERing + nEtaHE); + } + ALPAKA_FN_ACC ALPAKA_FN_INLINE uint32_t get_qiecoder_index(uint32_t const capid, uint32_t const range) { + return capid * 4 + range; + } + + ALPAKA_FN_ACC ALPAKA_FN_INLINE float compute_reco_correction_factor(float const par1, + float const par2, + float const par3, + float const x) { + return par3 * x * x + par2 * x + par1; + } + + // compute the charge using the adc, qie type and the appropriate qie shape array + ALPAKA_FN_ACC ALPAKA_FN_INLINE float compute_coder_charge( + int const qieType, uint8_t const adc, uint8_t const capid, float const* qieOffsets, float const* qieSlopes) { + auto const range = qieType == 0 ? (adc >> 5) & 0x3 : (adc >> 6) & 0x3; + auto const* qieShapeToUse = qieType == 0 ? qie8shape : qie11shape; + auto const nbins = qieType == 0 ? 32 : 64; + auto const center = adc % nbins == nbins - 1 ? 0.5 * (3 * qieShapeToUse[adc] - qieShapeToUse[adc - 1]) + : 0.5 * (qieShapeToUse[adc] + qieShapeToUse[adc + 1]); + auto const index = get_qiecoder_index(capid, range); + return (center - qieOffsets[index]) / qieSlopes[index]; + } + + // this is from + // https://github.com/cms-sw/cmssw/blob/master/RecoLocalCalo/HcalRecProducers/src/HBHEPhase1Reconstructor.cc#L140 + + ALPAKA_FN_ACC ALPAKA_FN_INLINE float compute_diff_charge_gain(int const qieType, + uint8_t adc, + uint8_t const capid, + float const* qieOffsets, + float const* qieSlopes, + bool const isqie11) { + constexpr uint32_t mantissaMaskQIE8 = 0x1fu; + constexpr uint32_t mantissaMaskQIE11 = 0x3f; + auto const mantissaMask = isqie11 ? mantissaMaskQIE11 : mantissaMaskQIE8; + auto const q = compute_coder_charge(qieType, adc, capid, qieOffsets, qieSlopes); + auto const mantissa = adc & mantissaMask; + + if (mantissa == 0u || mantissa == mantissaMask - 1u) + return compute_coder_charge(qieType, adc + 1u, capid, qieOffsets, qieSlopes) - q; + else if (mantissa == 1u || mantissa == mantissaMask) + return q - compute_coder_charge(qieType, adc - 1u, capid, qieOffsets, qieSlopes); + else { + auto const qup = compute_coder_charge(qieType, adc + 1u, capid, qieOffsets, qieSlopes); + auto const qdown = compute_coder_charge(qieType, adc - 1u, capid, qieOffsets, qieSlopes); + auto const upgain = qup - q; + auto const downgain = q - qdown; + auto const averagegain = (qup - qdown) / 2.f; + if (std::abs(upgain - downgain) < 0.01f * averagegain) + return averagegain; + else { + auto const q2up = compute_coder_charge(qieType, adc + 2u, capid, qieOffsets, qieSlopes); + auto const q2down = compute_coder_charge(qieType, adc - 2u, capid, qieOffsets, qieSlopes); + auto const upgain2 = q2up - qup; + auto const downgain2 = qdown - q2down; + if (std::abs(upgain2 - upgain) < std::abs(downgain2 - downgain)) + return upgain; + else + return downgain; + } + } + } + + using PulseShapeConstElement = typename HcalPulseShapeSoA::ConstView::const_element; + // TODO: remove what's not needed + // originally from from RecoLocalCalo/HcalRecAlgos/src/PulseShapeFunctor.cc + ALPAKA_FN_ACC ALPAKA_FN_INLINE float compute_pulse_shape_value(PulseShapeConstElement const& pulseShape, + float const pulse_time, + int const sample, + int const shift) { + auto const& acc25nsVec = pulseShape.acc25nsVec(); + auto const& diff25nsItvlVec = pulseShape.diff25nsItvlVec(); + auto const& accVarLenIdxMinusOneVec = pulseShape.accVarLenIdxMinusOneVec(); + auto const& diffVarItvlIdxMinusOneVec = pulseShape.diffVarItvlIdxMinusOneVec(); + auto const& accVarLenIdxZeroVec = pulseShape.accVarLenIdxZEROVec(); + auto const& diffVarItvlIdxZeroVec = pulseShape.diffVarItvlIdxZEROVec(); + + // constants + constexpr float slew = 0.f; + constexpr auto ns_per_bx = ::hcal::constants::nsPerBX; + + // FIXME: clean up all the rounding... this is coming from original cpu version + float const i_start_float = -::hcal::constants::iniTimeShift - pulse_time - slew > 0.f + ? 0.f + : std::abs(-::hcal::constants::iniTimeShift - pulse_time - slew) + 1.f; + int i_start = static_cast(i_start_float); + float offset_start = static_cast(i_start) - ::hcal::constants::iniTimeShift - pulse_time - slew; + + // boundary + if (offset_start == 1.0f) { + offset_start = 0.f; + i_start -= 1; + } + + int const bin_start = static_cast(offset_start); + float const bin_start_up = static_cast(bin_start) + 0.5f; + int const bin_0_start = offset_start < bin_start_up ? bin_start - 1 : bin_start; + int const its_start = i_start / ns_per_bx; + int const distTo25ns_start = ns_per_bx - 1 - i_start % ns_per_bx; + auto const factor = offset_start - static_cast(bin_0_start) - 0.5; + + auto const sample_over10ts = sample + shift; + float value = 0.0f; + if (sample_over10ts == its_start) { + value = bin_0_start == -1 + ? accVarLenIdxMinusOneVec[distTo25ns_start] + factor * diffVarItvlIdxMinusOneVec[distTo25ns_start] + : accVarLenIdxZeroVec[distTo25ns_start] + factor * diffVarItvlIdxZeroVec[distTo25ns_start]; + } else if (sample_over10ts > its_start) { + int const bin_idx = distTo25ns_start + 1 + (sample_over10ts - its_start - 1) * ns_per_bx + bin_0_start; + value = acc25nsVec[bin_idx] + factor * diff25nsItvlVec[bin_idx]; + } + return value; + } + + // TODO: provide constants from configuration + // from RecoLocalCalo/HcalRecProducers/python/HBHEMahiParameters_cfi.py + constexpr int nMaxItersMin = 50; + constexpr int nMaxItersNNLS = 500; + constexpr double nnlsThresh = 1e-11; + constexpr float deltaChi2Threashold = 1e-3; + + // from RecoLocalCalo/HcalRecProducers/src/HBHEPhase1Reconstructor.cc + ALPAKA_FN_ACC float get_raw_charge(double const charge, + double const pedestal, + float const* shrChargeMinusPedestal, + float const parLin1, + float const parLin2, + float const parLin3, + int32_t const nsamplesForCompute, + int32_t const soi, + int const sipmQTSShift, + int const sipmQNTStoSum, + float const fcByPE, + int32_t const lch, + bool const isqie11) { + float rawCharge; + + if (!isqie11) + rawCharge = charge; + else { + int const first = std::max(soi + sipmQTSShift, 0); + int const last = std::min(soi + sipmQNTStoSum, nsamplesForCompute); + float sipmq = 0.0f; + for (auto ts = first; ts < last; ts++) + sipmq += shrChargeMinusPedestal[lch * nsamplesForCompute + ts]; + auto const effectivePixelsFired = sipmq / fcByPE; + auto const factor = compute_reco_correction_factor(parLin1, parLin2, parLin3, effectivePixelsFired); + rawCharge = (charge - pedestal) * factor + pedestal; + +#ifdef HCAL_MAHI_GPUDEBUG + printf("first = %d last = %d sipmQ = %f factor = %f rawCharge = %f\n", first, last, sipmq, factor, rawCharge); +#endif + } + return rawCharge; + } + + // Tried using lambda, but strange error from with nvcc + inline constexpr bool TSenergyCompare(std::pair a, std::pair b) { + return a.second > b.second; + }; + + class Kernel_prep1d_sameNumberOfSamples { + public: + template >> + ALPAKA_FN_ACC void operator()(TAcc const& acc, + OProductType::View outputGPU, + IProductTypef01::ConstView f01HEDigis, + IProductTypef5::ConstView f5HBDigis, + IProductTypef3::ConstView f3HBDigis, + HcalMahiConditionsPortableDevice::ConstView mahi, + HcalSiPMCharacteristicsPortableDevice::ConstView sipmCharacteristics, + HcalRecoParamWithPulseShapeDevice::ConstView recoParamsWithPS, + bool const useEffectivePedestals, + int const sipmQTSShift, + int const sipmQNTStoSum, + int const firstSampleShift, + float const ts4Thresh, + float* amplitudes, + float* noiseTerms, + float* electronicNoiseTerms, + int8_t* soiSamples, + int const windowSize) const { + auto const nchannelsf015 = f01HEDigis.size() + f5HBDigis.size(); + auto const startingSample = compute_nsamples(f01HEDigis.stride()) - windowSize; + auto const nchannels = f01HEDigis.size() + f5HBDigis.size() + f3HBDigis.size(); + + //first index = groups of channel + auto const nchannels_per_block(alpaka::getWorkDiv(acc)[0u]); + //2nd index = groups of sample + auto const nsamplesForCompute(alpaka::getWorkDiv(acc)[1u]); + + // configure shared mem + float* shrEnergyM0PerTS = alpaka::getDynSharedMem(acc); + float* shrChargeMinusPedestal = shrEnergyM0PerTS + nsamplesForCompute * nchannels_per_block; + float* shrMethod0EnergyAccum = shrChargeMinusPedestal + nsamplesForCompute * nchannels_per_block; + float* shrEnergyM0TotalAccum = shrMethod0EnergyAccum + nchannels_per_block; + unsigned long long int* shrMethod0EnergySamplePair = + reinterpret_cast(shrEnergyM0TotalAccum + nchannels_per_block); + + //Loop over all groups of channels + for (auto group : uniform_groups_y(acc, nchannels)) { + //Loop over each channel, first compute soiSamples and shrMem for all channels + for (auto channel : uniform_group_elements_y(acc, group, nchannels)) { + auto const gch = channel.global; + auto const lch = channel.local; + + for (auto i_sample : independent_group_elements_x(acc, nsamplesForCompute)) { + auto const sampleWithinWindow = i_sample; + auto const sample = i_sample + startingSample; + auto const linearThPerBlock = i_sample + channel.local * nsamplesForCompute; + // initialize + if (sampleWithinWindow == 0) { + soiSamples[gch] = -1; + shrMethod0EnergyAccum[lch] = 0; + shrMethod0EnergySamplePair[lch] = 0; + shrEnergyM0TotalAccum[lch] = 0; + } + + // compute soiSamples + if (gch < f01HEDigis.size()) { + // NOTE: assume that soi is high only for a single guy! + // which must be the case. cpu version does not check for that + // if that is not the case, we will see that with cuda mmecheck + auto const soibit = soibit_for_sample(&(f01HEDigis.data()[gch][0]), sample); + if (soibit == 1) + soiSamples[gch] = sampleWithinWindow; + } else if (gch >= nchannelsf015) { + auto const soibit = soibit_for_sample(&(f3HBDigis.data()[gch - nchannelsf015][0]), sample); + if (soibit == 1) + soiSamples[gch] = sampleWithinWindow; + } + + // compute shrMem + auto const id = gch < f01HEDigis.size() + ? f01HEDigis.ids()[gch] + : (gch < nchannelsf015 ? f5HBDigis.ids()[gch - f01HEDigis.size()] + : f3HBDigis.ids()[gch - nchannelsf015]); + auto const did = HcalDetId{id}; + + auto const adc = + gch < f01HEDigis.size() + ? adc_for_sample(&(f01HEDigis.data()[gch][0]), sample) + : (gch < nchannelsf015 + ? adc_for_sample(&(f5HBDigis.data()[gch - f01HEDigis.size()][0]), sample) + : adc_for_sample(&(f3HBDigis.data()[gch - nchannelsf015][0]), sample)); + auto const capid = + gch < f01HEDigis.size() + ? capid_for_sample(&(f01HEDigis.data()[gch][0]), sample) + : (gch < nchannelsf015 + ? capid_for_sample(&(f5HBDigis.data()[gch - f01HEDigis.size()][0]), sample) + : capid_for_sample(&(f3HBDigis.data()[gch - nchannelsf015][0]), sample)); + + // compute hash for this did + // hash needed to convert condition arrays order (based on Topo) into digi arrays order(based on FED) + auto const hashedId = + did.subdetId() == HcalBarrel + ? did2linearIndexHB(id, mahi.maxDepthHB(), mahi.firstHBRing(), mahi.lastHBRing(), mahi.nEtaHB()) + : did2linearIndexHE(id, + mahi.maxDepthHE(), + mahi.maxPhiHE(), + mahi.firstHERing(), + mahi.lastHERing(), + mahi.nEtaHE()) + + mahi.offsetForHashes(); + + // conditions based on the hash + auto const qieType = mahi.qieTypes_values()[hashedId] > 0 ? 1 : 0; // 2 types at this point + auto const* qieOffsets = mahi.qieCoders_offsets()[hashedId].data(); + auto const* qieSlopes = mahi.qieCoders_slopes()[hashedId].data(); + auto const pedestal = mahi.pedestals_value()[hashedId][capid]; + + // compute charge + auto const charge = compute_coder_charge(qieType, adc, capid, qieOffsets, qieSlopes); + + shrChargeMinusPedestal[linearThPerBlock] = charge - pedestal; + } + } + alpaka::syncBlockThreads(acc); + + //Loop over each channel, compute input for multifit using shrChargeMinusPedestal + for (auto channel : uniform_group_elements_y(acc, group, nchannels)) { + auto const gch = channel.global; + auto const lch = channel.local; + for (auto i_sample : independent_group_elements_x(acc, nsamplesForCompute)) { + auto const sampleWithinWindow = i_sample; + auto const sample = i_sample + startingSample; + + // initialize all output buffers + if (sampleWithinWindow == 0) { + outputGPU.detId()[gch] = 0; + outputGPU.energyM0()[gch] = 0; + outputGPU.timeM0()[gch] = 0; + outputGPU.energy()[gch] = 0; + outputGPU.chi2()[gch] = 0; + } + + // offset output + auto* amplitudesForChannel = amplitudes + nsamplesForCompute * gch; + auto* noiseTermsForChannel = noiseTerms + nsamplesForCompute * gch; + auto* electronicNoiseTermsForChannel = electronicNoiseTerms + nsamplesForCompute * gch; + + // get event input quantities + auto const stride = gch < f01HEDigis.size() + ? f01HEDigis.stride() + : (gch < f5HBDigis.size() ? f5HBDigis.stride() : f3HBDigis.stride()); + auto const nsamples = + gch < f01HEDigis.size() + ? compute_nsamples(stride) + : (gch < nchannelsf015 ? compute_nsamples(stride) : compute_nsamples(stride)); + + ALPAKA_ASSERT_ACC(nsamples == nsamplesForCompute || nsamples - startingSample == nsamplesForCompute); + + auto const id = gch < f01HEDigis.size() + ? f01HEDigis.ids()[gch] + : (gch < nchannelsf015 ? f5HBDigis.ids()[gch - f01HEDigis.size()] + : f3HBDigis.ids()[gch - nchannelsf015]); + auto const did = HcalDetId{id}; + + auto const adc = + gch < f01HEDigis.size() + ? adc_for_sample(&(f01HEDigis.data()[gch][0]), sample) + : (gch < nchannelsf015 + ? adc_for_sample(&(f5HBDigis.data()[gch - f01HEDigis.size()][0]), sample) + : adc_for_sample(&(f3HBDigis.data()[gch - nchannelsf015][0]), sample)); + auto const capid = + gch < f01HEDigis.size() + ? capid_for_sample(&(f01HEDigis.data()[gch][0]), sample) + : (gch < nchannelsf015 + ? capid_for_sample(&(f5HBDigis.data()[gch - f01HEDigis.size()][0]), sample) + : capid_for_sample(&(f3HBDigis.data()[gch - nchannelsf015][0]), sample)); + + // compute hash for this did + // hash needed to convert condition arrays order (based on Topo) into digi arrays order(based on FED) + auto const hashedId = + did.subdetId() == HcalBarrel + ? did2linearIndexHB(id, mahi.maxDepthHB(), mahi.firstHBRing(), mahi.lastHBRing(), mahi.nEtaHB()) + : did2linearIndexHE(id, + mahi.maxDepthHE(), + mahi.maxPhiHE(), + mahi.firstHERing(), + mahi.lastHERing(), + mahi.nEtaHE()) + + mahi.offsetForHashes(); + + // conditions based on the hash + auto const qieType = mahi.qieTypes_values()[hashedId] > 0 ? 1 : 0; // 2 types at this point + auto const* qieOffsets = mahi.qieCoders_offsets()[hashedId].data(); + auto const* qieSlopes = mahi.qieCoders_slopes()[hashedId].data(); + auto const* pedestalWidthsForChannel = + useEffectivePedestals && (gch < f01HEDigis.size() || gch >= nchannelsf015) + ? mahi.effectivePedestalWidths()[hashedId].data() + : mahi.pedestals_width()[hashedId].data(); + + auto const gain = mahi.gains_value()[hashedId][capid]; + auto const gain0 = mahi.gains_value()[hashedId][0]; + auto const respCorrection = mahi.respCorrs_values()[hashedId]; + auto const pedestal = mahi.pedestals_value()[hashedId][capid]; + auto const pedestalWidth = pedestalWidthsForChannel[capid]; + // if needed, only use effective pedestals for f01 + auto const pedestalToUseForMethod0 = + useEffectivePedestals && (gch < f01HEDigis.size() || gch >= nchannelsf015) + ? mahi.effectivePedestals()[hashedId][capid] + : pedestal; + auto const sipmType = mahi.sipmPar_type()[hashedId]; + auto const fcByPE = mahi.sipmPar_fcByPE()[hashedId]; + auto const recoParam1 = recoParamsWithPS.recoParamView().param1()[hashedId]; + auto const recoParam2 = recoParamsWithPS.recoParamView().param2()[hashedId]; + +#ifdef HCAL_MAHI_GPUDEBUG + printf("qieType = %d qieOffset0 = %f qieOffset1 = %f qieSlope0 = %f qieSlope1 = %f\n", + qieType, + qieOffsets[0], + qieOffsets[1], + qieSlopes[0], + qieSlopes[1]); +#endif + + // compute charge + auto const charge = compute_coder_charge(qieType, adc, capid, qieOffsets, qieSlopes); + + int32_t const soi = + gch < f01HEDigis.size() + ? soiSamples[gch] + : (gch < nchannelsf015 ? f5HBDigis.npresamples()[gch - f01HEDigis.size()] : soiSamples[gch]); + + bool badSOI = (soi < 0 or static_cast(soi) >= nsamplesForCompute); + if (badSOI and sampleWithinWindow == 0) { +#ifdef GPU_DEBUG + printf("Found HBHE channel %d with invalid SOI %d\n", gch, soi); +#endif + // mark the channel as bad + outputGPU.chi2()[gch] = -9999.f; + } + + // type index starts from 1 .. 6 + // precicionItem index starts from 0 .. 5 + auto const precisionItem = sipmCharacteristics.precisionItem()[sipmType - 1]; + auto const parLin1 = precisionItem.parLin1_; + auto const parLin2 = precisionItem.parLin2_; + auto const parLin3 = precisionItem.parLin3_; + + //int32_t const soi = gch >= nchannelsf01HE + // ? npresamplesf5HB[gch - nchannelsf01HE] + // : soiSamples[gch]; + // this is here just to make things uniform... + if (gch >= f01HEDigis.size() && gch < nchannelsf015 && sampleWithinWindow == 0) + soiSamples[gch] = f5HBDigis.npresamples()[gch - f01HEDigis.size()]; + + // + // compute various quantities (raw charge and tdc stuff) + // NOTE: this branch will be divergent only for a single warp that + // sits on the boundary when flavor 01 channels end and flavor 5 start + // + float const rawCharge = get_raw_charge(charge, + pedestal, + shrChargeMinusPedestal, + parLin1, + parLin2, + parLin3, + nsamplesForCompute, + soi, + sipmQTSShift, + sipmQNTStoSum, + fcByPE, + lch, + gch < f01HEDigis.size() || gch >= nchannelsf015); + + auto const dfc = compute_diff_charge_gain( + qieType, adc, capid, qieOffsets, qieSlopes, gch < f01HEDigis.size() || gch >= nchannelsf015); + +#ifdef COMPUTE_TDC_TIME + float tdcTime; + if (gch >= f01HEDigis.size() && gch < nchannelsf015) { + tdcTime = HcalSpecialTimes::UNKNOWN_T_NOTDC; + } else { + if (gch < f01HEDigis.size()) + tdcTime = + HcalSpecialTimes::getTDCTime(tdc_for_sample(&(f01HEDigis.data()[gch][0]), sample)); + else if (gch >= nchannelsf015) + tdcTime = HcalSpecialTimes::getTDCTime( + tdc_for_sample(&(f3HBDigis.data()[gch - nchannelsf015][0]), sample)); + } +#endif // COMPUTE_TDC_TIME + + // compute method 0 quantities + // TODO: need to apply containment + // TODO: need to apply time slew + // TODO: for < run 3, apply HBM legacy energy correction + auto const nsamplesToAdd = recoParam1 < 10 ? recoParam2 : (recoParam1 >> 14) & 0xF; + auto const startSampleTmp = soi + firstSampleShift; + auto const startSample = startSampleTmp < 0 ? 0 : startSampleTmp; + auto const endSample = + startSample + nsamplesToAdd < nsamplesForCompute ? startSample + nsamplesToAdd : nsamplesForCompute; + // NOTE: gain is a small number < 10^-3, multiply it last + auto const energym0_per_ts = gain * ((rawCharge - pedestalToUseForMethod0) * respCorrection); + auto const energym0_per_ts_gain0 = gain0 * ((rawCharge - pedestalToUseForMethod0) * respCorrection); + // store to shared mem + shrEnergyM0PerTS[lch * nsamplesForCompute + sampleWithinWindow] = energym0_per_ts; + + alpaka::atomicAdd( + acc, &shrEnergyM0TotalAccum[lch], energym0_per_ts_gain0, alpaka::hierarchy::Threads{}); + +#ifdef HCAL_MAHI_GPUDEBUG + printf( + "id = %u sample = %d gch = %d hashedId = %u adc = %u capid = %u\n" + " charge = %f rawCharge = %f dfc = %f pedestal = %f\n" + " gain = %f respCorrection = %f energym0_per_ts = %f\n", + id, + sample, + gch, + hashedId, + adc, + capid, + charge, + rawCharge, + dfc, + pedestalToUseForMethod0, + gain, + respCorrection, + energym0_per_ts); + printf("startSample = %d endSample = %d param1 = %u param2 = %u\n", + startSample, + endSample, + recoParam1, + recoParam2); +#endif + //Find the max energy of lch channel and the corresponding TS + if (sampleWithinWindow >= static_cast(startSample) && + sampleWithinWindow < static_cast(endSample)) { + //sum the energys of all TS + alpaka::atomicAdd(acc, &shrMethod0EnergyAccum[lch], energym0_per_ts, alpaka::hierarchy::Threads{}); + // pair TS and E for all TSs, find max pair according to E + // TODO: Non-deterministic behavior for TS with equal energy + atomicMaxPair( + acc, &shrMethod0EnergySamplePair[lch], {sampleWithinWindow, energym0_per_ts}, TSenergyCompare); + } + + // NOTE: must take soi, as values for that thread are used... + // NOTE: does not run if soi is bad, because it does not match any sampleWithinWindow + if (sampleWithinWindow == static_cast(soi)) { + // Channel quality check + // https://github.com/cms-sw/cmssw/blob/master/RecoLocalCalo/HcalRecAlgos/plugins/HcalChannelPropertiesEP.cc#L107-L109 + // https://github.com/cms-sw/cmssw/blob/6d2f66057131baacc2fcbdd203588c41c885b42c/CondCore/HcalPlugins/plugins/HcalChannelQuality_PayloadInspector.cc#L30 + // const bool taggedBadByDb = severity.dropChannel(digistatus->getValue()); + // do not run MAHI if taggedBadByDb = true + auto const digiStatus_ = mahi.channelQuality_status()[hashedId]; + const bool taggedBadByDb = (digiStatus_ / 32770); + + if (taggedBadByDb) + outputGPU.chi2()[gch] = -9999.f; + + // check as in cpu version if mahi is not needed + // FIXME: KNOWN ISSUE: observed a problem when rawCharge and pedestal + // are basically equal and generate -0.00000... + // needs to be treated properly + if (!(shrEnergyM0TotalAccum[lch] > 0 && energym0_per_ts_gain0 > ts4Thresh)) { + // do not need to run mahi minimization + //outputEnergy[gch] = 0; energy already inited to 0 + outputGPU.chi2()[gch] = -9999.f; + } + } + // + // preparations for mahi fit + // + auto const amplitude = rawCharge - pedestalToUseForMethod0; + auto const noiseADC = (1. / std::sqrt(12)) * dfc; + auto const noisePhotoSq = amplitude > pedestalWidth ? (amplitude * fcByPE) : 0.f; + auto const noiseTerm = noiseADC * noiseADC + noisePhotoSq + pedestalWidth * pedestalWidth; + + // store to global memory + amplitudesForChannel[sampleWithinWindow] = amplitude; + noiseTermsForChannel[sampleWithinWindow] = noiseTerm; + electronicNoiseTermsForChannel[sampleWithinWindow] = pedestalWidth; + + } //end sample loop + } // end channel loop + alpaka::syncBlockThreads(acc); + + // compute energy using shrMethod0EnergySamplePair + for (auto channel : uniform_group_elements_y(acc, group, nchannels)) { + auto const gch = channel.global; + auto const lch = channel.local; + for (auto i_sample : independent_group_elements_x(acc, nsamplesForCompute)) { + auto const sampleWithinWindow = i_sample; + + int32_t const soi = + gch < f01HEDigis.size() + ? soiSamples[gch] + : (gch < nchannelsf015 ? f5HBDigis.npresamples()[gch - f01HEDigis.size()] : soiSamples[gch]); + + auto const id = gch < f01HEDigis.size() + ? f01HEDigis.ids()[gch] + : (gch < nchannelsf015 ? f5HBDigis.ids()[gch - f01HEDigis.size()] + : f3HBDigis.ids()[gch - nchannelsf015]); + auto const did = HcalDetId{id}; + auto const hashedId = + did.subdetId() == HcalBarrel + ? did2linearIndexHB(id, mahi.maxDepthHB(), mahi.firstHBRing(), mahi.lastHBRing(), mahi.nEtaHB()) + : did2linearIndexHE(id, + mahi.maxDepthHE(), + mahi.maxPhiHE(), + mahi.firstHERing(), + mahi.lastHERing(), + mahi.nEtaHE()) + + mahi.offsetForHashes(); + + auto const recoParam1 = recoParamsWithPS.recoParamView().param1()[hashedId]; + auto const recoParam2 = recoParamsWithPS.recoParamView().param2()[hashedId]; + + auto const nsamplesToAdd = recoParam1 < 10 ? recoParam2 : (recoParam1 >> 14) & 0xF; + // NOTE: must take soi, as values for that thread are used... + // NOTE: does not run if soi is bad, because it does not match any sampleWithinWindow + if (sampleWithinWindow == static_cast(soi)) { + auto const method0_energy = shrMethod0EnergyAccum[lch]; + auto const val = shrMethod0EnergySamplePair[lch]; + int const max_sample = (val >> 32) & 0xffffffff; + + float const max_energy = uint_as_float(static_cast(val & 0xffffffff)); + + float const max_energy_1 = static_cast(max_sample) < nsamplesForCompute - 1 + ? shrEnergyM0PerTS[lch * nsamplesForCompute + max_sample + 1] + : 0.f; + float const position = nsamplesToAdd < nsamplesForCompute ? max_sample - soi : max_sample; + auto const sum = max_energy + max_energy_1; + // FIXME: for full comparison with cpu method 0 timing, + // need to correct by slew + // requires an accumulator -> more shared mem -> omit here unless + // really needed + float const time = + max_energy > 0.f && max_energy_1 > 0.f ? 25.f * (position + max_energy_1 / sum) : 25.f * position; + + // store method0 quantities to global mem + outputGPU.detId()[gch] = id; + outputGPU.energyM0()[gch] = method0_energy; + outputGPU.timeM0()[gch] = time; + +#ifdef HCAL_MAHI_GPUDEBUG + printf("tsTOT = %f tstrig = %f ts4Thresh = %f\n", + shrEnergyM0TotalAccum[lch], + energym0_per_ts_gain0, + ts4Thresh); +#endif + +#ifdef HCAL_MAHI_GPUDEBUG + printf(" method0_energy = %f max_sample = %d max_energy = %f time = %f\n", + method0_energy, + max_sample, + max_energy, + time); +#endif + } +#ifdef HCAL_MAHI_GPUDEBUG + printf( + "charge(%d) = %f pedestal(%d) = %f dfc(%d) = %f pedestalWidth(%d) = %f noiseADC(%d) = %f " + "noisPhoto(%d) =%f outputGPU chi2[gch] = %f \n", + sample, + rawCharge, + sample, + pedestalToUseForMethod0, + sample, + dfc, + sample, + pedestalWidth, + sample, + noiseADC, + sample, + noisePhotoSq, + outputGPU.chi2()[gch]); +#endif + + } // loop for sample + } // loop for channels + } // loop for channgel groups + } + }; //Kernel_prep1d_sameNumberOfSamples + + class Kernel_prep_pulseMatrices_sameNumberOfSamples { + public: + template >> + ALPAKA_FN_ACC void operator()(TAcc const& acc, + float* pulseMatrices, + float* pulseMatricesM, + float* pulseMatricesP, + HcalMahiPulseOffsetsPortableDevice::ConstView pulseOffsets, + float const* amplitudes, + IProductTypef01::ConstView f01HEDigis, + IProductTypef5::ConstView f5HBDigis, + IProductTypef3::ConstView f3HBDigis, + int8_t* soiSamples, + HcalMahiConditionsPortableDevice::ConstView mahi, + HcalRecoParamWithPulseShapeDevice::ConstView recoParamsWithPS, + float const meanTime, + float const timeSigmaSiPM, + float const timeSigmaHPD, + bool const applyTimeSlew, + float const tzeroTimeSlew, + float const slopeTimeSlew, + float const tmaxTimeSlew) const { + //2nd index = pulse + auto const npulses(alpaka::getWorkDiv(acc)[1u]); + //3rd index = sample + auto const nsamples(alpaka::getWorkDiv(acc)[2u]); + + auto const nchannels = f01HEDigis.size() + f5HBDigis.size() + f3HBDigis.size(); + auto const nchannelsf015 = f01HEDigis.size() + f5HBDigis.size(); + + //Loop over each channel + for (auto channel : uniform_elements_z(acc, nchannels)) { + //Loop over pulses + for (auto ipulse : independent_group_elements_y(acc, npulses)) { + //Loop over sample + for (auto sample : independent_group_elements_x(acc, nsamples)) { + // conditions + auto const id = channel < f01HEDigis.size() + ? f01HEDigis.ids()[channel] + : (channel < nchannelsf015 ? f5HBDigis.ids()[channel - f01HEDigis.size()] + : f3HBDigis.ids()[channel - nchannelsf015]); + auto const deltaT = + channel >= f01HEDigis.size() && channel < nchannelsf015 ? timeSigmaHPD : timeSigmaSiPM; + + // compute hash for this did + // hash needed to convert condition arrays order (based on Topo) into digi arrays order(based on FED) + auto const did = DetId{id}; + auto const hashedId = + did.subdetId() == HcalBarrel + ? did2linearIndexHB(id, mahi.maxDepthHB(), mahi.firstHBRing(), mahi.lastHBRing(), mahi.nEtaHB()) + : did2linearIndexHE(id, + mahi.maxDepthHE(), + mahi.maxPhiHE(), + mahi.firstHERing(), + mahi.lastHERing(), + mahi.nEtaHE()) + + mahi.offsetForHashes(); + auto const pulseShape = recoParamsWithPS.getPulseShape(hashedId); + + // offset output arrays + auto* pulseMatrix = pulseMatrices + nsamples * npulses * channel; + auto* pulseMatrixM = pulseMatricesM + nsamples * npulses * channel; + auto* pulseMatrixP = pulseMatricesP + nsamples * npulses * channel; + + // amplitude per ipulse + int const soi = soiSamples[channel]; + int const pulseOffset = pulseOffsets.offsets()[ipulse]; + auto const amplitude = amplitudes[channel * nsamples + pulseOffset + soi]; + + if (amplitude <= 0.f) { + pulseMatrix[ipulse * nsamples + sample] = 0.f; + pulseMatrixM[ipulse * nsamples + sample] = 0.f; + pulseMatrixP[ipulse * nsamples + sample] = 0.f; + continue; + } +#ifdef HCAL_MAHI_GPUDEBUG +#ifdef HCAL_MAHI_GPUDEBUG_FILTERDETID + if (id != DETID_TO_DEBUG) + return; +#endif + if (sample == 0 && ipulse == 0) { + for (int i = 0; i < 8; i++) + printf("amplitude(%d) = %f\n", i, amplitudes[channel * nsamples + i]); + printf("acc25nsVec and diff25nsItvlVec for recoPulseShapeId = %u\n", recoPulseShapeId); + for (int i = 0; i < 256; i++) { + printf("acc25nsVec(%d) = %f diff25nsItvlVec(%d) = %f\n", i, acc25nsVec[i], i, diff25nsItvlVec[i]); + } + printf("accVarLenIdxZEROVec and accVarLenIdxMinusOneVec\n"); + for (int i = 0; i < 25; i++) { + printf("accVarLenIdxZEROVec(%d) = %f accVarLenIdxMinusOneVec(%d) = %f\n", + i, + accVarLenIdxZeroVec[i], + i, + accVarLenIdxMinusOneVec[i]); + } + printf("diffVarItvlIdxZEROVec and diffVarItvlIdxMinusOneVec\n"); + for (int i = 0; i < 25; i++) { + printf("diffVarItvlIdxZEROVec(%d) = %f diffVarItvlIdxMinusOneVec(%d) = %f\n", + i, + diffVarItvlIdxZeroVec[i], + i, + diffVarItvlIdxMinusOneVec[i]); + } + } +#endif + + auto t0 = meanTime; + if (applyTimeSlew) { + if (amplitude <= 1.0f) + t0 += compute_time_slew_delay(1.0, tzeroTimeSlew, slopeTimeSlew, tmaxTimeSlew); + else + t0 += compute_time_slew_delay(amplitude, tzeroTimeSlew, slopeTimeSlew, tmaxTimeSlew); + } + auto const t0m = -deltaT + t0; + auto const t0p = deltaT + t0; + +#ifdef HCAL_MAHI_GPUDEBUG + if (sample == 0 && ipulse == 0) { + printf("time values: %f %f %f\n", t0, t0m, t0p); + } + + if (sample == 0 && ipulse == 0) { + for (int i = 0; i < hcal::constants::maxSamples; i++) { + auto const value = compute_pulse_shape_value(pulseShape, t0, i, 0); + } + printf("\n"); + for (int i = 0; i < hcal::constants::maxSamples; i++) { + auto const value = compute_pulse_shape_value(pulseShape, t0p, i, 0); + } + printf("\n"); + for (int i = 0; i < hcal::constants::maxSamples; i++) { + auto const value = compute_pulse_shape_value(pulseShape, t0m, i, 0); + } + } +#endif + + // FIXME: shift should be treated properly, + // here assume 8 time slices and 8 samples + auto const shift = 4 - soi; // as in cpu version! + + int32_t const idx = sample - pulseOffset; + auto const value = idx >= 0 && static_cast(idx) < nsamples + ? compute_pulse_shape_value(pulseShape, t0, idx, shift) + : 0; + auto const value_t0m = idx >= 0 && static_cast(idx) < nsamples + ? compute_pulse_shape_value(pulseShape, t0m, idx, shift) + : 0; + auto const value_t0p = idx >= 0 && static_cast(idx) < nsamples + ? compute_pulse_shape_value(pulseShape, t0p, idx, shift) + : 0; + + // store to global + pulseMatrix[ipulse * nsamples + sample] = value; + pulseMatrixM[ipulse * nsamples + sample] = value_t0m; + pulseMatrixP[ipulse * nsamples + sample] = value_t0p; + + } // loop over sample + } // loop over pulse + } // loop over channels + } + }; // Kernel_prep_pulseMatrices_sameNumberOfSamples + + template + ALPAKA_FN_ACC ALPAKA_FN_INLINE void update_covariance( + calo::multifit::ColumnVector const& resultAmplitudesVector, + calo::multifit::MapSymM& covarianceMatrix, + Eigen::Map> const& pulseMatrix, + Eigen::Map> const& pulseMatrixM, + Eigen::Map> const& pulseMatrixP) { + CMS_UNROLL_LOOP + for (int ipulse = 0; ipulse < NPULSES; ipulse++) { + auto const resultAmplitude = resultAmplitudesVector(ipulse); + if (resultAmplitude == 0) + continue; + +#ifdef HCAL_MAHI_GPUDEBUG + printf("pulse cov array for ibx = %d\n", ipulse); +#endif + + // preload a column + float pmcol[NSAMPLES], pmpcol[NSAMPLES], pmmcol[NSAMPLES]; + CMS_UNROLL_LOOP + for (int counter = 0; counter < NSAMPLES; counter++) { + pmcol[counter] = pulseMatrix.coeffRef(counter, ipulse); + pmpcol[counter] = pulseMatrixP.coeffRef(counter, ipulse); + pmmcol[counter] = pulseMatrixM.coeffRef(counter, ipulse); + } + + auto const ampl2 = resultAmplitude * resultAmplitude; + CMS_UNROLL_LOOP + for (int col = 0; col < NSAMPLES; col++) { + auto const valueP_col = pmpcol[col]; + auto const valueM_col = pmmcol[col]; + auto const value_col = pmcol[col]; + auto const tmppcol = valueP_col - value_col; + auto const tmpmcol = valueM_col - value_col; + + // diagonal + auto tmp_value = 0.5 * (tmppcol * tmppcol + tmpmcol * tmpmcol); + covarianceMatrix(col, col) += ampl2 * tmp_value; + + // FIXME: understand if this actually gets unrolled + CMS_UNROLL_LOOP + for (int row = col + 1; row < NSAMPLES; row++) { + float const valueP_row = pmpcol[row]; //pulseMatrixP(j, ipulseReal); + float const value_row = pmcol[row]; //pulseMatrix(j, ipulseReal); + float const valueM_row = pmmcol[row]; //pulseMatrixM(j, ipulseReal); + + float tmpprow = valueP_row - value_row; + float tmpmrow = valueM_row - value_row; + + auto const covValue = 0.5 * (tmppcol * tmpprow + tmpmcol * tmpmrow); + + covarianceMatrix(row, col) += ampl2 * covValue; + } + } + } + } + + template + class Kernel_minimize { + public: + template >> + ALPAKA_FN_ACC void operator()(TAcc const& acc, + OProductType::View outputGPU, + float const* amplitudes, + float* pulseMatrices, + float* pulseMatricesM, + float* pulseMatricesP, + HcalMahiPulseOffsetsPortableDevice::ConstView pulseOffsetsView, + float* noiseTerms, + float* electronicNoiseTerms, + int8_t* soiSamples, + HcalMahiConditionsPortableDevice::ConstView mahi, + bool const useEffectivePedestals, + IProductTypef01::ConstView f01HEDigis, + IProductTypef5::ConstView f5HBDigis, + IProductTypef3::ConstView f3HBDigis) const { + // can be relaxed if needed - minor updates are needed in that case! + static_assert(NPULSES == NSAMPLES); + + auto const nchannels = f01HEDigis.size() + f5HBDigis.size() + f3HBDigis.size(); + + auto const nchannelsf015 = f01HEDigis.size() + f5HBDigis.size(); + + auto const threadsPerBlock(alpaka::getWorkDiv(acc)[0u]); + + //Loop over all groups of channels + for (auto group : uniform_groups_x(acc, nchannels)) { + //Loop over each channel + for (auto channel : uniform_group_elements_x(acc, group, nchannels)) { + auto const gch = channel.global; + auto const lch = channel.local; + + // if chi2 is set to -9999 do not run minimization + if (outputGPU.chi2()[gch] == -9999.f) + continue; + + // configure shared mem + float* shrmem = alpaka::getDynSharedMem(acc); + float* shrMatrixLFnnlsStorage = shrmem + calo::multifit::MapSymM::total * lch; + float* shrAtAStorage = shrmem + calo::multifit::MapSymM::total * (lch + threadsPerBlock); + + // conditions for pedestal widths + auto const id = gch < f01HEDigis.size() ? f01HEDigis.ids()[gch] + : (gch < nchannelsf015 ? f5HBDigis.ids()[gch - f01HEDigis.size()] + : f3HBDigis.ids()[gch - nchannelsf015]); + auto const did = DetId{id}; + auto const hashedId = + did.subdetId() == HcalBarrel + ? did2linearIndexHB(id, mahi.maxDepthHB(), mahi.firstHBRing(), mahi.lastHBRing(), mahi.nEtaHB()) + : did2linearIndexHE(id, + mahi.maxDepthHE(), + mahi.maxPhiHE(), + mahi.firstHERing(), + mahi.lastHERing(), + mahi.nEtaHE()) + + mahi.offsetForHashes(); + + // conditions based on the hash + auto const* pedestalWidthsForChannel = + useEffectivePedestals && (gch < f01HEDigis.size() || gch >= nchannelsf015) + ? mahi.effectivePedestalWidths()[hashedId].data() + : mahi.pedestals_width()[hashedId].data(); + auto const averagePedestalWidth2 = 0.25 * (pedestalWidthsForChannel[0] * pedestalWidthsForChannel[0] + + pedestalWidthsForChannel[1] * pedestalWidthsForChannel[1] + + pedestalWidthsForChannel[2] * pedestalWidthsForChannel[2] + + pedestalWidthsForChannel[3] * pedestalWidthsForChannel[3]); + + // FIXME on cpu ts 0 capid was used - does it make any difference + auto const gain = mahi.gains_value()[hashedId][0]; + + auto const respCorrection = mahi.respCorrs_values()[hashedId]; + auto const noisecorr = mahi.sipmPar_auxi2()[hashedId]; + +#ifdef HCAL_MAHI_GPUDEBUG +#ifdef HCAL_MAHI_GPUDEBUG_FILTERDETID + if (id != DETID_TO_DEBUG) + return; +#endif +#endif + + calo::multifit::ColumnVector pulseOffsets; + CMS_UNROLL_LOOP + for (int i = 0; i < NPULSES; ++i) + pulseOffsets(i) = i; + + // output amplitudes/weights + calo::multifit::ColumnVector resultAmplitudesVector = + calo::multifit::ColumnVector::Zero(); + + // map views + Eigen::Map> inputAmplitudesView{amplitudes + gch * NSAMPLES}; + Eigen::Map> noiseTermsView{noiseTerms + gch * NSAMPLES}; + Eigen::Map> noiseElectronicView{electronicNoiseTerms + + gch * NSAMPLES}; + Eigen::Map> glbPulseMatrixMView{ + pulseMatricesM + gch * NSAMPLES * NPULSES}; + Eigen::Map> glbPulseMatrixPView{ + pulseMatricesP + gch * NSAMPLES * NPULSES}; + Eigen::Map> glbPulseMatrixView{ + pulseMatrices + gch * NSAMPLES * NPULSES}; +#ifdef HCAL_MAHI_GPUDEBUG + for (int i = 0; i < NSAMPLES; i++) + printf("inputValues(%d) = %f noiseTerms(%d) = %f\n", i, inputAmplitudesView(i), i, noiseTermsView(i)); + for (int i = 0; i < NSAMPLES; i++) { + for (int j = 0; j < NPULSES; j++) + printf("%f ", glbPulseMatrixView(i, j)); + printf("\n"); + } + printf("\n"); + for (int i = 0; i < NSAMPLES; i++) { + for (int j = 0; j < NPULSES; j++) + printf("%f ", glbPulseMatrixMView(i, j)); + printf("\n"); + } + printf("\n"); + for (int i = 0; i < NSAMPLES; i++) { + for (int j = 0; j < NPULSES; j++) + printf("%f ", glbPulseMatrixPView(i, j)); + printf("\n"); + } +#endif + + int npassive = 0; + float chi2 = 0, previous_chi2 = 0.f, chi2_2itersback = 0.f; + for (int iter = 1; iter < nMaxItersMin; iter++) { + //float covarianceMatrixStorage[MapSymM::total]; + // NOTE: only works when NSAMPLES == NPULSES + // if does not hold -> slightly rearrange shared mem to still reuse + // shared memory + float* covarianceMatrixStorage = shrMatrixLFnnlsStorage; + calo::multifit::MapSymM covarianceMatrix{covarianceMatrixStorage}; + CMS_UNROLL_LOOP + for (int counter = 0; counter < calo::multifit::MapSymM::total; counter++) + covarianceMatrixStorage[counter] = (noisecorr != 0.f) ? 0.f : averagePedestalWidth2; + CMS_UNROLL_LOOP + for (unsigned int counter = 0; counter < calo::multifit::MapSymM::stride; counter++) { + covarianceMatrix(counter, counter) += noiseTermsView.coeffRef(counter); + if (counter != 0) + covarianceMatrix(counter, counter - 1) += + noisecorr * noiseElectronicView.coeffRef(counter - 1) * noiseElectronicView.coeffRef(counter); + } + + // update covariance matrix + update_covariance(resultAmplitudesVector, + covarianceMatrix, + glbPulseMatrixView, + glbPulseMatrixMView, + glbPulseMatrixPView); + +#ifdef HCAL_MAHI_GPUDEBUG + printf("covariance matrix\n"); + for (int i = 0; i < 8; i++) { + for (int j = 0; j < 8; j++) + printf("%f ", covarianceMatrix(i, j)); + printf("\n"); + } +#endif + + // compute Cholesky Decomposition L matrix + //matrixDecomposition.compute(covarianceMatrix); + //auto const& matrixL = matrixDecomposition.matrixL(); + float matrixLStorage[calo::multifit::MapSymM::total]; + calo::multifit::MapSymM matrixL{matrixLStorage}; + calo::multifit::compute_decomposition_unrolled(matrixL, covarianceMatrix); + + // + // replace eigen + // + //auto const& A = matrixDecomposition + // .matrixL() + // .solve(pulseMatrixView); + calo::multifit::ColMajorMatrix A; + calo::multifit::solve_forward_subst_matrix(A, glbPulseMatrixView, matrixL); + + // + // remove eigen + // + //auto const& b = matrixL + // .solve(inputAmplitudesView); + // + float reg_b[NSAMPLES]; + calo::multifit::solve_forward_subst_vector(reg_b, inputAmplitudesView, matrixL); + + // TODO: we do not really need to change these matrcies + // will be fixed in the optimized version + //ColMajorMatrix AtA = A.transpose() * A; + //ColumnVector Atb = A.transpose() * b; + //ColMajorMatrix AtA; + //float AtAStorage[MapSymM::total]; + calo::multifit::MapSymM AtA{shrAtAStorage}; + calo::multifit::ColumnVector Atb; + CMS_UNROLL_LOOP + for (int icol = 0; icol < NPULSES; icol++) { + float reg_ai[NSAMPLES]; + + // load column icol + CMS_UNROLL_LOOP + for (int counter = 0; counter < NSAMPLES; counter++) + reg_ai[counter] = A(counter, icol); + + // compute diagonal + float sum = 0.f; + CMS_UNROLL_LOOP + for (int counter = 0; counter < NSAMPLES; counter++) + sum += reg_ai[counter] * reg_ai[counter]; + + // store + AtA(icol, icol) = sum; + + // go thru the other columns + CMS_UNROLL_LOOP + for (int j = icol + 1; j < NPULSES; j++) { + // load column j + float reg_aj[NSAMPLES]; + CMS_UNROLL_LOOP + for (int counter = 0; counter < NSAMPLES; counter++) + reg_aj[counter] = A(counter, j); + + // accum + float sum = 0.f; + CMS_UNROLL_LOOP + for (int counter = 0; counter < NSAMPLES; counter++) + sum += reg_aj[counter] * reg_ai[counter]; + + // store + //AtA(icol, j) = sum; + AtA(j, icol) = sum; + } + + // Atb accum + float sum_atb = 0; + CMS_UNROLL_LOOP + for (int counter = 0; counter < NSAMPLES; counter++) + sum_atb += reg_ai[counter] * reg_b[counter]; + + // store atb + Atb(icol) = sum_atb; + } + +#ifdef HCAL_MAHI_GPUDEBUG + printf("AtA\n"); + for (int i = 0; i < 8; i++) { + for (int j = 0; j < 8; j++) + printf("%f ", AtA(i, j)); + printf("\n"); + } + printf("Atb\n"); + for (int i = 0; i < 8; i++) + printf("%f ", Atb(i)); + printf("\n"); + printf("result Amplitudes before nnls\n"); + for (int i = 0; i < 8; i++) + printf("%f ", resultAmplitudesVector(i)); + printf("\n"); +#endif + + // for fnnls + calo::multifit::MapSymM matrixLForFnnls{shrMatrixLFnnlsStorage}; + + // run fast nnls + calo::multifit::fnnls(AtA, + Atb, + resultAmplitudesVector, + npassive, + pulseOffsets, + matrixLForFnnls, + nnlsThresh, + nMaxItersNNLS, + 10, + 10); + +#ifdef HCAL_MAHI_GPUDEBUG + printf("result Amplitudes\n"); + for (int i = 0; i < 8; i++) + printf("resultAmplitudes(%d) = %f\n", i, resultAmplitudesVector(i)); +#endif + + calo::multifit::calculateChiSq( + matrixL, glbPulseMatrixView, resultAmplitudesVector, inputAmplitudesView, chi2); + + auto const deltaChi2 = std::abs(chi2 - previous_chi2); + if (chi2 == chi2_2itersback && chi2 < previous_chi2) + break; + + // update + chi2_2itersback = previous_chi2; + previous_chi2 = chi2; + + // exit condition + if (deltaChi2 < deltaChi2Threashold) + break; + } + +#ifdef HCAL_MAHI_GPUDEBUG + for (int i = 0; i < NPULSES; i++) + printf("pulseOffsets(%d) = %d outputAmplitudes(%d) = %f\n", + i, + pulseOffsets(i), + i, + resultAmplitudesVector(i)); + printf("chi2 = %f\n", chi2); +#endif + + outputGPU.chi2()[gch] = chi2; + auto const idx_for_energy = std::abs(pulseOffsetsView.offsets()[0]); + outputGPU.energy()[gch] = (gain * resultAmplitudesVector(idx_for_energy)) * respCorrection; + + } // loop over channels + } //loop over group of channels + } + }; // Kernel_minimize + + } // namespace mahi + + void runMahiAsync(Queue& queue, + IProductTypef01::ConstView const& f01HEDigis, + IProductTypef5::ConstView const& f5HBDigis, + IProductTypef3::ConstView const& f3HBDigis, + OProductType::View outputGPU, + HcalMahiConditionsPortableDevice::ConstView const& mahi, + HcalSiPMCharacteristicsPortableDevice::ConstView const& sipmCharacteristics, + HcalRecoParamWithPulseShapeDevice::ConstView const& recoParamsWithPS, + HcalMahiPulseOffsetsPortableDevice::ConstView const& mahiPulseOffsets, + ConfigParameters const& configParameters) { + auto const totalChannels = + f01HEDigis.metadata().size() + f5HBDigis.metadata().size() + f3HBDigis.metadata().size(); + // FIXME: the number of channels in output might change given that some channesl might be filtered out + + // TODO: this can be lifted by implementing a separate kernel + // similar to the default one, but properly handling the diff in #sample + // or modifying existing one + // TODO: assert startingSample = f01nsamples - windowSize to be 0 or 2 + // assert f01nsamples == f5nsamples + // assert f01nsamples == f3nsamples + int constexpr windowSize = 8; + + //compute work division + uint32_t nchannels_per_block = configParameters.kprep1dChannelsPerBlock; + auto const blocks_y = cms::alpakatools::divide_up_by(totalChannels, nchannels_per_block); + + Vec2D const blocks_2d{blocks_y, 1u}; // {y, x} coordiantes + Vec2D const threads_2d{nchannels_per_block, windowSize}; + auto workDivPrep2D = cms::alpakatools::make_workdiv(blocks_2d, threads_2d); + + //Device buffer for output + auto amplitudes = cms::alpakatools::make_device_buffer(queue, totalChannels * windowSize); + auto noiseTerms = cms::alpakatools::make_device_buffer(queue, totalChannels * windowSize); + auto electronicNoiseTerms = cms::alpakatools::make_device_buffer(queue, totalChannels * windowSize); + auto soiSamples = cms::alpakatools::make_device_buffer(queue, totalChannels * windowSize); + + alpaka::exec(queue, + workDivPrep2D, + mahi::Kernel_prep1d_sameNumberOfSamples{}, + outputGPU, + f01HEDigis, + f5HBDigis, + f3HBDigis, + mahi, + sipmCharacteristics, + recoParamsWithPS, + configParameters.useEffectivePedestals, + configParameters.sipmQTSShift, + configParameters.sipmQNTStoSum, + configParameters.firstSampleShift, + configParameters.ts4Thresh, + amplitudes.data(), + noiseTerms.data(), + electronicNoiseTerms.data(), + soiSamples.data(), + windowSize); + + //// 1024 is the max threads per block for gtx1080 + //// FIXME: Take this from Alpaka in a way that does not need to query deviceProperty for every event + uint32_t const channelsPerBlock = 1024 / (windowSize * mahiPulseOffsets.metadata().size()); + + //launch 1D blocks of 3D threads + auto const blocks_z = cms::alpakatools::divide_up_by(totalChannels, channelsPerBlock); + Vec3D const blocks_3d{blocks_z, 1u, 1u}; // 1D block in z {z,y,x} coordinates + Vec3D const threads_3d{channelsPerBlock, mahiPulseOffsets.metadata().size(), windowSize}; + + auto workDivPrep3D = cms::alpakatools::make_workdiv(blocks_3d, threads_3d); + + //Device buffer for output + auto pulseMatrices = cms::alpakatools::make_device_buffer( + queue, totalChannels * windowSize * mahiPulseOffsets.metadata().size()); + auto pulseMatricesM = cms::alpakatools::make_device_buffer( + queue, totalChannels * windowSize * mahiPulseOffsets.metadata().size()); + auto pulseMatricesP = cms::alpakatools::make_device_buffer( + queue, totalChannels * windowSize * mahiPulseOffsets.metadata().size()); + + alpaka::exec(queue, + workDivPrep3D, + mahi::Kernel_prep_pulseMatrices_sameNumberOfSamples{}, + pulseMatrices.data(), + pulseMatricesM.data(), + pulseMatricesP.data(), + mahiPulseOffsets, + amplitudes.data(), + f01HEDigis, + f5HBDigis, + f3HBDigis, + soiSamples.data(), + mahi, + recoParamsWithPS, + configParameters.meanTime, + configParameters.timeSigmaSiPM, + configParameters.timeSigmaHPD, + configParameters.applyTimeSlew, + configParameters.tzeroTimeSlew, + configParameters.slopeTimeSlew, + configParameters.tmaxTimeSlew); + + uint32_t threadsPerBlock = configParameters.kernelMinimizeThreads[0]; + auto blocks_1d = cms::alpakatools::divide_up_by(totalChannels, threadsPerBlock); + + auto workDivPrep1D = cms::alpakatools::make_workdiv(blocks_1d, threadsPerBlock); + + alpaka::exec(queue, + workDivPrep1D, + mahi::Kernel_minimize<8, 8>{}, + outputGPU, + amplitudes.data(), + pulseMatrices.data(), + pulseMatricesM.data(), + pulseMatricesP.data(), + mahiPulseOffsets, + noiseTerms.data(), + electronicNoiseTerms.data(), + soiSamples.data(), + mahi, + configParameters.useEffectivePedestals, + f01HEDigis, + f5HBDigis, + f3HBDigis); + } + + } // namespace hcal::reconstruction +} // namespace ALPAKA_ACCELERATOR_NAMESPACE + +namespace alpaka::trait { + using namespace ALPAKA_ACCELERATOR_NAMESPACE::hcal::reconstruction::mahi; + + //! The trait for getting the size of the block shared dynamic memory for Kernel_prep_1d_and_initialize. + template + struct BlockSharedMemDynSizeBytes { + //! \return The size of the shared memory allocated for a block. + template + ALPAKA_FN_HOST_ACC static auto getBlockSharedMemDynSizeBytes(Kernel_prep1d_sameNumberOfSamples const&, + TVec const& threadsPerBlock, + TVec const& elemsPerThread, + TArgs const&...) -> std::size_t { + // return the amount of dynamic shared memory needed + + // threadsPerBlock[1] = threads2d.x = windowSize = 8 + // threadsPerBlock[0] = threads2d.y = nchannels_per_block = 32 + // elemsPerThread = 1 + std::size_t bytes = threadsPerBlock[0u] * elemsPerThread[0u] * + ((2 * threadsPerBlock[1u] * elemsPerThread[1u] + 2) * sizeof(float) + sizeof(uint64_t)); + return bytes; + } + }; + + //! The trait for getting the size of the block shared dynamic memory for kernel_minimize. + template + struct BlockSharedMemDynSizeBytes, TAcc> { + //! \return The size of the shared memory allocated for a block. + template + ALPAKA_FN_HOST_ACC static auto getBlockSharedMemDynSizeBytes(Kernel_minimize const&, + TVec const& threadsPerBlock, + TVec const& elemsPerThread, + TArgs const&...) -> std::size_t { + // return the amount of dynamic shared memory needed + + std::size_t bytes = + 2 * threadsPerBlock[0u] * elemsPerThread[0u] * (calo::multifit::MapSymM::total * sizeof(float)); + return bytes; + } + }; + +} // namespace alpaka::trait diff --git a/RecoLocalCalo/HcalRecProducers/plugins/alpaka/Mahi.h b/RecoLocalCalo/HcalRecProducers/plugins/alpaka/Mahi.h new file mode 100644 index 0000000000000..baa13effe532a --- /dev/null +++ b/RecoLocalCalo/HcalRecProducers/plugins/alpaka/Mahi.h @@ -0,0 +1,57 @@ +#ifndef RecoLocalCalo_HcalRecProducers_plugins_alpaka_Mahi_h +#define RecoLocalCalo_HcalRecProducers_plugins_alpaka_Mahi_h + +#include + +#include "DataFormats/HcalDigi/interface/alpaka/HcalDigiDeviceCollection.h" +#include "DataFormats/HcalRecHit/interface/alpaka/HcalRecHitDeviceCollection.h" +#include "CondFormats/HcalObjects/interface/alpaka/HcalMahiConditionsDevice.h" +#include "HeterogeneousCore/AlpakaInterface/interface/config.h" +#include "HeterogeneousCore/AlpakaInterface/interface/traits.h" +#include "CondFormats/HcalObjects/interface/alpaka/HcalMahiConditionsDevice.h" +#include "CondFormats/HcalObjects/interface/alpaka/HcalSiPMCharacteristicsDevice.h" +#include "CondFormats/HcalObjects/interface/alpaka/HcalRecoParamWithPulseShapeDevice.h" +#include "CondFormats/HcalObjects/interface/alpaka/HcalMahiPulseOffsetsDevice.h" + +namespace ALPAKA_ACCELERATOR_NAMESPACE::hcal::reconstruction { + + struct ConfigParameters { + uint32_t maxTimeSamples; + uint32_t kprep1dChannelsPerBlock; + int sipmQTSShift; + int sipmQNTStoSum; + int firstSampleShift; + bool useEffectivePedestals; + + float meanTime; + float timeSigmaSiPM, timeSigmaHPD; + float ts4Thresh; + + std::array kernelMinimizeThreads; + + // FIXME: + // - add "getters" to HcalTimeSlew calib formats + // - add ES Producer to consume what is produced above not to replicate. + // which ones to use is hardcoded, therefore no need to send those to the device + bool applyTimeSlew; + float tzeroTimeSlew, slopeTimeSlew, tmaxTimeSlew; + }; + + using IProductTypef01 = hcal::Phase1DigiDeviceCollection; + using IProductTypef5 = hcal::Phase0DigiDeviceCollection; + using IProductTypef3 = hcal::Phase1DigiDeviceCollection; + using OProductType = hcal::RecHitDeviceCollection; + + void runMahiAsync(Queue& queue, + IProductTypef01::ConstView const& f01HEDigis, + IProductTypef5::ConstView const& f5HBDigis, + IProductTypef3::ConstView const& f3HBDigis, + OProductType::View outputGPU, + HcalMahiConditionsPortableDevice::ConstView const& mahi, + HcalSiPMCharacteristicsPortableDevice::ConstView const& sipmCharacteristics, + HcalRecoParamWithPulseShapeDevice::ConstView const& recoParams, + HcalMahiPulseOffsetsPortableDevice::ConstView const& mahiPulseOffsets, + ConfigParameters const& configParameters); + +} // namespace ALPAKA_ACCELERATOR_NAMESPACE::hcal::reconstruction +#endif diff --git a/RecoLocalCalo/HcalRecProducers/python/hbheRecHitProducerPortableTask_cff.py b/RecoLocalCalo/HcalRecProducers/python/hbheRecHitProducerPortableTask_cff.py new file mode 100644 index 0000000000000..17bec3f3caabe --- /dev/null +++ b/RecoLocalCalo/HcalRecProducers/python/hbheRecHitProducerPortableTask_cff.py @@ -0,0 +1,57 @@ +import FWCore.ParameterSet.Config as cms + +# Run 3 HCAL workflow on GPU + +# EventSetup modules used by HBHERecHitProducerPortable + +from RecoLocalCalo.HcalRecProducers.hcalMahiConditionsESProducer_cfi import hcalMahiConditionsESProducer +from RecoLocalCalo.HcalRecProducers.hcalMahiPulseOffsetsESProducer_cfi import hcalMahiPulseOffsetsESProducer +from RecoLocalCalo.HcalRecProducers.hcalSiPMCharacteristicsESProducer_cfi import hcalSiPMCharacteristicsESProducer +from RecoLocalCalo.HcalRecAlgos.hcalRecoParamWithPulseShapeESProducer_cfi import hcalRecoParamWithPulseShapeESProducer + +hcalMahiPulseOffSetAlpakaESRcdSource = cms.ESSource('EmptyESSource', + recordName = cms.string('JobConfigurationGPURecord'), + iovIsRunNotTime = cms.bool(True), + firstValid = cms.vuint32(1) +) + +# convert the HBHE digis into SoA format +from EventFilter.HcalRawToDigi.hcalDigisSoAProducer_cfi import hcalDigisSoAProducer as _hcalDigisSoAProducer +hcalDigisPortable = _hcalDigisSoAProducer.clone( + digisLabelF01HE = "f01HEDigis", + digisLabelF5HB = "f5HBDigis", + digisLabelF3HB = "f3HBDigis" +) + +from HeterogeneousCore.AlpakaCore.functions import * +hcalDigisSerial = makeSerialClone(hcalDigisPortable) + +# run the HCAL local reconstruction (MAHI) on GPU +from RecoLocalCalo.HcalRecProducers.hbheRecHitProducerPortable_cfi import hbheRecHitProducerPortable as _hbheRecHitProducerPortable +hbheRecHitProducerPortable = _hbheRecHitProducerPortable.clone( + digisLabelF01HE = ("hcalDigisPortable", "f01HEDigis"), + digisLabelF5HB = ("hcalDigisPortable", "f5HBDigis"), + digisLabelF3HB = ("hcalDigisPortable","f3HBDigis"), + recHitsLabelM0HBHE = "", + mahiPulseOffSets = "hcalMahiPulseOffsetsESProducer:" +) +hbheRecHitProducerSerial = makeSerialClone(hbheRecHitProducerPortable, + digisLabelF01HE = ("hcalDigisSerial","f01HEDigis"), + digisLabelF5HB = ("hcalDigisSerial","f5HBDigis"), + digisLabelF3HB = ("hcalDigisSerial","f3HBDigis") +) + +# Tasks and Sequences +hbheRecHitProducerPortableTask = cms.Task( + hcalMahiConditionsESProducer, + hcalMahiPulseOffSetAlpakaESRcdSource, + hcalMahiPulseOffsetsESProducer, + hcalRecoParamWithPulseShapeESProducer, + hcalSiPMCharacteristicsESProducer, + hcalDigisPortable, + hcalDigisSerial, + hbheRecHitProducerPortable, + hbheRecHitProducerSerial +) + +hbheRecHitProducerPortableSequence = cms.Sequence(hbheRecHitProducerPortableTask) diff --git a/RecoLocalCalo/HcalRecProducers/src/HcalRecHitSoAToLegacy.cc b/RecoLocalCalo/HcalRecProducers/src/HcalRecHitSoAToLegacy.cc new file mode 100644 index 0000000000000..471437ba8fca7 --- /dev/null +++ b/RecoLocalCalo/HcalRecProducers/src/HcalRecHitSoAToLegacy.cc @@ -0,0 +1,70 @@ +#include +#include + +#include "DataFormats/HcalRecHit/interface/HcalRecHitHostCollection.h" +#include "DataFormats/HcalRecHit/interface/HcalRecHitCollections.h" +#include "FWCore/Framework/interface/Event.h" +#include "FWCore/Framework/interface/EventSetup.h" +#include "FWCore/Framework/interface/MakerMacros.h" +#include "FWCore/Framework/interface/stream/EDProducer.h" +#include "FWCore/ParameterSet/interface/ParameterSet.h" +#include "FWCore/Utilities/interface/EDGetToken.h" +#include "FWCore/Utilities/interface/EDPutToken.h" + +class HcalRecHitSoAToLegacy : public edm::stream::EDProducer<> { +public: + explicit HcalRecHitSoAToLegacy(edm::ParameterSet const& ps); + ~HcalRecHitSoAToLegacy() override = default; + + static void fillDescriptions(edm::ConfigurationDescriptions&); + +private: + void produce(edm::Event&, edm::EventSetup const&) override; + +private: + const edm::EDGetTokenT recHitsTokenIn_; + const edm::EDPutTokenT recHitsLegacyTokenOut_; +}; + +void HcalRecHitSoAToLegacy::fillDescriptions(edm::ConfigurationDescriptions& confDesc) { + edm::ParameterSetDescription desc; + + desc.add("src", edm::InputTag{"hbheRecHitProducerPortable"}); + + confDesc.addWithDefaultLabel(desc); +} + +HcalRecHitSoAToLegacy::HcalRecHitSoAToLegacy(const edm::ParameterSet& ps) + : recHitsTokenIn_{consumes(ps.getParameter("src"))}, + recHitsLegacyTokenOut_{produces()} {} + +void HcalRecHitSoAToLegacy::produce(edm::Event& event, edm::EventSetup const& setup) { + // populate the legacy collection + auto recHitsLegacy = std::make_unique(); + + // get input from host SoA + auto const& hcalRechitSoAView = event.get(recHitsTokenIn_).const_view(); + + recHitsLegacy->reserve(hcalRechitSoAView.metadata().size()); + + for (auto i = 0; i < hcalRechitSoAView.metadata().size(); i++) { + auto const& rechit = hcalRechitSoAView[i]; + // skip bad channels + if (rechit.chi2() < 0) + continue; + + // build a legacy rechit with the computed detid and MAHI energy + recHitsLegacy->emplace_back(HcalDetId{rechit.detId()}, + rechit.energy(), + 0 // timeRising + ); + // update the legacy rechit with the Chi2 and M0 values + recHitsLegacy->back().setChiSquared(rechit.chi2()); + recHitsLegacy->back().setRawEnergy(rechit.energyM0()); + } + + // put the legacy collection + event.put(recHitsLegacyTokenOut_, std::move(recHitsLegacy)); +} + +DEFINE_FWK_MODULE(HcalRecHitSoAToLegacy);