diff --git a/DataFormats/ParticleFlowReco/interface/PFClusterHostCollection.h b/DataFormats/ParticleFlowReco/interface/PFClusterHostCollection.h new file mode 100644 index 0000000000000..72d8ea2706868 --- /dev/null +++ b/DataFormats/ParticleFlowReco/interface/PFClusterHostCollection.h @@ -0,0 +1,13 @@ +#ifndef DataFormats_ParticleFlowReco_interface_PFClusterHostCollection_h +#define DataFormats_ParticleFlowReco_interface_PFClusterHostCollection_h + +#include "DataFormats/ParticleFlowReco/interface/PFClusterSoA.h" +#include "DataFormats/Portable/interface/PortableHostCollection.h" + +namespace reco { + + using PFClusterHostCollection = PortableHostCollection; + +} // namespace reco + +#endif // DataFormats_ParticleFlowReco_interface_PFClusterHostCollection_h diff --git a/DataFormats/ParticleFlowReco/interface/PFClusterSoA.h b/DataFormats/ParticleFlowReco/interface/PFClusterSoA.h new file mode 100644 index 0000000000000..7663454cc42b3 --- /dev/null +++ b/DataFormats/ParticleFlowReco/interface/PFClusterSoA.h @@ -0,0 +1,29 @@ +#ifndef DataFormats_ParticleFlowReco_interface_PFClusterSoA_h +#define DataFormats_ParticleFlowReco_interface_PFClusterSoA_h + +#include "DataFormats/SoATemplate/interface/SoACommon.h" +#include "DataFormats/SoATemplate/interface/SoALayout.h" +#include "DataFormats/SoATemplate/interface/SoAView.h" + +namespace reco { + + GENERATE_SOA_LAYOUT(PFClusterSoALayout, + SOA_COLUMN(int, depth), + SOA_COLUMN(int, seedRHIdx), + SOA_COLUMN(int, topoId), + SOA_COLUMN(int, rhfracSize), + SOA_COLUMN(int, rhfracOffset), + SOA_COLUMN(float, energy), + SOA_COLUMN(float, x), + SOA_COLUMN(float, y), + SOA_COLUMN(float, z), + SOA_COLUMN(int, topoRHCount), + SOA_SCALAR(int, nTopos), + SOA_SCALAR(int, nSeeds), + SOA_SCALAR(int, nRHFracs), + SOA_SCALAR(int, size) // nRH + ) + using PFClusterSoA = PFClusterSoALayout<>; +} // namespace reco + +#endif // DataFormats_ParticleFlowReco_interface_PFClusterSoA_h diff --git a/DataFormats/ParticleFlowReco/interface/PFRecHitFractionHostCollection.h b/DataFormats/ParticleFlowReco/interface/PFRecHitFractionHostCollection.h new file mode 100644 index 0000000000000..747026710f898 --- /dev/null +++ b/DataFormats/ParticleFlowReco/interface/PFRecHitFractionHostCollection.h @@ -0,0 +1,11 @@ +#ifndef DataFormats_ParticleFlowReco_interface_PFRecHitFractionHostCollection_h +#define DataFormats_ParticleFlowReco_interface_PFRecHitFractionHostCollection_h + +#include "DataFormats/ParticleFlowReco/interface/PFRecHitFractionSoA.h" +#include "DataFormats/Portable/interface/PortableHostCollection.h" + +namespace reco { + using PFRecHitFractionHostCollection = PortableHostCollection; +} + +#endif // DataFormats_ParticleFlowReco_interface_PFRecHitFractionHostCollection_h diff --git a/DataFormats/ParticleFlowReco/interface/PFRecHitFractionSoA.h b/DataFormats/ParticleFlowReco/interface/PFRecHitFractionSoA.h new file mode 100644 index 0000000000000..00903b2985487 --- /dev/null +++ b/DataFormats/ParticleFlowReco/interface/PFRecHitFractionSoA.h @@ -0,0 +1,18 @@ +#ifndef DataFormats_ParticleFlowReco_interface_PFRecHitFractionSoA_h +#define DataFormats_ParticleFlowReco_interface_PFRecHitFractionSoA_h + +#include "DataFormats/SoATemplate/interface/SoACommon.h" +#include "DataFormats/SoATemplate/interface/SoALayout.h" +#include "DataFormats/SoATemplate/interface/SoAView.h" + +namespace reco { + + GENERATE_SOA_LAYOUT(PFRecHitFractionSoALayout, + SOA_COLUMN(float, frac), + SOA_COLUMN(int, pfrhIdx), + SOA_COLUMN(int, pfcIdx)) + + using PFRecHitFractionSoA = PFRecHitFractionSoALayout<>; +} // namespace reco + +#endif // DataFormats_ParticleFlowReco_interface_PFRecHitFractionSoA_h diff --git a/DataFormats/ParticleFlowReco/interface/alpaka/CaloRecHitDeviceCollection.h b/DataFormats/ParticleFlowReco/interface/alpaka/CaloRecHitDeviceCollection.h index 0116e8408add9..3e2c54ca22135 100644 --- a/DataFormats/ParticleFlowReco/interface/alpaka/CaloRecHitDeviceCollection.h +++ b/DataFormats/ParticleFlowReco/interface/alpaka/CaloRecHitDeviceCollection.h @@ -12,4 +12,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::reco { } // namespace ALPAKA_ACCELERATOR_NAMESPACE::reco +// check that the portable device collection for the host device is the same as the portable host collection +ASSERT_DEVICE_MATCHES_HOST_COLLECTION(reco::CaloRecHitDeviceCollection, reco::CaloRecHitHostCollection); + #endif // DataFormats_ParticleFlowReco_interface_alpaka_CaloRecHitDeviceCollection_h diff --git a/DataFormats/ParticleFlowReco/interface/alpaka/PFClusterDeviceCollection.h b/DataFormats/ParticleFlowReco/interface/alpaka/PFClusterDeviceCollection.h new file mode 100644 index 0000000000000..695b1f540c51b --- /dev/null +++ b/DataFormats/ParticleFlowReco/interface/alpaka/PFClusterDeviceCollection.h @@ -0,0 +1,18 @@ +#ifndef DataFormats_ParticleFlowReco_interface_alpaka_PFClusterDeviceCollection_h +#define DataFormats_ParticleFlowReco_interface_alpaka_PFClusterDeviceCollection_h + +#include "DataFormats/Portable/interface/alpaka/PortableCollection.h" +#include "DataFormats/ParticleFlowReco/interface/PFClusterSoA.h" +#include "DataFormats/ParticleFlowReco/interface/PFClusterHostCollection.h" + +namespace ALPAKA_ACCELERATOR_NAMESPACE::reco { + + using ::reco::PFClusterHostCollection; + + using PFClusterDeviceCollection = PortableCollection<::reco::PFClusterSoA>; +} // namespace ALPAKA_ACCELERATOR_NAMESPACE::reco + +// check that the portable device collection for the host device is the same as the portable host collection +ASSERT_DEVICE_MATCHES_HOST_COLLECTION(reco::PFClusterDeviceCollection, reco::PFClusterHostCollection); + +#endif // DataFormats_ParticleFlowReco_interface_alpaka_PFClusterDeviceCollection_h diff --git a/DataFormats/ParticleFlowReco/interface/alpaka/PFRecHitDeviceCollection.h b/DataFormats/ParticleFlowReco/interface/alpaka/PFRecHitDeviceCollection.h index 3ab6a82b3ad2e..70b5bb6d94093 100644 --- a/DataFormats/ParticleFlowReco/interface/alpaka/PFRecHitDeviceCollection.h +++ b/DataFormats/ParticleFlowReco/interface/alpaka/PFRecHitDeviceCollection.h @@ -13,4 +13,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::reco { } // namespace ALPAKA_ACCELERATOR_NAMESPACE::reco +// check that the portable device collection for the host device is the same as the portable host collection +ASSERT_DEVICE_MATCHES_HOST_COLLECTION(reco::PFRecHitDeviceCollection, reco::PFRecHitHostCollection); + #endif // DataFormats_ParticleFlowReco_interface_alpaka_PFRecHitDeviceCollection_h diff --git a/DataFormats/ParticleFlowReco/interface/alpaka/PFRecHitFractionDeviceCollection.h b/DataFormats/ParticleFlowReco/interface/alpaka/PFRecHitFractionDeviceCollection.h new file mode 100644 index 0000000000000..f59631c7ff229 --- /dev/null +++ b/DataFormats/ParticleFlowReco/interface/alpaka/PFRecHitFractionDeviceCollection.h @@ -0,0 +1,18 @@ +#ifndef DataFormats_ParticleFlowReco_interface_alpaka_PFRecHitFractionDeviceCollection_h +#define DataFormats_ParticleFlowReco_interface_alpaka_PFRecHitFractionDeviceCollection_h + +#include "DataFormats/Portable/interface/alpaka/PortableCollection.h" +#include "DataFormats/ParticleFlowReco/interface/PFRecHitFractionSoA.h" +#include "DataFormats/ParticleFlowReco/interface/PFRecHitFractionHostCollection.h" + +namespace ALPAKA_ACCELERATOR_NAMESPACE::reco { + + using ::reco::PFRecHitFractionHostCollection; + + using PFRecHitFractionDeviceCollection = PortableCollection<::reco::PFRecHitFractionSoA>; +} // namespace ALPAKA_ACCELERATOR_NAMESPACE::reco + +// check that the portable device collection for the host device is the same as the portable host collection +ASSERT_DEVICE_MATCHES_HOST_COLLECTION(reco::PFRecHitFractionDeviceCollection, reco::PFRecHitFractionHostCollection); + +#endif // DataFormats_ParticleFlowReco_interface_alpaka_PFRecHitFractionDeviceCollection_h diff --git a/DataFormats/ParticleFlowReco/src/alpaka/classes_cuda.h b/DataFormats/ParticleFlowReco/src/alpaka/classes_cuda.h index dfb7c13f09d75..74c3591453e22 100644 --- a/DataFormats/ParticleFlowReco/src/alpaka/classes_cuda.h +++ b/DataFormats/ParticleFlowReco/src/alpaka/classes_cuda.h @@ -2,5 +2,9 @@ #include "DataFormats/Common/interface/Wrapper.h" #include "DataFormats/ParticleFlowReco/interface/CaloRecHitSoA.h" #include "DataFormats/ParticleFlowReco/interface/PFRecHitSoA.h" +#include "DataFormats/ParticleFlowReco/interface/PFClusterSoA.h" +#include "DataFormats/ParticleFlowReco/interface/PFRecHitFractionSoA.h" #include "DataFormats/ParticleFlowReco/interface/alpaka/CaloRecHitDeviceCollection.h" #include "DataFormats/ParticleFlowReco/interface/alpaka/PFRecHitDeviceCollection.h" +#include "DataFormats/ParticleFlowReco/interface/alpaka/PFClusterDeviceCollection.h" +#include "DataFormats/ParticleFlowReco/interface/alpaka/PFRecHitFractionDeviceCollection.h" diff --git a/DataFormats/ParticleFlowReco/src/alpaka/classes_cuda_def.xml b/DataFormats/ParticleFlowReco/src/alpaka/classes_cuda_def.xml index c7ebfa6a0907a..758656fe0df20 100644 --- a/DataFormats/ParticleFlowReco/src/alpaka/classes_cuda_def.xml +++ b/DataFormats/ParticleFlowReco/src/alpaka/classes_cuda_def.xml @@ -6,5 +6,13 @@ + + + + + + + + diff --git a/DataFormats/ParticleFlowReco/src/alpaka/classes_rocm.h b/DataFormats/ParticleFlowReco/src/alpaka/classes_rocm.h index dfb7c13f09d75..74c3591453e22 100644 --- a/DataFormats/ParticleFlowReco/src/alpaka/classes_rocm.h +++ b/DataFormats/ParticleFlowReco/src/alpaka/classes_rocm.h @@ -2,5 +2,9 @@ #include "DataFormats/Common/interface/Wrapper.h" #include "DataFormats/ParticleFlowReco/interface/CaloRecHitSoA.h" #include "DataFormats/ParticleFlowReco/interface/PFRecHitSoA.h" +#include "DataFormats/ParticleFlowReco/interface/PFClusterSoA.h" +#include "DataFormats/ParticleFlowReco/interface/PFRecHitFractionSoA.h" #include "DataFormats/ParticleFlowReco/interface/alpaka/CaloRecHitDeviceCollection.h" #include "DataFormats/ParticleFlowReco/interface/alpaka/PFRecHitDeviceCollection.h" +#include "DataFormats/ParticleFlowReco/interface/alpaka/PFClusterDeviceCollection.h" +#include "DataFormats/ParticleFlowReco/interface/alpaka/PFRecHitFractionDeviceCollection.h" diff --git a/DataFormats/ParticleFlowReco/src/alpaka/classes_rocm_def.xml b/DataFormats/ParticleFlowReco/src/alpaka/classes_rocm_def.xml index 169feda6dc59f..5057d843a8b53 100644 --- a/DataFormats/ParticleFlowReco/src/alpaka/classes_rocm_def.xml +++ b/DataFormats/ParticleFlowReco/src/alpaka/classes_rocm_def.xml @@ -6,4 +6,12 @@ + + + + + + + + diff --git a/DataFormats/ParticleFlowReco/src/classes_serial.h b/DataFormats/ParticleFlowReco/src/classes_serial.h index 18e35ca251c61..cf1fb1569a577 100644 --- a/DataFormats/ParticleFlowReco/src/classes_serial.h +++ b/DataFormats/ParticleFlowReco/src/classes_serial.h @@ -3,3 +3,8 @@ #include "DataFormats/ParticleFlowReco/interface/CaloRecHitSoA.h" #include "DataFormats/ParticleFlowReco/interface/PFRecHitHostCollection.h" #include "DataFormats/ParticleFlowReco/interface/PFRecHitSoA.h" + +#include "DataFormats/ParticleFlowReco/interface/PFClusterSoA.h" +#include "DataFormats/ParticleFlowReco/interface/PFClusterHostCollection.h" +#include "DataFormats/ParticleFlowReco/interface/PFRecHitFractionSoA.h" +#include "DataFormats/ParticleFlowReco/interface/PFRecHitFractionHostCollection.h" diff --git a/DataFormats/ParticleFlowReco/src/classes_serial_def.xml b/DataFormats/ParticleFlowReco/src/classes_serial_def.xml index 9a97cc3b50c0b..1eb1e7c34c123 100644 --- a/DataFormats/ParticleFlowReco/src/classes_serial_def.xml +++ b/DataFormats/ParticleFlowReco/src/classes_serial_def.xml @@ -30,4 +30,36 @@ ]]> + + + + + + + + + + + + + + + + diff --git a/HeterogeneousCore/AlpakaInterface/interface/atomicMaxF.h b/HeterogeneousCore/AlpakaInterface/interface/atomicMaxF.h new file mode 100644 index 0000000000000..726f769f70a49 --- /dev/null +++ b/HeterogeneousCore/AlpakaInterface/interface/atomicMaxF.h @@ -0,0 +1,33 @@ +#ifndef HeterogeneousCore_AlpakaCore_interface_atomicMaxF_h +#define HeterogeneousCore_AlpakaCore_interface_atomicMaxF_h +#include + +#include "FWCore/Utilities/interface/bit_cast.h" +#include "HeterogeneousCore/AlpakaInterface/interface/config.h" + +#if defined(__CUDA_ARCH__) or defined(__HIP_DEVICE_COMPILE__) +template >> +static __device__ __forceinline__ float atomicMaxF(const TAcc& acc, float* address, float val) { + int ret = __float_as_int(*address); + while (val > __int_as_float(ret)) { + int old = ret; + if ((ret = atomicCAS((int*)address, old, __float_as_int(val))) == old) + break; + } + return __int_as_float(ret); +} +#else +template >> +ALPAKA_FN_ACC ALPAKA_FN_INLINE static float atomicMaxF(const TAcc& acc, float* address, float val) { + // CPU implementation uses edm::bit_cast + int ret = edm::bit_cast(*address); + while (val > edm::bit_cast(ret)) { + int old = ret; + if ((ret = alpaka::atomicCas(acc, (int*)address, old, edm::bit_cast(val))) == old) + break; + } + return edm::bit_cast(ret); +} +#endif // __CUDA_ARCH__ or __HIP_DEVICE_COMPILE__ + +#endif // HeterogeneousCore_AlpakaCore_interface_atomicMaxF_h diff --git a/RecoParticleFlow/PFClusterProducer/BuildFile.xml b/RecoParticleFlow/PFClusterProducer/BuildFile.xml index 06ef529ae273b..712bc52ed5539 100644 --- a/RecoParticleFlow/PFClusterProducer/BuildFile.xml +++ b/RecoParticleFlow/PFClusterProducer/BuildFile.xml @@ -18,6 +18,15 @@ + + + + + + + + + diff --git a/RecoParticleFlow/PFClusterProducer/interface/PFClusterParamsHostCollection.h b/RecoParticleFlow/PFClusterProducer/interface/PFClusterParamsHostCollection.h new file mode 100644 index 0000000000000..775adb4a8638e --- /dev/null +++ b/RecoParticleFlow/PFClusterProducer/interface/PFClusterParamsHostCollection.h @@ -0,0 +1,14 @@ +#ifndef RecoParticleFlow_PFClusterProducer_interface_PFClusterParamsHostCollection_h +#define RecoParticleFlow_PFClusterProducer_interface_PFClusterParamsHostCollection_h + +#include "DataFormats/Portable/interface/PortableHostCollection.h" + +#include "RecoParticleFlow/PFClusterProducer/interface/PFClusterParamsSoA.h" + +namespace reco { + + using PFClusterParamsHostCollection = PortableHostCollection; + +} + +#endif diff --git a/RecoParticleFlow/PFClusterProducer/interface/PFClusterParamsSoA.h b/RecoParticleFlow/PFClusterProducer/interface/PFClusterParamsSoA.h new file mode 100644 index 0000000000000..dded4580d0e1a --- /dev/null +++ b/RecoParticleFlow/PFClusterProducer/interface/PFClusterParamsSoA.h @@ -0,0 +1,49 @@ +#ifndef RecoParticleFlow_PFClusterProducer_interface_PFRecHitHBHEParamsSoA_h +#define RecoParticleFlow_PFClusterProducer_interface_PFRecHitHBHEParamsSoA_h + +#include "DataFormats/SoATemplate/interface/SoACommon.h" +#include "DataFormats/SoATemplate/interface/SoALayout.h" +#include "DataFormats/SoATemplate/interface/SoAView.h" + +namespace reco { + + GENERATE_SOA_LAYOUT(PFClusterParamsSoALayout, + SOA_SCALAR(int32_t, nNeigh), + SOA_SCALAR(float, seedPt2ThresholdEB), + SOA_SCALAR(float, seedPt2ThresholdEE), + SOA_COLUMN(float, seedEThresholdEB_vec), + SOA_COLUMN(float, seedEThresholdEE_vec), + SOA_COLUMN(float, topoEThresholdEB_vec), + SOA_COLUMN(float, topoEThresholdEE_vec), + SOA_SCALAR(float, showerSigma2), + SOA_SCALAR(float, minFracToKeep), + SOA_SCALAR(float, minFracTot), + SOA_SCALAR(uint32_t, maxIterations), + SOA_SCALAR(bool, excludeOtherSeeds), + SOA_SCALAR(float, stoppingTolerance), + SOA_SCALAR(float, minFracInCalc), + SOA_SCALAR(float, minAllowedNormalization), + SOA_COLUMN(float, recHitEnergyNormInvEB_vec), + SOA_COLUMN(float, recHitEnergyNormInvEE_vec), + SOA_SCALAR(float, barrelTimeResConsts_corrTermLowE), + SOA_SCALAR(float, barrelTimeResConsts_threshLowE), + SOA_SCALAR(float, barrelTimeResConsts_noiseTerm), + SOA_SCALAR(float, barrelTimeResConsts_constantTermLowE2), + SOA_SCALAR(float, barrelTimeResConsts_noiseTermLowE), + SOA_SCALAR(float, barrelTimeResConsts_threshHighE), + SOA_SCALAR(float, barrelTimeResConsts_constantTerm2), + SOA_SCALAR(float, barrelTimeResConsts_resHighE2), + SOA_SCALAR(float, endcapTimeResConsts_corrTermLowE), + SOA_SCALAR(float, endcapTimeResConsts_threshLowE), + SOA_SCALAR(float, endcapTimeResConsts_noiseTerm), + SOA_SCALAR(float, endcapTimeResConsts_constantTermLowE2), + SOA_SCALAR(float, endcapTimeResConsts_noiseTermLowE), + SOA_SCALAR(float, endcapTimeResConsts_threshHighE), + SOA_SCALAR(float, endcapTimeResConsts_constantTerm2), + SOA_SCALAR(float, endcapTimeResConsts_resHighE2)) + + using PFClusterParamsSoA = PFClusterParamsSoALayout<>; + +} // namespace reco + +#endif diff --git a/RecoParticleFlow/PFClusterProducer/interface/PFClusteringEdgeVarsSoA.h b/RecoParticleFlow/PFClusterProducer/interface/PFClusteringEdgeVarsSoA.h new file mode 100644 index 0000000000000..72da0070624b4 --- /dev/null +++ b/RecoParticleFlow/PFClusterProducer/interface/PFClusteringEdgeVarsSoA.h @@ -0,0 +1,17 @@ +#ifndef RecoParticleFlow_PFRecHitProducer_interface_PFClusteringEdgeVarsSoA_h +#define RecoParticleFlow_PFRecHitProducer_interface_PFClusteringEdgeVarsSoA_h + +#include "DataFormats/SoATemplate/interface/SoACommon.h" +#include "DataFormats/SoATemplate/interface/SoALayout.h" +#include "DataFormats/SoATemplate/interface/SoAView.h" + +namespace reco { + + GENERATE_SOA_LAYOUT(PFClusteringEdgeVarsSoALayout, + SOA_COLUMN(int, pfrh_edgeIdx), // needs nRH + 1 allocation + SOA_COLUMN(int, pfrh_edgeList)) // needs nRH + maxNeighbors allocation + + using PFClusteringEdgeVarsSoA = PFClusteringEdgeVarsSoALayout<>; +} // namespace reco + +#endif diff --git a/RecoParticleFlow/PFClusterProducer/interface/PFClusteringVarsSoA.h b/RecoParticleFlow/PFClusterProducer/interface/PFClusteringVarsSoA.h new file mode 100644 index 0000000000000..1e88366543ff4 --- /dev/null +++ b/RecoParticleFlow/PFClusterProducer/interface/PFClusteringVarsSoA.h @@ -0,0 +1,31 @@ +#ifndef RecoParticleFlow_PFClusterProducer_interface_PFClusteringVarsSoA_h +#define RecoParticleFlow_PFClusterProducer_interface_PFClusteringVarsSoA_h + +#include "DataFormats/SoATemplate/interface/SoACommon.h" +#include "DataFormats/SoATemplate/interface/SoALayout.h" +#include "DataFormats/SoATemplate/interface/SoAView.h" + +namespace reco { + + GENERATE_SOA_LAYOUT(PFClusteringVarsSoALayout, + SOA_COLUMN(int, pfrh_topoId), + SOA_COLUMN(int, pfrh_isSeed), + SOA_COLUMN(int, pfrh_passTopoThresh), + SOA_COLUMN(int, topoSeedCount), + SOA_COLUMN(int, topoRHCount), + SOA_COLUMN(int, seedFracOffsets), + SOA_COLUMN(int, topoSeedOffsets), + SOA_COLUMN(int, topoSeedList), + SOA_SCALAR(int, pcrhFracSize), + SOA_COLUMN(int, rhCount), + SOA_SCALAR(int, nEdges), + SOA_COLUMN(int, rhcount), + SOA_SCALAR(int, nTopos), + SOA_COLUMN(int, topoIds), + SOA_SCALAR(int, nRHFracs), + SOA_COLUMN(int, rhIdxToSeedIdx)) + + using PFClusteringVarsSoA = PFClusteringVarsSoALayout<>; +} // namespace reco + +#endif diff --git a/RecoParticleFlow/PFClusterProducer/interface/alpaka/PFClusterParamsDeviceCollection.h b/RecoParticleFlow/PFClusterProducer/interface/alpaka/PFClusterParamsDeviceCollection.h new file mode 100644 index 0000000000000..228246a940101 --- /dev/null +++ b/RecoParticleFlow/PFClusterProducer/interface/alpaka/PFClusterParamsDeviceCollection.h @@ -0,0 +1,21 @@ +#ifndef RecoParticleFlow_PFClusterProducer_interface_alpaka_PFClusterParamsDeviceCollection_h +#define RecoParticleFlow_PFClusterProducer_interface_alpaka_PFClusterParamsDeviceCollection_h + +#include "DataFormats/Portable/interface/alpaka/PortableCollection.h" +#include "HeterogeneousCore/AlpakaInterface/interface/config.h" + +#include "RecoParticleFlow/PFClusterProducer/interface/PFClusterParamsHostCollection.h" +#include "RecoParticleFlow/PFClusterProducer/interface/PFClusterParamsSoA.h" + +namespace ALPAKA_ACCELERATOR_NAMESPACE::reco { + + using ::reco::PFClusterParamsHostCollection; + + using PFClusterParamsDeviceCollection = PortableCollection<::reco::PFClusterParamsSoA>; + +} // namespace ALPAKA_ACCELERATOR_NAMESPACE::reco + +// check that the portable device collection for the host device is the same as the portable host collection +ASSERT_DEVICE_MATCHES_HOST_COLLECTION(reco::PFClusterParamsDeviceCollection, reco::PFClusterParamsHostCollection); + +#endif diff --git a/RecoParticleFlow/PFClusterProducer/interface/alpaka/PFClusteringEdgeVarsDeviceCollection.h b/RecoParticleFlow/PFClusterProducer/interface/alpaka/PFClusteringEdgeVarsDeviceCollection.h new file mode 100644 index 0000000000000..d2c8943af08d1 --- /dev/null +++ b/RecoParticleFlow/PFClusterProducer/interface/alpaka/PFClusteringEdgeVarsDeviceCollection.h @@ -0,0 +1,16 @@ +#ifndef RecoParticleFlow_PFRecHitProducer_interface_alpaka_PFClusteringEdgeVarsDevice_h +#define RecoParticleFlow_PFRecHitProducer_interface_alpaka_PFClusteringEdgeVarsDevice_h + +#include "DataFormats/Portable/interface/alpaka/PortableCollection.h" +#include "HeterogeneousCore/AlpakaInterface/interface/config.h" + +#include "RecoParticleFlow/PFClusterProducer/interface/PFClusteringEdgeVarsSoA.h" + +namespace ALPAKA_ACCELERATOR_NAMESPACE::reco { + + using PFClusteringEdgeVarsDeviceCollection = PortableCollection<::reco::PFClusteringEdgeVarsSoA>; + // needs nRH + maxNeighbors allocation + +} // namespace ALPAKA_ACCELERATOR_NAMESPACE::reco + +#endif diff --git a/RecoParticleFlow/PFClusterProducer/interface/alpaka/PFClusteringVarsDeviceCollection.h b/RecoParticleFlow/PFClusterProducer/interface/alpaka/PFClusteringVarsDeviceCollection.h new file mode 100644 index 0000000000000..ab20ccae7d7b4 --- /dev/null +++ b/RecoParticleFlow/PFClusterProducer/interface/alpaka/PFClusteringVarsDeviceCollection.h @@ -0,0 +1,15 @@ +#ifndef RecoParticleFlow_PFRecHitProducer_interface_alpaka_PFClusteringVarsDevice_h +#define RecoParticleFlow_PFRecHitProducer_interface_alpaka_PFClusteringVarsDevice_h + +#include "DataFormats/Portable/interface/alpaka/PortableCollection.h" +#include "HeterogeneousCore/AlpakaInterface/interface/config.h" + +#include "RecoParticleFlow/PFClusterProducer/interface/PFClusteringVarsSoA.h" + +namespace ALPAKA_ACCELERATOR_NAMESPACE::reco { + + using PFClusteringVarsDeviceCollection = PortableCollection<::reco::PFClusteringVarsSoA>; + // needs nRH allocation +} // namespace ALPAKA_ACCELERATOR_NAMESPACE::reco + +#endif diff --git a/RecoParticleFlow/PFClusterProducer/plugins/BuildFile.xml b/RecoParticleFlow/PFClusterProducer/plugins/BuildFile.xml index 48efc1b5ef274..220bee86bde6e 100644 --- a/RecoParticleFlow/PFClusterProducer/plugins/BuildFile.xml +++ b/RecoParticleFlow/PFClusterProducer/plugins/BuildFile.xml @@ -2,6 +2,7 @@ + @@ -11,9 +12,11 @@ + + @@ -38,3 +41,23 @@ + + + + + + + + + + + + + + + + + + + + diff --git a/RecoParticleFlow/PFClusterProducer/plugins/LegacyPFClusterProducer.cc b/RecoParticleFlow/PFClusterProducer/plugins/LegacyPFClusterProducer.cc new file mode 100644 index 0000000000000..36204501d432d --- /dev/null +++ b/RecoParticleFlow/PFClusterProducer/plugins/LegacyPFClusterProducer.cc @@ -0,0 +1,237 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "FWCore/Framework/interface/stream/EDProducer.h" +#include "FWCore/Utilities/interface/StreamID.h" +#include "FWCore/ParameterSet/interface/ParameterSet.h" +#include "FWCore/ParameterSet/interface/ParameterSetDescription.h" +#include "FWCore/ParameterSet/interface/ConfigurationDescriptions.h" +#include "FWCore/Utilities/interface/EDGetToken.h" +#include "FWCore/Utilities/interface/InputTag.h" +#include "FWCore/Framework/interface/Event.h" +#include "FWCore/Framework/interface/EventSetup.h" +#include "FWCore/Utilities/interface/EDPutToken.h" + +#include "DataFormats/Common/interface/Handle.h" +#include "DataFormats/DetId/interface/DetId.h" +#include "DataFormats/ParticleFlowReco/interface/PFRecHitHostCollection.h" +#include "DataFormats/ParticleFlowReco/interface/PFClusterHostCollection.h" +#include "DataFormats/ParticleFlowReco/interface/PFRecHitFractionHostCollection.h" +#include "HeterogeneousCore/CUDACore/interface/JobConfigurationGPURecord.h" +#include "RecoParticleFlow/PFClusterProducer/interface/PFCPositionCalculatorBase.h" +#include "RecoParticleFlow/PFClusterProducer/interface/PFClusterParamsHostCollection.h" + +class LegacyPFClusterProducer : public edm::stream::EDProducer<> { +public: + LegacyPFClusterProducer(edm::ParameterSet const& config) + : pfClusterSoAToken_(consumes(config.getParameter("src"))), + pfRecHitFractionSoAToken_(consumes(config.getParameter("src"))), + InputPFRecHitSoA_Token_{consumes(config.getParameter("PFRecHitsLabelIn"))}, + pfClusParamsToken_(esConsumes(config.getParameter("pfClusterParams"))), + legacyPfClustersToken_(produces()), + recHitsLabel_(consumes(config.getParameter("recHitsSource"))) { + edm::ConsumesCollector cc = consumesCollector(); + + //setup pf cluster builder if requested + const edm::ParameterSet& pfcConf = config.getParameterSet("pfClusterBuilder"); + if (!pfcConf.empty()) { + if (pfcConf.exists("positionCalc")) { + const edm::ParameterSet& acConf = pfcConf.getParameterSet("positionCalc"); + const std::string& algoac = acConf.getParameter("algoName"); + positionCalc_ = PFCPositionCalculatorFactory::get()->create(algoac, acConf, cc); + } + + if (pfcConf.exists("allCellsPositionCalc")) { + const edm::ParameterSet& acConf = pfcConf.getParameterSet("allCellsPositionCalc"); + const std::string& algoac = acConf.getParameter("algoName"); + allCellsPositionCalc_ = PFCPositionCalculatorFactory::get()->create(algoac, acConf, cc); + } + } + } + + static void fillDescriptions(edm::ConfigurationDescriptions& descriptions) { + edm::ParameterSetDescription desc; + desc.add("src"); + desc.add("PFRecHitsLabelIn"); + desc.add("pfClusterParams"); + desc.add("recHitsSource"); + { + edm::ParameterSetDescription pfClusterBuilder; + pfClusterBuilder.add("maxIterations", 5); + pfClusterBuilder.add("minFracTot", 1e-20); + pfClusterBuilder.add("minFractionToKeep", 1e-7); + pfClusterBuilder.add("excludeOtherSeeds", true); + pfClusterBuilder.add("showerSigma", 10.); + pfClusterBuilder.add("stoppingTolerance", 1e-8); + pfClusterBuilder.add("timeSigmaEB", 10.); + pfClusterBuilder.add("timeSigmaEE", 10.); + pfClusterBuilder.add("maxNSigmaTime", 10.); + pfClusterBuilder.add("minChi2Prob", 0.); + pfClusterBuilder.add("clusterTimeResFromSeed", false); + pfClusterBuilder.add("algoName", ""); + { + edm::ParameterSetDescription validator; + validator.add("detector", ""); + validator.add>("depths", {}); + validator.add>("recHitEnergyNorm", {}); + std::vector vDefaults(2); + vDefaults[0].addParameter("detector", "HCAL_BARREL1"); + vDefaults[0].addParameter>("depths", {1, 2, 3, 4}); + vDefaults[0].addParameter>("recHitEnergyNorm", {0.1, 0.2, 0.3, 0.3}); + vDefaults[1].addParameter("detector", "HCAL_ENDCAP"); + vDefaults[1].addParameter>("depths", {1, 2, 3, 4, 5, 6, 7}); + vDefaults[1].addParameter>("recHitEnergyNorm", {0.1, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2}); + pfClusterBuilder.addVPSet("recHitEnergyNorms", validator, vDefaults); + } + { + edm::ParameterSetDescription bar; + bar.add("algoName", "Basic2DGenericPFlowPositionCalc"); + bar.add("minFractionInCalc", 1e-9); + bar.add("posCalcNCrystals", 5); + { + edm::ParameterSetDescription validator; + validator.add("detector", ""); + validator.add>("depths", {}); + validator.add>("logWeightDenominator", {}); + std::vector vDefaults(2); + vDefaults[0].addParameter("detector", "HCAL_BARREL1"); + vDefaults[0].addParameter>("depths", {1, 2, 3, 4}); + vDefaults[0].addParameter>("logWeightDenominator", {0.1, 0.2, 0.3, 0.3}); + vDefaults[1].addParameter("detector", "HCAL_ENDCAP"); + vDefaults[1].addParameter>("depths", {1, 2, 3, 4, 5, 6, 7}); + vDefaults[1].addParameter>("logWeightDenominator", {0.1, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2}); + bar.addVPSet("logWeightDenominatorByDetector", validator, vDefaults); + } + bar.add("minAllowedNormalization", 1e-9); + pfClusterBuilder.add("positionCalc", bar); + } + { + edm::ParameterSetDescription bar; + bar.add("algoName", "Basic2DGenericPFlowPositionCalc"); + bar.add("minFractionInCalc", 1e-9); + bar.add("posCalcNCrystals", -1); + { + edm::ParameterSetDescription validator; + validator.add("detector", ""); + validator.add>("depths", {}); + validator.add>("logWeightDenominator", {}); + std::vector vDefaults(2); + vDefaults[0].addParameter("detector", "HCAL_BARREL1"); + vDefaults[0].addParameter>("depths", {1, 2, 3, 4}); + vDefaults[0].addParameter>("logWeightDenominator", {0.1, 0.2, 0.3, 0.3}); + vDefaults[1].addParameter("detector", "HCAL_ENDCAP"); + vDefaults[1].addParameter>("depths", {1, 2, 3, 4, 5, 6, 7}); + vDefaults[1].addParameter>("logWeightDenominator", {0.1, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2}); + bar.addVPSet("logWeightDenominatorByDetector", validator, vDefaults); + } + bar.add("minAllowedNormalization", 1e-9); + pfClusterBuilder.add("allCellsPositionCalc", bar); + } + { + edm::ParameterSetDescription bar; + bar.add("corrTermLowE", 0.); + bar.add("threshLowE", 6.); + bar.add("noiseTerm", 21.86); + bar.add("constantTermLowE", 4.24); + bar.add("noiseTermLowE", 8.); + bar.add("threshHighE", 15.); + bar.add("constantTerm", 2.82); + pfClusterBuilder.add("timeResolutionCalcBarrel", bar); + } + { + edm::ParameterSetDescription bar; + bar.add("corrTermLowE", 0.); + bar.add("threshLowE", 6.); + bar.add("noiseTerm", 21.86); + bar.add("constantTermLowE", 4.24); + bar.add("noiseTermLowE", 8.); + bar.add("threshHighE", 15.); + bar.add("constantTerm", 2.82); + pfClusterBuilder.add("timeResolutionCalcEndcap", bar); + } + { + edm::ParameterSetDescription bar; + pfClusterBuilder.add("positionReCalc", bar); + } + { + edm::ParameterSetDescription bar; + pfClusterBuilder.add("energyCorrector", bar); + } + desc.add("pfClusterBuilder", pfClusterBuilder); + } + descriptions.addWithDefaultLabel(desc); + } + +private: + void produce(edm::Event&, const edm::EventSetup&) override; + const edm::EDGetTokenT pfClusterSoAToken_; + const edm::EDGetTokenT pfRecHitFractionSoAToken_; + const edm::EDGetTokenT InputPFRecHitSoA_Token_; + const edm::ESGetToken pfClusParamsToken_; + const edm::EDPutTokenT legacyPfClustersToken_; + const edm::EDGetTokenT recHitsLabel_; + // the actual algorithm + std::unique_ptr positionCalc_; + std::unique_ptr allCellsPositionCalc_; +}; + +void LegacyPFClusterProducer::produce(edm::Event& event, const edm::EventSetup& setup) { + const reco::PFRecHitHostCollection& pfRecHits = event.get(InputPFRecHitSoA_Token_); + + auto const& pfClusterSoA = event.get(pfClusterSoAToken_).const_view(); + auto const& pfRecHitFractionSoA = event.get(pfRecHitFractionSoAToken_).const_view(); + + int nRH = pfRecHits.view().size(); + reco::PFClusterCollection out; + out.reserve(nRH); + + auto const rechitsHandle = event.getHandle(recHitsLabel_); + + // Build PFClusters in legacy format + std::vector nTopoSeeds(nRH, 0); + + for (int i = 0; i < pfClusterSoA.nSeeds(); i++) { + nTopoSeeds[pfClusterSoA[i].topoId()]++; + } + + // Looping over SoA PFClusters to produce legacy PFCluster collection + for (int i = 0; i < pfClusterSoA.nSeeds(); i++) { + unsigned int n = pfClusterSoA[i].seedRHIdx(); + reco::PFCluster temp; + temp.setSeed((*rechitsHandle)[n].detId()); // Pulling the detId of this PFRecHit from the legacy format input + int offset = pfClusterSoA[i].rhfracOffset(); + for (int k = offset; k < (offset + pfClusterSoA[i].rhfracSize()) && k >= 0; + k++) { // Looping over PFRecHits in the same topo cluster + if (pfRecHitFractionSoA[k].pfrhIdx() < nRH && pfRecHitFractionSoA[k].pfrhIdx() > -1 && + pfRecHitFractionSoA[k].frac() > 0.0) { + const reco::PFRecHitRef& refhit = reco::PFRecHitRef(rechitsHandle, pfRecHitFractionSoA[k].pfrhIdx()); + temp.addRecHitFraction(reco::PFRecHitFraction(refhit, pfRecHitFractionSoA[k].frac())); + } + } + + // Now PFRecHitFraction of this PFCluster is set. Now compute calculateAndSetPosition (energy, position etc) + if (nTopoSeeds[pfClusterSoA[i].topoId()] == 1 && allCellsPositionCalc_) { + allCellsPositionCalc_->calculateAndSetPosition( + temp, nullptr); // temporarily use nullptr until we can properly set GT thresholds + } else { + positionCalc_->calculateAndSetPosition( + temp, nullptr); // temporarily use nullptr until we can properly set GT thresholds + } + out.emplace_back(std::move(temp)); + } + + event.emplace(legacyPfClustersToken_, std::move(out)); +} + +#include "FWCore/Framework/interface/MakerMacros.h" +DEFINE_FWK_MODULE(LegacyPFClusterProducer); diff --git a/RecoParticleFlow/PFClusterProducer/plugins/alpaka/PFClusterECLCC.h b/RecoParticleFlow/PFClusterProducer/plugins/alpaka/PFClusterECLCC.h new file mode 100644 index 0000000000000..b1fc0a35f4396 --- /dev/null +++ b/RecoParticleFlow/PFClusterProducer/plugins/alpaka/PFClusterECLCC.h @@ -0,0 +1,174 @@ +#ifndef RecoParticleFlow_PFClusterProducer_plugins_alpaka_PFClusterECLCC_h +#define RecoParticleFlow_PFClusterProducer_plugins_alpaka_PFClusterECLCC_h + +#include "HeterogeneousCore/AlpakaInterface/interface/config.h" +#include "HeterogeneousCore/AlpakaInterface/interface/workdivision.h" +#include "RecoParticleFlow/PFClusterProducer/interface/alpaka/PFClusteringVarsDeviceCollection.h" +#include "RecoParticleFlow/PFClusterProducer/interface/alpaka/PFClusteringEdgeVarsDeviceCollection.h" + +// The following comment block is required in using the ECL-CC algorithm for topological clustering + +/* + ECL-CC code: ECL-CC is a connected components graph algorithm. The CUDA + implementation thereof is quite fast. It operates on graphs stored in + binary CSR format. + + Copyright (c) 2017-2020, Texas State University. All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of Texas State University nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND + ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + DISCLAIMED. IN NO EVENT SHALL TEXAS STATE UNIVERSITY BE LIABLE FOR ANY + DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + Authors: Jayadharini Jaiganesh and Martin Burtscher + + URL: The latest version of this code is available at + https://userweb.cs.txstate.edu/~burtscher/research/ECL-CC/. + + Publication: This work is described in detail in the following paper. + Jayadharini Jaiganesh and Martin Burtscher. A High-Performance Connected + Components Implementation for GPUs. Proceedings of the 2018 ACM International + Symposium on High-Performance Parallel and Distributed Computing, pp. 92-104. + June 2018. +*/ + +/* + The code is modified for the specific use-case of generating topological clusters + for PFClustering. It is adapted to work with the Alpaka portability library. The + kernels for processing vertices at warp and block level granularity have been + removed since the degree of vertices in our inputs is only ever 8; the number of + neighbors. +*/ + +namespace ALPAKA_ACCELERATOR_NAMESPACE { + + /* intermediate pointer jumping */ + + ALPAKA_FN_ACC inline int representative(const int idx, + reco::PFClusteringVarsDeviceCollection::View pfClusteringVars) { + int curr = pfClusteringVars[idx].pfrh_topoId(); + if (curr != idx) { + int next, prev = idx; + while (curr > (next = pfClusteringVars[curr].pfrh_topoId())) { + pfClusteringVars[prev].pfrh_topoId() = next; + prev = curr; + curr = next; + } + } + return curr; + } + + // Initial step of ECL-CC. Uses ID of first neighbour in edgeList with a smaller ID + class ECLCCInit { + public: + template >> + ALPAKA_FN_ACC void operator()(const TAcc& acc, + reco::PFRecHitHostCollection::ConstView pfRecHits, + reco::PFClusteringVarsDeviceCollection::View pfClusteringVars, + reco::PFClusteringEdgeVarsDeviceCollection::View pfClusteringEdgeVars) const { + const int nRH = pfRecHits.size(); + for (int v : cms::alpakatools::elements_with_stride(acc, nRH)) { + const int beg = pfClusteringEdgeVars[v].pfrh_edgeIdx(); + const int end = pfClusteringEdgeVars[v + 1].pfrh_edgeIdx(); + int m = v; + int i = beg; + while ((m == v) && (i < end)) { + m = std::min(m, pfClusteringEdgeVars[i].pfrh_edgeList()); + i++; + } + pfClusteringVars[v].pfrh_topoId() = m; + } + } + }; + + // First edge processing kernel of ECL-CC + // Processes vertices + class ECLCCCompute1 { + public: + template >> + ALPAKA_FN_ACC void operator()(const TAcc& acc, + reco::PFRecHitHostCollection::ConstView pfRecHits, + reco::PFClusteringVarsDeviceCollection::View pfClusteringVars, + reco::PFClusteringEdgeVarsDeviceCollection::View pfClusteringEdgeVars) const { + const int nRH = pfRecHits.size(); + + for (int v : cms::alpakatools::elements_with_stride(acc, nRH)) { + const int vstat = pfClusteringVars[v].pfrh_topoId(); + if (v != vstat) { + const int beg = pfClusteringEdgeVars[v].pfrh_edgeIdx(); + const int end = pfClusteringEdgeVars[v + 1].pfrh_edgeIdx(); + int vstat = representative(v, pfClusteringVars); + for (int i = beg; i < end; i++) { + const int nli = pfClusteringEdgeVars[i].pfrh_edgeList(); + if (v > nli) { + int ostat = representative(nli, pfClusteringVars); + bool repeat; + do { + repeat = false; + if (vstat != ostat) { + int ret; + if (vstat < ostat) { + if ((ret = alpaka::atomicCas(acc, &pfClusteringVars[ostat].pfrh_topoId(), ostat, vstat)) != ostat) { + ostat = ret; + repeat = true; + } + } else { + if ((ret = alpaka::atomicCas(acc, &pfClusteringVars[vstat].pfrh_topoId(), vstat, ostat)) != vstat) { + vstat = ret; + repeat = true; + } + } + } + } while (repeat); + } + } + } + } + } + }; + + /* link all vertices to sink */ + class ECLCCFlatten { + public: + template >> + ALPAKA_FN_ACC void operator()(const TAcc& acc, + reco::PFRecHitHostCollection::ConstView pfRecHits, + reco::PFClusteringVarsDeviceCollection::View pfClusteringVars, + reco::PFClusteringEdgeVarsDeviceCollection::View pfClusteringEdgeVars) const { + const int nRH = pfRecHits.size(); + + for (int v : cms::alpakatools::elements_with_stride(acc, nRH)) { + int next, vstat = pfClusteringVars[v].pfrh_topoId(); + const int old = vstat; + while (vstat > (next = pfClusteringVars[vstat].pfrh_topoId())) { + vstat = next; + } + if (old != vstat) + pfClusteringVars[v].pfrh_topoId() = vstat; + } + } + }; + + // ECL-CC ends + +} // namespace ALPAKA_ACCELERATOR_NAMESPACE + +#endif diff --git a/RecoParticleFlow/PFClusterProducer/plugins/alpaka/PFClusterParamsESProducer.cc b/RecoParticleFlow/PFClusterProducer/plugins/alpaka/PFClusterParamsESProducer.cc new file mode 100644 index 0000000000000..8a5da486752ce --- /dev/null +++ b/RecoParticleFlow/PFClusterProducer/plugins/alpaka/PFClusterParamsESProducer.cc @@ -0,0 +1,249 @@ +#include "FWCore/Framework/interface/EventSetupRecordIntervalFinder.h" +#include "FWCore/ParameterSet/interface/ConfigurationDescriptions.h" +#include "FWCore/ParameterSet/interface/ParameterSet.h" +#include "FWCore/ParameterSet/interface/ParameterSetDescription.h" +#include "FWCore/Utilities/interface/Exception.h" +#include "HeterogeneousCore/AlpakaCore/interface/alpaka/ESProducer.h" +#include "HeterogeneousCore/AlpakaCore/interface/alpaka/ModuleFactory.h" +#include "HeterogeneousCore/AlpakaInterface/interface/config.h" +#include "HeterogeneousCore/AlpakaInterface/interface/memory.h" +#include "HeterogeneousCore/CUDACore/interface/JobConfigurationGPURecord.h" +#include "RecoParticleFlow/PFClusterProducer/interface/alpaka/PFClusterParamsDeviceCollection.h" + +#include + +namespace ALPAKA_ACCELERATOR_NAMESPACE { + + class PFClusterParamsESProducer : public ESProducer { + public: + PFClusterParamsESProducer(edm::ParameterSet const& iConfig) : ESProducer(iConfig) { + constexpr static uint32_t kMaxDepth_barrel = 4; + constexpr static uint32_t kMaxDepth_endcap = 7; + product = std::make_shared(std::max(kMaxDepth_barrel, kMaxDepth_endcap), + cms::alpakatools::host()); + auto view = product->view(); + + // seedFinder + auto const& sfConf = iConfig.getParameterSet("seedFinder"); + view.nNeigh() = sfConf.getParameter("nNeighbours"); + auto const& seedFinderConfs = sfConf.getParameterSetVector("thresholdsByDetector"); + for (auto const& pset : seedFinderConfs) { + auto const& det = pset.getParameter("detector"); + auto seedPt2Threshold = std::pow(pset.getParameter("seedingThresholdPt"), 2.); + auto const& thresholds = pset.getParameter>("seedingThreshold"); + if (det == "HCAL_BARREL1") { + if (thresholds.size() != kMaxDepth_barrel) + throw cms::Exception("Configuration") << "Invalid size (" << thresholds.size() << " != " << kMaxDepth_barrel + << ") for \"\" vector of det = \"" << det << "\""; + view.seedPt2ThresholdEB() = seedPt2Threshold; + for (size_t idx = 0; idx < thresholds.size(); ++idx) { + view.seedEThresholdEB_vec()[idx] = thresholds[idx]; + } + } else if (det == "HCAL_ENDCAP") { + if (thresholds.size() != kMaxDepth_endcap) + throw cms::Exception("Configuration") << "Invalid size (" << thresholds.size() << " != " << kMaxDepth_endcap + << ") for \"\" vector of det = \"" << det << "\""; + view.seedPt2ThresholdEE() = seedPt2Threshold; + for (size_t idx = 0; idx < thresholds.size(); ++idx) { + view.seedEThresholdEE_vec()[idx] = thresholds[idx]; + } + } else { + throw cms::Exception("Configuration") << "Unknown detector when parsing seedFinder: " << det; + } + } + + // initialClusteringStep + auto const& initConf = iConfig.getParameterSet("initialClusteringStep"); + auto const& topoThresholdConf = initConf.getParameterSetVector("thresholdsByDetector"); + for (auto const& pset : topoThresholdConf) { + auto const& det = pset.getParameter("detector"); + auto const& thresholds = pset.getParameter>("gatheringThreshold"); + if (det == "HCAL_BARREL1") { + if (thresholds.size() != kMaxDepth_barrel) + throw cms::Exception("Configuration") << "Invalid size (" << thresholds.size() << " != " << kMaxDepth_barrel + << ") for \"\" vector of det = \"" << det << "\""; + for (size_t idx = 0; idx < thresholds.size(); ++idx) { + view.topoEThresholdEB_vec()[idx] = thresholds[idx]; + } + } else if (det == "HCAL_ENDCAP") { + if (thresholds.size() != kMaxDepth_endcap) + throw cms::Exception("Configuration") << "Invalid size (" << thresholds.size() << " != " << kMaxDepth_endcap + << ") for \"\" vector of det = \"" << det << "\""; + for (size_t idx = 0; idx < thresholds.size(); ++idx) { + view.topoEThresholdEE_vec()[idx] = thresholds[idx]; + } + } else { + throw cms::Exception("Configuration") << "Unknown detector when parsing initClusteringStep: " << det; + } + } + + // pfClusterBuilder + auto const& pfClusterPSet = iConfig.getParameterSet("pfClusterBuilder"); + view.showerSigma2() = std::pow(pfClusterPSet.getParameter("showerSigma"), 2.); + view.minFracToKeep() = pfClusterPSet.getParameter("minFractionToKeep"); + view.minFracTot() = pfClusterPSet.getParameter("minFracTot"); + view.maxIterations() = pfClusterPSet.getParameter("maxIterations"); + view.excludeOtherSeeds() = pfClusterPSet.getParameter("excludeOtherSeeds"); + view.stoppingTolerance() = pfClusterPSet.getParameter("stoppingTolerance"); + auto const& pcPSet = pfClusterPSet.getParameterSet("positionCalc"); + view.minFracInCalc() = pcPSet.getParameter("minFractionInCalc"); + view.minAllowedNormalization() = pcPSet.getParameter("minAllowedNormalization"); + + auto const& recHitEnergyNormConf = pfClusterPSet.getParameterSetVector("recHitEnergyNorms"); + for (auto const& pset : recHitEnergyNormConf) { + auto const& recHitNorms = pset.getParameter>("recHitEnergyNorm"); + auto const& det = pset.getParameter("detector"); + if (det == "HCAL_BARREL1") { + if (recHitNorms.size() != kMaxDepth_barrel) + throw cms::Exception("Configuration") + << "Invalid size (" << recHitNorms.size() << " != " << kMaxDepth_barrel + << ") for \"\" vector of det = \"" << det << "\""; + for (size_t idx = 0; idx < recHitNorms.size(); ++idx) { + view.recHitEnergyNormInvEB_vec()[idx] = 1. / recHitNorms[idx]; + } + } else if (det == "HCAL_ENDCAP") { + if (recHitNorms.size() != kMaxDepth_endcap) + throw cms::Exception("Configuration") + << "Invalid size (" << recHitNorms.size() << " != " << kMaxDepth_endcap + << ") for \"\" vector of det = \"" << det << "\""; + for (size_t idx = 0; idx < recHitNorms.size(); ++idx) { + view.recHitEnergyNormInvEE_vec()[idx] = 1. / recHitNorms[idx]; + } + } else { + throw cms::Exception("Configuration") << "Unknown detector when parsing recHitEnergyNorms: " << det; + } + } + + auto const& barrelTimeResConf = pfClusterPSet.getParameterSet("timeResolutionCalcBarrel"); + view.barrelTimeResConsts_corrTermLowE() = barrelTimeResConf.getParameter("corrTermLowE"); + view.barrelTimeResConsts_threshLowE() = barrelTimeResConf.getParameter("threshLowE"); + view.barrelTimeResConsts_noiseTerm() = barrelTimeResConf.getParameter("noiseTerm"); + view.barrelTimeResConsts_constantTermLowE2() = + std::pow(barrelTimeResConf.getParameter("constantTermLowE"), 2.); + view.barrelTimeResConsts_noiseTermLowE() = barrelTimeResConf.getParameter("noiseTermLowE"); + view.barrelTimeResConsts_threshHighE() = barrelTimeResConf.getParameter("threshHighE"); + view.barrelTimeResConsts_constantTerm2() = std::pow(barrelTimeResConf.getParameter("constantTerm"), 2.); + view.barrelTimeResConsts_resHighE2() = + std::pow(view.barrelTimeResConsts_noiseTerm() / view.barrelTimeResConsts_threshHighE(), 2.) + + view.barrelTimeResConsts_constantTerm2(); + + auto const& endcapTimeResConf = pfClusterPSet.getParameterSet("timeResolutionCalcEndcap"); + view.endcapTimeResConsts_corrTermLowE() = endcapTimeResConf.getParameter("corrTermLowE"); + view.endcapTimeResConsts_threshLowE() = endcapTimeResConf.getParameter("threshLowE"); + view.endcapTimeResConsts_noiseTerm() = endcapTimeResConf.getParameter("noiseTerm"); + view.endcapTimeResConsts_constantTermLowE2() = + std::pow(endcapTimeResConf.getParameter("constantTermLowE"), 2.); + view.endcapTimeResConsts_noiseTermLowE() = endcapTimeResConf.getParameter("noiseTermLowE"); + view.endcapTimeResConsts_threshHighE() = endcapTimeResConf.getParameter("threshHighE"); + view.endcapTimeResConsts_constantTerm2() = std::pow(endcapTimeResConf.getParameter("constantTerm"), 2.); + view.endcapTimeResConsts_resHighE2() = + std::pow(view.endcapTimeResConsts_noiseTerm() / view.endcapTimeResConsts_threshHighE(), 2.) + + view.endcapTimeResConsts_constantTerm2(); + + setWhatProduced(this); + } + + static void fillDescriptions(edm::ConfigurationDescriptions& descriptions) { + edm::ParameterSetDescription psetDesc; + { + auto const psetName = "seedFinder"; + edm::ParameterSetDescription foo; + foo.add("nNeighbours", 4); + { + edm::ParameterSetDescription validator; + validator.add("detector", ""); + validator.add>("seedingThreshold", {}); + validator.add("seedingThresholdPt", 0.); + std::vector vDefaults(2); + vDefaults[0].addParameter("detector", "HCAL_BARREL1"); + vDefaults[0].addParameter>("seedingThreshold", {0.125, 0.25, 0.35, 0.35}); + vDefaults[0].addParameter("seedingThresholdPt", 0.); + vDefaults[1].addParameter("detector", "HCAL_ENDCAP"); + vDefaults[1].addParameter>("seedingThreshold", + {0.1375, 0.275, 0.275, 0.275, 0.275, 0.275, 0.275}); + vDefaults[1].addParameter("seedingThresholdPt", 0.); + foo.addVPSet("thresholdsByDetector", validator, vDefaults); + } + psetDesc.add(psetName, foo); + } + { + auto const psetName = "initialClusteringStep"; + edm::ParameterSetDescription foo; + { + edm::ParameterSetDescription validator; + validator.add("detector", ""); + validator.add>("gatheringThreshold", {}); + std::vector vDefaults(2); + vDefaults[0].addParameter("detector", "HCAL_BARREL1"); + vDefaults[0].addParameter>("gatheringThreshold", {0.1, 0.2, 0.3, 0.3}); + vDefaults[1].addParameter("detector", "HCAL_ENDCAP"); + vDefaults[1].addParameter>("gatheringThreshold", {0.1, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2}); + foo.addVPSet("thresholdsByDetector", validator, vDefaults); + } + psetDesc.add(psetName, foo); + } + { + auto const psetName = "pfClusterBuilder"; + edm::ParameterSetDescription foo; + foo.add("maxIterations", 50); + foo.add("minFracTot", 1e-20); + foo.add("minFractionToKeep", 1e-7); + foo.add("excludeOtherSeeds", true); + foo.add("showerSigma", 10.); + foo.add("stoppingTolerance", 1e-8); + { + edm::ParameterSetDescription validator; + validator.add("detector", ""); + validator.add>("recHitEnergyNorm", {}); + std::vector vDefaults(2); + vDefaults[0].addParameter("detector", "HCAL_BARREL1"); + vDefaults[0].addParameter>("recHitEnergyNorm", {0.1, 0.2, 0.3, 0.3}); + vDefaults[1].addParameter("detector", "HCAL_ENDCAP"); + vDefaults[1].addParameter>("recHitEnergyNorm", {0.1, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2}); + foo.addVPSet("recHitEnergyNorms", validator, vDefaults); + } + { + edm::ParameterSetDescription bar; + bar.add("minFractionInCalc", 1e-9); + bar.add("minAllowedNormalization", 1e-9); + foo.add("positionCalc", bar); + } + { + edm::ParameterSetDescription bar; + bar.add("corrTermLowE", 0.); + bar.add("threshLowE", 6.); + bar.add("noiseTerm", 21.86); + bar.add("constantTermLowE", 4.24); + bar.add("noiseTermLowE", 8.); + bar.add("threshHighE", 15.); + bar.add("constantTerm", 2.82); + foo.add("timeResolutionCalcBarrel", bar); + } + { + edm::ParameterSetDescription bar; + bar.add("corrTermLowE", 0.); + bar.add("threshLowE", 6.); + bar.add("noiseTerm", 21.86); + bar.add("constantTermLowE", 4.24); + bar.add("noiseTermLowE", 8.); + bar.add("threshHighE", 15.); + bar.add("constantTerm", 2.82); + foo.add("timeResolutionCalcEndcap", bar); + } + psetDesc.add(psetName, foo); + } + + descriptions.addWithDefaultLabel(psetDesc); + } + + std::shared_ptr produce(JobConfigurationGPURecord const& iRecord) { + return product; + } + + private: + std::shared_ptr product; + }; + +} // namespace ALPAKA_ACCELERATOR_NAMESPACE + +DEFINE_FWK_EVENTSETUP_ALPAKA_MODULE(PFClusterParamsESProducer); diff --git a/RecoParticleFlow/PFClusterProducer/plugins/alpaka/PFClusterSoAProducer.cc b/RecoParticleFlow/PFClusterProducer/plugins/alpaka/PFClusterSoAProducer.cc new file mode 100644 index 0000000000000..e4291f8e705c8 --- /dev/null +++ b/RecoParticleFlow/PFClusterProducer/plugins/alpaka/PFClusterSoAProducer.cc @@ -0,0 +1,69 @@ +#include +#include +#include "DataFormats/ParticleFlowReco/interface/PFRecHitHostCollection.h" +#include "FWCore/Utilities/interface/StreamID.h" +#include "FWCore/ParameterSet/interface/ParameterSet.h" +#include "FWCore/ParameterSet/interface/ParameterSetDescription.h" +#include "FWCore/ParameterSet/interface/ConfigurationDescriptions.h" +#include "FWCore/Utilities/interface/EDGetToken.h" +#include "FWCore/Utilities/interface/InputTag.h" +#include "HeterogeneousCore/AlpakaCore/interface/alpaka/stream/EDProducer.h" +#include "HeterogeneousCore/CUDACore/interface/JobConfigurationGPURecord.h" +#include "RecoParticleFlow/PFClusterProducer/interface/alpaka/PFClusterParamsDeviceCollection.h" +#include "RecoParticleFlow/PFClusterProducer/plugins/alpaka/PFClusterSoAProducerKernel.h" +#include "RecoParticleFlow/PFClusterProducer/interface/PFCPositionCalculatorBase.h" + +namespace ALPAKA_ACCELERATOR_NAMESPACE { + class PFClusterSoAProducer : public stream::EDProducer<> { + public: + PFClusterSoAProducer(edm::ParameterSet const& config) + : pfClusParamsToken(esConsumes(config.getParameter("pfClusterParams"))), + inputPFRecHitSoA_Token_{consumes(config.getParameter("pfRecHits"))}, + outputPFClusterSoA_Token_{produces()}, + outputPFRHFractionSoA_Token_{produces()}, + synchronise_(config.getParameter("synchronise")), + pfRecHitFractionAllocation_(config.getParameter("pfRecHitFractionAllocation")) {} + + void produce(device::Event& event, device::EventSetup const& setup) override { + const reco::PFClusterParamsDeviceCollection& params = setup.getData(pfClusParamsToken); + const reco::PFRecHitHostCollection& pfRecHits = event.get(inputPFRecHitSoA_Token_); + const int nRH = pfRecHits->size(); + + reco::PFClusteringVarsDeviceCollection pfClusteringVars{nRH, event.queue()}; + reco::PFClusteringEdgeVarsDeviceCollection pfClusteringEdgeVars{(nRH * 8), event.queue()}; + reco::PFClusterDeviceCollection pfClusters{nRH, event.queue()}; + reco::PFRecHitFractionDeviceCollection pfrhFractions{nRH * pfRecHitFractionAllocation_, event.queue()}; + + PFClusterProducerKernel kernel(event.queue(), pfRecHits); + kernel.execute( + event.queue(), params, pfClusteringVars, pfClusteringEdgeVars, pfRecHits, pfClusters, pfrhFractions); + + if (synchronise_) + alpaka::wait(event.queue()); + + event.emplace(outputPFClusterSoA_Token_, std::move(pfClusters)); + event.emplace(outputPFRHFractionSoA_Token_, std::move(pfrhFractions)); + } + + static void fillDescriptions(edm::ConfigurationDescriptions& descriptions) { + edm::ParameterSetDescription desc; + desc.add("pfRecHits"); + desc.add("pfClusterParams"); + desc.add("synchronise"); + desc.add("pfRecHitFractionAllocation", 120); + descriptions.addWithDefaultLabel(desc); + } + + private: + const device::ESGetToken pfClusParamsToken; + const edm::EDGetTokenT inputPFRecHitSoA_Token_; + const device::EDPutToken outputPFClusterSoA_Token_; + const device::EDPutToken outputPFRHFractionSoA_Token_; + const bool synchronise_; + const int pfRecHitFractionAllocation_; + }; + +} // namespace ALPAKA_ACCELERATOR_NAMESPACE + +#include "HeterogeneousCore/AlpakaCore/interface/alpaka/MakerMacros.h" +DEFINE_FWK_ALPAKA_MODULE(PFClusterSoAProducer); diff --git a/RecoParticleFlow/PFClusterProducer/plugins/alpaka/PFClusterSoAProducerKernel.dev.cc b/RecoParticleFlow/PFClusterProducer/plugins/alpaka/PFClusterSoAProducerKernel.dev.cc new file mode 100644 index 0000000000000..6b0f4dc27d88b --- /dev/null +++ b/RecoParticleFlow/PFClusterProducer/plugins/alpaka/PFClusterSoAProducerKernel.dev.cc @@ -0,0 +1,1513 @@ +#include + +#include "FWCore/Utilities/interface/bit_cast.h" +#include "HeterogeneousCore/AlpakaInterface/interface/config.h" +#include "HeterogeneousCore/AlpakaInterface/interface/workdivision.h" +#include "HeterogeneousCore/AlpakaInterface/interface/atomicMaxF.h" + +#include "DataFormats/ParticleFlowReco/interface/PFLayer.h" +#include "RecoParticleFlow/PFClusterProducer/plugins/alpaka/PFClusterSoAProducerKernel.h" +#include "RecoParticleFlow/PFClusterProducer/plugins/alpaka/PFClusterECLCC.h" + +namespace ALPAKA_ACCELERATOR_NAMESPACE { + + using namespace cms::alpakatools; + + using namespace reco::pfClustering; + + static constexpr int threadsPerBlockForClustering = 512; + static constexpr uint32_t blocksForExoticClusters = 4; + + // cutoffFraction -> Is a rechit almost entirely attributed to one cluster + // cutoffDistance -> Is a rechit close enough to a cluster to be associated + // Values are from RecoParticleFlow/PFClusterProducer/plugins/Basic2DGenericPFlowClusterizer.cc + static constexpr float cutoffDistance = 100.; + static constexpr float cutoffFraction = 0.9999; + + static constexpr uint32_t kHBHalf = 1296; + static constexpr uint32_t maxTopoInput = 2 * kHBHalf; + + // Calculation of dR2 for Clustering + ALPAKA_FN_ACC ALPAKA_FN_INLINE static float dR2(Position4 pos1, Position4 pos2) { + float mag1 = sqrtf(pos1.x * pos1.x + pos1.y * pos1.y + pos1.z * pos1.z); + float cosTheta1 = mag1 > 0.0 ? pos1.z / mag1 : 1.0; + float eta1 = 0.5f * logf((1.0f + cosTheta1) / (1.0f - cosTheta1)); + float phi1 = atan2f(pos1.y, pos1.x); + + float mag2 = sqrtf(pos2.x * pos2.x + pos2.y * pos2.y + pos2.z * pos2.z); + float cosTheta2 = mag2 > 0.0 ? pos2.z / mag2 : 1.0; + float eta2 = 0.5f * logf((1.0f + cosTheta2) / (1.0f - cosTheta2)); + float phi2 = atan2f(pos2.y, pos2.x); + + float deta = eta2 - eta1; + constexpr const float fPI = M_PI; + float dphi = std::abs(std::abs(phi2 - phi1) - fPI) - fPI; + return (deta * deta + dphi * dphi); + } + + // Get index of seed + ALPAKA_FN_ACC static auto getSeedRhIdx(int* seeds, int seedNum) { return seeds[seedNum]; } + + // Get rechit fraction of a given rechit for a given seed + ALPAKA_FN_ACC static auto getRhFrac(reco::PFClusteringVarsDeviceCollection::View pfClusteringVars, + int topoSeedBegin, + reco::PFRecHitFractionDeviceCollection::View fracView, + int seedNum, + int rhNum) { + int seedIdx = pfClusteringVars[topoSeedBegin + seedNum].topoSeedList(); + return fracView[pfClusteringVars[seedIdx].seedFracOffsets() + rhNum].frac(); + } + + // Cluster position calculation + template + ALPAKA_FN_ACC static void updateClusterPos(reco::PFClusterParamsDeviceCollection::ConstView pfClusParams, + Position4& pos4, + float frac, + int rhInd, + reco::PFRecHitDeviceCollection::ConstView pfRecHits, + float rhENormInv) { + Position4 rechitPos = Position4{pfRecHits[rhInd].x(), pfRecHits[rhInd].y(), pfRecHits[rhInd].z(), 1.0}; + const auto rh_energy = pfRecHits[rhInd].energy() * frac; + const auto norm = (frac < pfClusParams.minFracInCalc() ? 0.0f : std::max(0.0f, logf(rh_energy * rhENormInv))); + if constexpr (debug) + printf("\t\t\trechit %d: norm = %f\tfrac = %f\trh_energy = %f\tpos = (%f, %f, %f)\n", + rhInd, + norm, + frac, + rh_energy, + rechitPos.x, + rechitPos.y, + rechitPos.z); + + pos4.x += rechitPos.x * norm; + pos4.y += rechitPos.y * norm; + pos4.z += rechitPos.z * norm; + pos4.w += norm; // position_norm + } + + // Processing single seed clusters + // Device function designed to be called by all threads of a given block + template >> + ALPAKA_FN_ACC static void hcalFastCluster_singleSeed(const TAcc& acc, + reco::PFClusterParamsDeviceCollection::ConstView pfClusParams, + int topoId, // from selection + int nRHTopo, // from selection + reco::PFRecHitDeviceCollection::ConstView pfRecHits, + reco::PFClusteringVarsDeviceCollection::View pfClusteringVars, + reco::PFClusterDeviceCollection::View clusterView, + reco::PFRecHitFractionDeviceCollection::View fracView) { + int tid = alpaka::getIdx(acc)[0u]; // thread index is rechit number + // Declaration of shared variables + int& i = alpaka::declareSharedVar(acc); + int& nRHOther = alpaka::declareSharedVar(acc); + unsigned int& iter = alpaka::declareSharedVar(acc); + float& tol = alpaka::declareSharedVar(acc); + float& clusterEnergy = alpaka::declareSharedVar(acc); + float& rhENormInv = alpaka::declareSharedVar(acc); + float& seedEnergy = alpaka::declareSharedVar(acc); + Position4& clusterPos = alpaka::declareSharedVar(acc); + Position4& prevClusterPos = alpaka::declareSharedVar(acc); + Position4& seedPos = alpaka::declareSharedVar(acc); + bool& notDone = alpaka::declareSharedVar(acc); + if (once_per_block(acc)) { + i = pfClusteringVars[pfClusteringVars[topoId].topoSeedOffsets()].topoSeedList(); // i is the seed rechit index + nRHOther = nRHTopo - 1; // number of non-seed rechits + seedPos = Position4{pfRecHits[i].x(), pfRecHits[i].y(), pfRecHits[i].z(), 1.}; + clusterPos = seedPos; // Initial cluster position is just the seed + prevClusterPos = seedPos; + seedEnergy = pfRecHits[i].energy(); + clusterEnergy = seedEnergy; + tol = pfClusParams.stoppingTolerance(); // stopping tolerance * tolerance scaling + + if (pfRecHits[i].layer() == PFLayer::HCAL_BARREL1) + rhENormInv = pfClusParams.recHitEnergyNormInvEB_vec()[pfRecHits[i].depth() - 1]; + else if (pfRecHits[i].layer() == PFLayer::HCAL_ENDCAP) + rhENormInv = pfClusParams.recHitEnergyNormInvEE_vec()[pfRecHits[i].depth() - 1]; + else { + rhENormInv = 0.; + printf("Rechit %d has invalid layer %d!\n", i, pfRecHits[i].layer()); + } + + iter = 0; + notDone = true; + } + alpaka::syncBlockThreads(acc); // all threads call sync + + int j = -1; // j is the rechit index for this thread + int rhFracOffset = -1; + Position4 rhPos; + float rhEnergy = -1., rhPosNorm = -1.; + + if (tid < nRHOther) { + rhFracOffset = + pfClusteringVars[i].seedFracOffsets() + tid + 1; // Offset for this rechit in pcrhfrac, pcrhfracidx arrays + j = fracView[rhFracOffset].pfrhIdx(); // rechit index for this thread + rhPos = Position4{pfRecHits[j].x(), pfRecHits[j].y(), pfRecHits[j].z(), 1.}; + rhEnergy = pfRecHits[j].energy(); + rhPosNorm = fmaxf(0., logf(rhEnergy * rhENormInv)); + } + alpaka::syncBlockThreads(acc); // all threads call sync + + do { + if constexpr (debug) { + if (once_per_block(acc)) + printf("\n--- Now on iter %d for topoId %d ---\n", iter, topoId); + } + float dist2 = -1., d2 = -1., fraction = -1.; + if (tid < nRHOther) { + // Rechit distance calculation + dist2 = (clusterPos.x - rhPos.x) * (clusterPos.x - rhPos.x) + + (clusterPos.y - rhPos.y) * (clusterPos.y - rhPos.y) + + (clusterPos.z - rhPos.z) * (clusterPos.z - rhPos.z); + + d2 = dist2 / pfClusParams.showerSigma2(); + fraction = clusterEnergy * rhENormInv * expf(-0.5 * d2); + + // For single seed clusters, rechit fraction is either 1 (100%) or -1 (not included) + if (fraction > pfClusParams.minFracTot() && d2 < cutoffDistance) + fraction = 1.; + else + fraction = -1.; + fracView[rhFracOffset].frac() = fraction; + } + alpaka::syncBlockThreads(acc); // all threads call sync + + if constexpr (debug) { + if (once_per_block(acc)) + printf("Computing cluster position for topoId %d\n", topoId); + } + + if (once_per_block(acc)) { + // Reset cluster position and energy + clusterPos = seedPos; + clusterEnergy = seedEnergy; + } + alpaka::syncBlockThreads(acc); // all threads call sync + + // Recalculate cluster position and energy + if (fraction > -0.5) { + alpaka::atomicAdd(acc, &clusterEnergy, rhEnergy, alpaka::hierarchy::Threads{}); + alpaka::atomicAdd(acc, &clusterPos.x, rhPos.x * rhPosNorm, alpaka::hierarchy::Threads{}); + alpaka::atomicAdd(acc, &clusterPos.y, rhPos.y * rhPosNorm, alpaka::hierarchy::Threads{}); + alpaka::atomicAdd(acc, &clusterPos.z, rhPos.z * rhPosNorm, alpaka::hierarchy::Threads{}); + alpaka::atomicAdd(acc, &clusterPos.w, rhPosNorm, alpaka::hierarchy::Threads{}); // position_norm + } + alpaka::syncBlockThreads(acc); // all threads call sync + + if (once_per_block(acc)) { + // Normalize the seed postiion + if (clusterPos.w >= pfClusParams.minAllowedNormalization()) { + // Divide by position norm + clusterPos.x /= clusterPos.w; + clusterPos.y /= clusterPos.w; + clusterPos.z /= clusterPos.w; + + if constexpr (debug) + printf("\tPF cluster (seed %d) energy = %f\tposition = (%f, %f, %f)\n", + i, + clusterEnergy, + clusterPos.x, + clusterPos.y, + clusterPos.z); + } else { + if constexpr (debug) + printf("\tPF cluster (seed %d) position norm (%f) less than minimum (%f)\n", + i, + clusterPos.w, + pfClusParams.minAllowedNormalization()); + clusterPos.x = 0.; + clusterPos.y = 0.; + clusterPos.z = 0.; + } + float diff2 = dR2(prevClusterPos, clusterPos); + if constexpr (debug) + printf("\tPF cluster (seed %d) has diff2 = %f\n", i, diff2); + prevClusterPos = clusterPos; // Save clusterPos + + float tol2 = tol * tol; + iter++; + notDone = (diff2 > tol2) && (iter < pfClusParams.maxIterations()); + if constexpr (debug) { + if (diff2 > tol2) + printf("\tTopoId %d has diff2 = %f greater than squared tolerance %f (continuing)\n", topoId, diff2, tol2); + else if constexpr (debug) + printf("\tTopoId %d has diff2 = %f LESS than squared tolerance %f (terminating!)\n", topoId, diff2, tol2); + } + } + alpaka::syncBlockThreads(acc); // all threads call sync + } while (notDone); // shared variable condition ensures synchronization is well defined + if (once_per_block(acc)) { // Cluster is finalized, assign cluster information to te SoA + int rhIdx = + pfClusteringVars[pfClusteringVars[topoId].topoSeedOffsets()].topoSeedList(); // i is the seed rechit index + int seedIdx = pfClusteringVars[rhIdx].rhIdxToSeedIdx(); + clusterView[seedIdx].energy() = clusterEnergy; + clusterView[seedIdx].x() = clusterPos.x; + clusterView[seedIdx].y() = clusterPos.y; + clusterView[seedIdx].z() = clusterPos.z; + } + } + + // Processing clusters up to 100 seeds and 512 non-seed rechits using shared memory accesses + // Device function designed to be called by all threads of a given block + template >> + ALPAKA_FN_ACC static void hcalFastCluster_multiSeedParallel( + const TAcc& acc, + reco::PFClusterParamsDeviceCollection::ConstView pfClusParams, + int topoId, // from selection + int nSeeds, // from selection + int nRHTopo, // from selection + reco::PFRecHitDeviceCollection::ConstView pfRecHits, + reco::PFClusteringVarsDeviceCollection::View pfClusteringVars, + reco::PFClusterDeviceCollection::View clusterView, + reco::PFRecHitFractionDeviceCollection::View fracView) { + int tid = alpaka::getIdx( // Thread index corresponds to a single rechit of the topo cluster + acc)[0u]; + + int& nRHNotSeed = alpaka::declareSharedVar(acc); + int& topoSeedBegin = alpaka::declareSharedVar(acc); + int& stride = alpaka::declareSharedVar(acc); + int& iter = alpaka::declareSharedVar(acc); + float& tol = alpaka::declareSharedVar(acc); + float& diff2 = alpaka::declareSharedVar(acc); + float& rhENormInv = alpaka::declareSharedVar(acc); + bool& notDone = alpaka::declareSharedVar(acc); + auto& clusterPos = alpaka::declareSharedVar(acc); + auto& prevClusterPos = alpaka::declareSharedVar(acc); + auto& clusterEnergy = alpaka::declareSharedVar(acc); + auto& rhFracSum = alpaka::declareSharedVar(acc); + auto& seeds = alpaka::declareSharedVar(acc); + auto& rechits = alpaka::declareSharedVar(acc); + + if (once_per_block(acc)) { + nRHNotSeed = nRHTopo - nSeeds + 1; // 1 + (# rechits per topoId that are NOT seeds) + topoSeedBegin = pfClusteringVars[topoId].topoSeedOffsets(); + tol = pfClusParams.stoppingTolerance() * + powf(fmaxf(1.0, nSeeds - 1), 2.0); // stopping tolerance * tolerance scaling + stride = alpaka::getWorkDiv(acc)[0u]; + iter = 0; + notDone = true; + + int i = pfClusteringVars[topoSeedBegin].topoSeedList(); + if (pfRecHits[i].layer() == PFLayer::HCAL_BARREL1) + rhENormInv = pfClusParams.recHitEnergyNormInvEB_vec()[pfRecHits[i].depth() - 1]; + else if (pfRecHits[i].layer() == PFLayer::HCAL_ENDCAP) + rhENormInv = pfClusParams.recHitEnergyNormInvEE_vec()[pfRecHits[i].depth() - 1]; + else + printf("Rechit %d has invalid layer %d!\n", i, pfRecHits[i].layer()); + } + alpaka::syncBlockThreads(acc); // all threads call sync + + if (tid < nSeeds) + seeds[tid] = pfClusteringVars[topoSeedBegin + tid].topoSeedList(); + if (tid < nRHNotSeed - 1) + rechits[tid] = + fracView[pfClusteringVars[pfClusteringVars[topoSeedBegin].topoSeedList()].seedFracOffsets() + tid + 1] + .pfrhIdx(); + + alpaka::syncBlockThreads(acc); // all threads call sync + + if constexpr (debug) { + if (once_per_block(acc)) { + printf("\n===========================================================================================\n"); + printf("Processing topo cluster %d with nSeeds = %d nRHTopo = %d and seeds (", topoId, nSeeds, nRHTopo); + for (int s = 0; s < nSeeds; s++) { + if (s != 0) + printf(", "); + printf("%d", getSeedRhIdx(seeds, s)); + } + if (nRHTopo == nSeeds) { + printf(")\n\n"); + } else { + printf(") and other rechits ("); + for (int r = 1; r < nRHNotSeed; r++) { + if (r != 1) + printf(", "); + if (r <= 0) { + printf("Invalid rhNum (%d) for get RhFracIdx!\n", r); + } + printf("%d", rechits[r - 1]); + } + printf(")\n\n"); + } + } + alpaka::syncBlockThreads(acc); // all (or none) threads call sync + } + + // Set initial cluster position (energy) to seed rechit position (energy) + if (tid < nSeeds) { + int i = getSeedRhIdx(seeds, tid); + clusterPos[tid] = Position4{pfRecHits[i].x(), pfRecHits[i].y(), pfRecHits[i].z(), 1.0}; + prevClusterPos[tid] = clusterPos[tid]; + clusterEnergy[tid] = pfRecHits[i].energy(); + for (int r = 0; r < (nRHNotSeed - 1); r++) { + fracView[pfClusteringVars[i].seedFracOffsets() + r + 1].pfrhIdx() = rechits[r]; + fracView[pfClusteringVars[i].seedFracOffsets() + r + 1].frac() = -1.; + } + } + alpaka::syncBlockThreads(acc); // all threads call sync + + int rhThreadIdx = -1; + Position4 rhThreadPos; + if (tid < (nRHNotSeed - 1)) { + rhThreadIdx = rechits[tid]; // Index when thread represents rechit + rhThreadPos = Position4{pfRecHits[rhThreadIdx].x(), pfRecHits[rhThreadIdx].y(), pfRecHits[rhThreadIdx].z(), 1.}; + } + + // Neighbors when threadIdx represents seed + int seedThreadIdx = -1; + Neighbours4 seedNeighbors = Neighbours4{-9, -9, -9, -9}; + float seedEnergy = -1.; + Position4 seedInitClusterPos = Position4{0., 0., 0., 0.}; + if (tid < nSeeds) { + if constexpr (debug) + printf("tid: %d\n", tid); + seedThreadIdx = getSeedRhIdx(seeds, tid); + seedNeighbors = Neighbours4{pfRecHits[seedThreadIdx].neighbours()(0), + pfRecHits[seedThreadIdx].neighbours()(1), + pfRecHits[seedThreadIdx].neighbours()(2), + pfRecHits[seedThreadIdx].neighbours()(3)}; + seedEnergy = pfRecHits[seedThreadIdx].energy(); + + // Compute initial cluster position shift for seed + updateClusterPos(pfClusParams, seedInitClusterPos, 1., seedThreadIdx, pfRecHits, rhENormInv); + } + + do { + if constexpr (debug) { + if (once_per_block(acc)) + printf("\n--- Now on iter %d for topoId %d ---\n", iter, topoId); + } + + // Reset rhFracSum + rhFracSum[tid] = 0.; + if (once_per_block(acc)) + diff2 = -1; + + if (tid < (nRHNotSeed - 1)) { + for (int s = 0; s < nSeeds; s++) { + float dist2 = (clusterPos[s].x - rhThreadPos.x) * (clusterPos[s].x - rhThreadPos.x) + + (clusterPos[s].y - rhThreadPos.y) * (clusterPos[s].y - rhThreadPos.y) + + (clusterPos[s].z - rhThreadPos.z) * (clusterPos[s].z - rhThreadPos.z); + + float d2 = dist2 / pfClusParams.showerSigma2(); + float fraction = clusterEnergy[s] * rhENormInv * expf(-0.5 * d2); + + rhFracSum[tid] += fraction; + } + } + alpaka::syncBlockThreads(acc); // all threads call sync + + if (tid < (nRHNotSeed - 1)) { + for (int s = 0; s < nSeeds; s++) { + int i = seeds[s]; + float dist2 = (clusterPos[s].x - rhThreadPos.x) * (clusterPos[s].x - rhThreadPos.x) + + (clusterPos[s].y - rhThreadPos.y) * (clusterPos[s].y - rhThreadPos.y) + + (clusterPos[s].z - rhThreadPos.z) * (clusterPos[s].z - rhThreadPos.z); + + float d2 = dist2 / pfClusParams.showerSigma2(); + float fraction = clusterEnergy[s] * rhENormInv * expf(-0.5 * d2); + + if (rhFracSum[tid] > pfClusParams.minFracTot()) { + float fracpct = fraction / rhFracSum[tid]; + if (fracpct > cutoffFraction || (d2 < cutoffDistance && fracpct > pfClusParams.minFracToKeep())) { + fracView[pfClusteringVars[i].seedFracOffsets() + tid + 1].frac() = fracpct; + } else { + fracView[pfClusteringVars[i].seedFracOffsets() + tid + 1].frac() = -1; + } + } else { + fracView[pfClusteringVars[i].seedFracOffsets() + tid + 1].frac() = -1; + } + } + } + alpaka::syncBlockThreads(acc); // all threads call sync + + if constexpr (debug) { + if (once_per_block(acc)) + printf("Computing cluster position for topoId %d\n", topoId); + } + + // Reset cluster position and energy + if (tid < nSeeds) { + clusterPos[tid] = seedInitClusterPos; + clusterEnergy[tid] = seedEnergy; + if constexpr (debug) { + printf("Cluster %d (seed %d) has energy %f\tpos = (%f, %f, %f, %f)\n", + tid, + seeds[tid], + clusterEnergy[tid], + clusterPos[tid].x, + clusterPos[tid].y, + clusterPos[tid].z, + clusterPos[tid].w); + } + } + alpaka::syncBlockThreads(acc); // all threads call sync + + // Recalculate position + if (tid < nSeeds) { + for (int r = 0; r < nRHNotSeed - 1; r++) { + int j = rechits[r]; + float frac = getRhFrac(pfClusteringVars, topoSeedBegin, fracView, tid, r + 1); + + if (frac > -0.5) { + clusterEnergy[tid] += frac * pfRecHits[j].energy(); + + if (nSeeds == 1 || j == seedNeighbors.x || j == seedNeighbors.y || j == seedNeighbors.z || + j == seedNeighbors.w) + updateClusterPos(pfClusParams, clusterPos[tid], frac, j, pfRecHits, rhENormInv); + } + } + } + alpaka::syncBlockThreads(acc); // all threads call sync + + // Position normalization + if (tid < nSeeds) { + if (clusterPos[tid].w >= pfClusParams.minAllowedNormalization()) { + // Divide by position norm + clusterPos[tid].x /= clusterPos[tid].w; + clusterPos[tid].y /= clusterPos[tid].w; + clusterPos[tid].z /= clusterPos[tid].w; + + if constexpr (debug) + printf("\tCluster %d (seed %d) energy = %f\tposition = (%f, %f, %f)\n", + tid, + seedThreadIdx, + clusterEnergy[tid], + clusterPos[tid].x, + clusterPos[tid].y, + clusterPos[tid].z); + } else { + if constexpr (debug) + printf("\tCluster %d (seed %d) position norm (%f) less than minimum (%f)\n", + tid, + seedThreadIdx, + clusterPos[tid].w, + pfClusParams.minAllowedNormalization()); + clusterPos[tid].x = 0.0; + clusterPos[tid].y = 0.0; + clusterPos[tid].z = 0.0; + } + } + alpaka::syncBlockThreads(acc); // all threads call sync + + if (tid < nSeeds) { + float delta2 = dR2(prevClusterPos[tid], clusterPos[tid]); + if constexpr (debug) + printf("\tCluster %d (seed %d) has delta2 = %f\n", tid, seeds[tid], delta2); + atomicMaxF(acc, &diff2, delta2); + prevClusterPos[tid] = clusterPos[tid]; // Save clusterPos + } + alpaka::syncBlockThreads(acc); // all threads call sync + + if (once_per_block(acc)) { + float tol2 = tol * tol; + iter++; + notDone = (diff2 > tol2) && ((unsigned int)iter < pfClusParams.maxIterations()); + if constexpr (debug) { + if (diff2 > tol2) + printf("\tTopoId %d has diff2 = %f greater than squared tolerance %f (continuing)\n", topoId, diff2, tol2); + else if constexpr (debug) + printf("\tTopoId %d has diff2 = %f LESS than squared tolerance %f (terminating!)\n", topoId, diff2, tol2); + } + } + alpaka::syncBlockThreads(acc); // all threads call sync + } while (notDone); // shared variable condition ensures synchronization is well defined + if (once_per_block(acc)) + // Fill PFCluster-level info + if (tid < nSeeds) { + int rhIdx = pfClusteringVars[tid + pfClusteringVars[topoId].topoSeedOffsets()].topoSeedList(); + int seedIdx = pfClusteringVars[rhIdx].rhIdxToSeedIdx(); + clusterView[seedIdx].energy() = clusterEnergy[tid]; + clusterView[seedIdx].x() = clusterPos[tid].x; + clusterView[seedIdx].y() = clusterPos[tid].y; + clusterView[seedIdx].z() = clusterPos[tid].z; + } + } + + // Process very large exotic clusters, from nSeeds > 400 and non-seeds > 1500 + // Uses global memory access + // Device function designed to be called by all threads of a given block + template >> + ALPAKA_FN_ACC static void hcalFastCluster_exotic(const TAcc& acc, + reco::PFClusterParamsDeviceCollection::ConstView pfClusParams, + int topoId, + int nSeeds, + int nRHTopo, + reco::PFRecHitDeviceCollection::ConstView pfRecHits, + reco::PFClusteringVarsDeviceCollection::View pfClusteringVars, + reco::PFClusterDeviceCollection::View clusterView, + reco::PFRecHitFractionDeviceCollection::View fracView, + Position4* __restrict__ globalClusterPos, + Position4* __restrict__ globalPrevClusterPos, + float* __restrict__ globalClusterEnergy, + float* __restrict__ globalRhFracSum, + int* __restrict__ globalSeeds, + int* __restrict__ globalRechits) { + int& nRHNotSeed = alpaka::declareSharedVar(acc); + int& blockIdx = alpaka::declareSharedVar(acc); + int& topoSeedBegin = alpaka::declareSharedVar(acc); + int& stride = alpaka::declareSharedVar(acc); + int& iter = alpaka::declareSharedVar(acc); + float& tol = alpaka::declareSharedVar(acc); + float& diff2 = alpaka::declareSharedVar(acc); + float& rhENormInv = alpaka::declareSharedVar(acc); + bool& notDone = alpaka::declareSharedVar(acc); + + blockIdx = maxTopoInput * alpaka::getIdx(acc)[0u]; + Position4* clusterPos = globalClusterPos + blockIdx; + Position4* prevClusterPos = globalPrevClusterPos + blockIdx; + float* clusterEnergy = globalClusterEnergy + blockIdx; + float* rhFracSum = globalRhFracSum + blockIdx; + int* seeds = globalSeeds + blockIdx; + int* rechits = globalRechits + blockIdx; + + if (once_per_block(acc)) { + nRHNotSeed = nRHTopo - nSeeds + 1; // 1 + (# rechits per topoId that are NOT seeds) + topoSeedBegin = pfClusteringVars[topoId].topoSeedOffsets(); + tol = pfClusParams.stoppingTolerance() * + powf(fmaxf(1.0, nSeeds - 1), 2.0); // stopping tolerance * tolerance scaling + stride = alpaka::getWorkDiv(acc)[0u]; + iter = 0; + notDone = true; + + int i = pfClusteringVars[topoSeedBegin].topoSeedList(); + if (pfRecHits[i].layer() == PFLayer::HCAL_BARREL1) + rhENormInv = pfClusParams.recHitEnergyNormInvEB_vec()[pfRecHits[i].depth() - 1]; + else if (pfRecHits[i].layer() == PFLayer::HCAL_ENDCAP) + rhENormInv = pfClusParams.recHitEnergyNormInvEE_vec()[pfRecHits[i].depth() - 1]; + else + printf("Rechit %d has invalid layer %d!\n", i, pfRecHits[i].layer()); + } + alpaka::syncBlockThreads(acc); // all threads call sync + + for (int n = alpaka::getIdx(acc)[0u]; n < nRHTopo; n += stride) { + if (n < nSeeds) + seeds[n] = pfClusteringVars[topoSeedBegin + n].topoSeedList(); + if (n < nRHNotSeed - 1) + rechits[n] = + fracView[pfClusteringVars[pfClusteringVars[topoSeedBegin].topoSeedList()].seedFracOffsets() + n + 1] + .pfrhIdx(); + } + alpaka::syncBlockThreads(acc); // all threads call sync + + if constexpr (debug) { + if (once_per_block(acc)) { + printf("\n===========================================================================================\n"); + printf("Processing topo cluster %d with nSeeds = %d nRHTopo = %d and seeds (", topoId, nSeeds, nRHTopo); + for (int s = 0; s < nSeeds; s++) { + if (s != 0) + printf(", "); + printf("%d", getSeedRhIdx(seeds, s)); + } + if (nRHTopo == nSeeds) { + printf(")\n\n"); + } else { + printf(") and other rechits ("); + for (int r = 1; r < nRHNotSeed; r++) { + if (r != 1) + printf(", "); + if (r <= 0) { + printf("Invalid rhNum (%d) for get RhFracIdx!\n", r); + } + printf("%d", rechits[r - 1]); + } + printf(")\n\n"); + } + } + alpaka::syncBlockThreads(acc); // all (or none) threads call sync + } + + // Set initial cluster position (energy) to seed rechit position (energy) + for (int s = alpaka::getIdx(acc)[0u]; s < nSeeds; s += stride) { + int i = seeds[s]; + clusterPos[s] = Position4{pfRecHits[i].x(), pfRecHits[i].y(), pfRecHits[i].z(), 1.0}; + prevClusterPos[s] = clusterPos[s]; + clusterEnergy[s] = pfRecHits[i].energy(); + for (int r = 0; r < (nRHNotSeed - 1); r++) { + fracView[pfClusteringVars[i].seedFracOffsets() + r + 1].pfrhIdx() = rechits[r]; + fracView[pfClusteringVars[i].seedFracOffsets() + r + 1].frac() = -1.; + } + } + alpaka::syncBlockThreads(acc); // all threads call sync + + do { + if constexpr (debug) { + if (once_per_block(acc)) + printf("\n--- Now on iter %d for topoId %d ---\n", iter, topoId); + } + + if (once_per_block(acc)) + diff2 = -1; + // Reset rhFracSum + for (int tid = alpaka::getIdx(acc)[0u]; tid < nRHNotSeed - 1; tid += stride) { + rhFracSum[tid] = 0.; + int rhThreadIdx = rechits[tid]; + Position4 rhThreadPos = + Position4{pfRecHits[rhThreadIdx].x(), pfRecHits[rhThreadIdx].y(), pfRecHits[rhThreadIdx].z(), 1.}; + for (int s = 0; s < nSeeds; s++) { + float dist2 = (clusterPos[s].x - rhThreadPos.x) * (clusterPos[s].x - rhThreadPos.x) + + (clusterPos[s].y - rhThreadPos.y) * (clusterPos[s].y - rhThreadPos.y) + + (clusterPos[s].z - rhThreadPos.z) * (clusterPos[s].z - rhThreadPos.z); + + float d2 = dist2 / pfClusParams.showerSigma2(); + float fraction = clusterEnergy[s] * rhENormInv * expf(-0.5 * d2); + + rhFracSum[tid] += fraction; + } + } + alpaka::syncBlockThreads(acc); // all threads call sync + + for (int tid = alpaka::getIdx(acc)[0u]; tid < nRHNotSeed - 1; tid += stride) { + int rhThreadIdx = rechits[tid]; + Position4 rhThreadPos = + Position4{pfRecHits[rhThreadIdx].x(), pfRecHits[rhThreadIdx].y(), pfRecHits[rhThreadIdx].z(), 1.}; + for (int s = 0; s < nSeeds; s++) { + int i = seeds[s]; + float dist2 = (clusterPos[s].x - rhThreadPos.x) * (clusterPos[s].x - rhThreadPos.x) + + (clusterPos[s].y - rhThreadPos.y) * (clusterPos[s].y - rhThreadPos.y) + + (clusterPos[s].z - rhThreadPos.z) * (clusterPos[s].z - rhThreadPos.z); + + float d2 = dist2 / pfClusParams.showerSigma2(); + float fraction = clusterEnergy[s] * rhENormInv * expf(-0.5 * d2); + + if (rhFracSum[tid] > pfClusParams.minFracTot()) { + float fracpct = fraction / rhFracSum[tid]; + if (fracpct > cutoffFraction || (d2 < cutoffDistance && fracpct > pfClusParams.minFracToKeep())) { + fracView[pfClusteringVars[i].seedFracOffsets() + tid + 1].frac() = fracpct; + } else { + fracView[pfClusteringVars[i].seedFracOffsets() + tid + 1].frac() = -1; + } + } else { + fracView[pfClusteringVars[i].seedFracOffsets() + tid + 1].frac() = -1; + } + } + } + alpaka::syncBlockThreads(acc); // all threads call sync + + if constexpr (debug) { + if (once_per_block(acc)) + printf("Computing cluster position for topoId %d\n", topoId); + } + + // Reset cluster position and energy + for (int s = alpaka::getIdx(acc)[0u]; s < nSeeds; s += stride) { + int seedRhIdx = getSeedRhIdx(seeds, s); + float norm = logf(pfRecHits[seedRhIdx].energy() * rhENormInv); + clusterPos[s] = Position4{ + pfRecHits[seedRhIdx].x() * norm, pfRecHits[seedRhIdx].y() * norm, pfRecHits[seedRhIdx].z() * norm, norm}; + clusterEnergy[s] = pfRecHits[seedRhIdx].energy(); + if constexpr (debug) { + printf("Cluster %d (seed %d) has energy %f\tpos = (%f, %f, %f, %f)\n", + s, + seeds[s], + clusterEnergy[s], + clusterPos[s].x, + clusterPos[s].y, + clusterPos[s].z, + clusterPos[s].w); + } + } + alpaka::syncBlockThreads(acc); // all threads call sync + + // Recalculate position + for (int s = alpaka::getIdx(acc)[0u]; s < nSeeds; s += stride) { + int seedRhIdx = getSeedRhIdx(seeds, s); + for (int r = 0; r < nRHNotSeed - 1; r++) { + int j = rechits[r]; + float frac = getRhFrac(pfClusteringVars, topoSeedBegin, fracView, s, r + 1); + + if (frac > -0.5) { + clusterEnergy[s] += frac * pfRecHits[j].energy(); + + if (nSeeds == 1 || j == pfRecHits[seedRhIdx].neighbours()(0) || j == pfRecHits[seedRhIdx].neighbours()(1) || + j == pfRecHits[seedRhIdx].neighbours()(2) || j == pfRecHits[seedRhIdx].neighbours()(3)) + updateClusterPos(pfClusParams, clusterPos[s], frac, j, pfRecHits, rhENormInv); + } + } + } + alpaka::syncBlockThreads(acc); // all threads call sync + + // Position normalization + for (int s = alpaka::getIdx(acc)[0u]; s < nSeeds; s += stride) { + if (clusterPos[s].w >= pfClusParams.minAllowedNormalization()) { + // Divide by position norm + clusterPos[s].x /= clusterPos[s].w; + clusterPos[s].y /= clusterPos[s].w; + clusterPos[s].z /= clusterPos[s].w; + + if constexpr (debug) + printf("\tCluster %d (seed %d) energy = %f\tposition = (%f, %f, %f)\n", + s, + seeds[s], + clusterEnergy[s], + clusterPos[s].x, + clusterPos[s].y, + clusterPos[s].z); + } else { + if constexpr (debug) + printf("\tCluster %d (seed %d) position norm (%f) less than minimum (%f)\n", + s, + seeds[s], + clusterPos[s].w, + pfClusParams.minAllowedNormalization()); + clusterPos[s].x = 0.0; + clusterPos[s].y = 0.0; + clusterPos[s].z = 0.0; + } + } + alpaka::syncBlockThreads(acc); // all threads call sync + + for (int s = alpaka::getIdx(acc)[0u]; s < nSeeds; s += stride) { + float delta2 = dR2(prevClusterPos[s], clusterPos[s]); + if constexpr (debug) + printf("\tCluster %d (seed %d) has delta2 = %f\n", s, seeds[s], delta2); + atomicMaxF(acc, &diff2, delta2); + prevClusterPos[s] = clusterPos[s]; // Save clusterPos + } + alpaka::syncBlockThreads(acc); // all threads call sync + + if (once_per_block(acc)) { + float tol2 = tol * tol; + iter++; + notDone = (diff2 > tol2) && ((unsigned int)iter < pfClusParams.maxIterations()); + if constexpr (debug) { + if (diff2 > tol2) + printf("\tTopoId %d has diff2 = %f greater than squared tolerance %f (continuing)\n", topoId, diff2, tol2); + else if constexpr (debug) + printf("\tTopoId %d has diff2 = %f LESS than squared tolerance %f (terminating!)\n", topoId, diff2, tol2); + } + } + alpaka::syncBlockThreads(acc); // all threads call sync + } while (notDone); // shared variable ensures synchronization is well defined + if (once_per_block(acc)) + for (int s = alpaka::getIdx(acc)[0u]; s < nSeeds; s += stride) { + int rhIdx = pfClusteringVars[s + pfClusteringVars[topoId].topoSeedOffsets()].topoSeedList(); + int seedIdx = pfClusteringVars[rhIdx].rhIdxToSeedIdx(); + clusterView[seedIdx].energy() = pfRecHits[s].energy(); + clusterView[seedIdx].x() = pfRecHits[s].x(); + clusterView[seedIdx].y() = pfRecHits[s].y(); + clusterView[seedIdx].z() = pfRecHits[s].z(); + } + alpaka::syncBlockThreads(acc); // all threads call sync + } + + // Process clusters with up to 400 seeds and 1500 non seeds using shared memory + // Device function designed to be called by all threads of a given block + template >> + ALPAKA_FN_ACC static void hcalFastCluster_multiSeedIterative( + const TAcc& acc, + reco::PFClusterParamsDeviceCollection::ConstView pfClusParams, + int topoId, + int nSeeds, + int nRHTopo, + reco::PFRecHitDeviceCollection::ConstView pfRecHits, + reco::PFClusteringVarsDeviceCollection::View pfClusteringVars, + reco::PFClusterDeviceCollection::View clusterView, + reco::PFRecHitFractionDeviceCollection::View fracView) { + int& nRHNotSeed = alpaka::declareSharedVar(acc); + int& topoSeedBegin = alpaka::declareSharedVar(acc); + int& stride = alpaka::declareSharedVar(acc); + int& iter = alpaka::declareSharedVar(acc); + float& tol = alpaka::declareSharedVar(acc); + float& diff2 = alpaka::declareSharedVar(acc); + float& rhENormInv = alpaka::declareSharedVar(acc); + bool& notDone = alpaka::declareSharedVar(acc); + + auto& clusterPos = alpaka::declareSharedVar(acc); + auto& prevClusterPos = alpaka::declareSharedVar(acc); + auto& clusterEnergy = alpaka::declareSharedVar(acc); + auto& rhFracSum = alpaka::declareSharedVar(acc); + auto& seeds = alpaka::declareSharedVar(acc); + auto& rechits = alpaka::declareSharedVar(acc); + + if (once_per_block(acc)) { + nRHNotSeed = nRHTopo - nSeeds + 1; // 1 + (# rechits per topoId that are NOT seeds) + topoSeedBegin = pfClusteringVars[topoId].topoSeedOffsets(); + tol = pfClusParams.stoppingTolerance() * // stopping tolerance * tolerance scaling + powf(fmaxf(1.0, nSeeds - 1), 2.0); + stride = alpaka::getWorkDiv(acc)[0u]; + iter = 0; + notDone = true; + + int i = pfClusteringVars[topoSeedBegin].topoSeedList(); + if (pfRecHits[i].layer() == PFLayer::HCAL_BARREL1) + rhENormInv = pfClusParams.recHitEnergyNormInvEB_vec()[pfRecHits[i].depth() - 1]; + else if (pfRecHits[i].layer() == PFLayer::HCAL_ENDCAP) + rhENormInv = pfClusParams.recHitEnergyNormInvEE_vec()[pfRecHits[i].depth() - 1]; + else + printf("Rechit %d has invalid layer %d!\n", i, pfRecHits[i].layer()); + } + alpaka::syncBlockThreads(acc); // all threads call sync + + for (int n = alpaka::getIdx(acc)[0u]; n < nRHTopo; n += stride) { + if (n < nSeeds) + seeds[n] = pfClusteringVars[topoSeedBegin + n].topoSeedList(); + if (n < nRHNotSeed - 1) + rechits[n] = + fracView[pfClusteringVars[pfClusteringVars[topoSeedBegin].topoSeedList()].seedFracOffsets() + n + 1] + .pfrhIdx(); + } + alpaka::syncBlockThreads(acc); // all threads call sync + + if constexpr (debug) { + if (once_per_block(acc)) { + printf("\n===========================================================================================\n"); + printf("Processing topo cluster %d with nSeeds = %d nRHTopo = %d and seeds (", topoId, nSeeds, nRHTopo); + for (int s = 0; s < nSeeds; s++) { + if (s != 0) + printf(", "); + printf("%d", getSeedRhIdx(seeds, s)); + } + if (nRHTopo == nSeeds) { + printf(")\n\n"); + } else { + printf(") and other rechits ("); + for (int r = 1; r < nRHNotSeed; r++) { + if (r != 1) + printf(", "); + if (r <= 0) { + printf("Invalid rhNum (%d) for get RhFracIdx!\n", r); + } + printf("%d", rechits[r - 1]); + } + printf(")\n\n"); + } + } + alpaka::syncBlockThreads(acc); // all (or none) threads call sync + } + + // Set initial cluster position (energy) to seed rechit position (energy) + for (int s = alpaka::getIdx(acc)[0u]; s < nSeeds; s += stride) { + int i = seeds[s]; + clusterPos[s] = Position4{pfRecHits[i].x(), pfRecHits[i].y(), pfRecHits[i].z(), 1.0}; + prevClusterPos[s] = clusterPos[s]; + clusterEnergy[s] = pfRecHits[i].energy(); + for (int r = 0; r < (nRHNotSeed - 1); r++) { + fracView[pfClusteringVars[i].seedFracOffsets() + r + 1].pfrhIdx() = rechits[r]; + fracView[pfClusteringVars[i].seedFracOffsets() + r + 1].frac() = -1.; + } + } + alpaka::syncBlockThreads(acc); // all threads call sync + + do { + if constexpr (debug) { + if (once_per_block(acc)) + printf("\n--- Now on iter %d for topoId %d ---\n", iter, topoId); + } + + if (once_per_block(acc)) + diff2 = -1; + // Reset rhFracSum + for (int tid = alpaka::getIdx(acc)[0u]; tid < nRHNotSeed - 1; tid += stride) { + rhFracSum[tid] = 0.; + int rhThreadIdx = rechits[tid]; + Position4 rhThreadPos = + Position4{pfRecHits[rhThreadIdx].x(), pfRecHits[rhThreadIdx].y(), pfRecHits[rhThreadIdx].z(), 1.}; + for (int s = 0; s < nSeeds; s++) { + float dist2 = (clusterPos[s].x - rhThreadPos.x) * (clusterPos[s].x - rhThreadPos.x) + + (clusterPos[s].y - rhThreadPos.y) * (clusterPos[s].y - rhThreadPos.y) + + (clusterPos[s].z - rhThreadPos.z) * (clusterPos[s].z - rhThreadPos.z); + + float d2 = dist2 / pfClusParams.showerSigma2(); + float fraction = clusterEnergy[s] * rhENormInv * expf(-0.5 * d2); + + rhFracSum[tid] += fraction; + } + } + alpaka::syncBlockThreads(acc); // all threads call sync + + for (int tid = alpaka::getIdx(acc)[0u]; tid < nRHNotSeed - 1; tid += stride) { + int rhThreadIdx = rechits[tid]; + Position4 rhThreadPos = + Position4{pfRecHits[rhThreadIdx].x(), pfRecHits[rhThreadIdx].y(), pfRecHits[rhThreadIdx].z(), 1.}; + for (int s = 0; s < nSeeds; s++) { + int i = seeds[s]; + float dist2 = (clusterPos[s].x - rhThreadPos.x) * (clusterPos[s].x - rhThreadPos.x) + + (clusterPos[s].y - rhThreadPos.y) * (clusterPos[s].y - rhThreadPos.y) + + (clusterPos[s].z - rhThreadPos.z) * (clusterPos[s].z - rhThreadPos.z); + + float d2 = dist2 / pfClusParams.showerSigma2(); + float fraction = clusterEnergy[s] * rhENormInv * expf(-0.5 * d2); + + if (rhFracSum[tid] > pfClusParams.minFracTot()) { + float fracpct = fraction / rhFracSum[tid]; + if (fracpct > cutoffFraction || (d2 < cutoffDistance && fracpct > pfClusParams.minFracToKeep())) { + fracView[pfClusteringVars[i].seedFracOffsets() + tid + 1].frac() = fracpct; + } else { + fracView[pfClusteringVars[i].seedFracOffsets() + tid + 1].frac() = -1; + } + } else { + fracView[pfClusteringVars[i].seedFracOffsets() + tid + 1].frac() = -1; + } + } + } + alpaka::syncBlockThreads(acc); // all threads call sync + + if constexpr (debug) { + if (once_per_block(acc)) + printf("Computing cluster position for topoId %d\n", topoId); + } + + // Reset cluster position and energy + for (int s = alpaka::getIdx(acc)[0u]; s < nSeeds; s += stride) { + int seedRhIdx = getSeedRhIdx(seeds, s); + float norm = logf(pfRecHits[seedRhIdx].energy() * rhENormInv); + clusterPos[s] = Position4{ + pfRecHits[seedRhIdx].x() * norm, pfRecHits[seedRhIdx].y() * norm, pfRecHits[seedRhIdx].z() * norm, norm}; + clusterEnergy[s] = pfRecHits[seedRhIdx].energy(); + if constexpr (debug) { + printf("Cluster %d (seed %d) has energy %f\tpos = (%f, %f, %f, %f)\n", + s, + seeds[s], + clusterEnergy[s], + clusterPos[s].x, + clusterPos[s].y, + clusterPos[s].z, + clusterPos[s].w); + } + } + alpaka::syncBlockThreads(acc); // all threads call sync + + // Recalculate position + for (int s = alpaka::getIdx(acc)[0u]; s < nSeeds; s += stride) { + int seedRhIdx = getSeedRhIdx(seeds, s); + for (int r = 0; r < nRHNotSeed - 1; r++) { + int j = rechits[r]; + float frac = getRhFrac(pfClusteringVars, topoSeedBegin, fracView, s, r + 1); + + if (frac > -0.5) { + clusterEnergy[s] += frac * pfRecHits[j].energy(); + + if (nSeeds == 1 || j == pfRecHits[seedRhIdx].neighbours()(0) || j == pfRecHits[seedRhIdx].neighbours()(1) || + j == pfRecHits[seedRhIdx].neighbours()(2) || j == pfRecHits[seedRhIdx].neighbours()(3)) + updateClusterPos(pfClusParams, clusterPos[s], frac, j, pfRecHits, rhENormInv); + } + } + } + alpaka::syncBlockThreads(acc); // all threads call sync + + // Position normalization + for (int s = alpaka::getIdx(acc)[0u]; s < nSeeds; s += stride) { + if (clusterPos[s].w >= pfClusParams.minAllowedNormalization()) { + // Divide by position norm + clusterPos[s].x /= clusterPos[s].w; + clusterPos[s].y /= clusterPos[s].w; + clusterPos[s].z /= clusterPos[s].w; + + if constexpr (debug) + printf("\tCluster %d (seed %d) energy = %f\tposition = (%f, %f, %f)\n", + s, + seeds[s], + clusterEnergy[s], + clusterPos[s].x, + clusterPos[s].y, + clusterPos[s].z); + } else { + if constexpr (debug) + printf("\tCluster %d (seed %d) position norm (%f) less than minimum (%f)\n", + s, + seeds[s], + clusterPos[s].w, + pfClusParams.minAllowedNormalization()); + clusterPos[s].x = 0.0; + clusterPos[s].y = 0.0; + clusterPos[s].z = 0.0; + } + } + alpaka::syncBlockThreads(acc); // all threads call sync + + for (int s = alpaka::getIdx(acc)[0u]; s < nSeeds; s += stride) { + float delta2 = dR2(prevClusterPos[s], clusterPos[s]); + if constexpr (debug) + printf("\tCluster %d (seed %d) has delta2 = %f\n", s, seeds[s], delta2); + atomicMaxF(acc, &diff2, delta2); + prevClusterPos[s] = clusterPos[s]; // Save clusterPos + } + alpaka::syncBlockThreads(acc); // all threads call sync + + if (once_per_block(acc)) { + float tol2 = tol * tol; + iter++; + notDone = (diff2 > tol2) && ((unsigned int)iter < pfClusParams.maxIterations()); + if constexpr (debug) { + if (diff2 > tol2) + printf("\tTopoId %d has diff2 = %f greater than tolerance %f (continuing)\n", topoId, diff2, tol2); + else if constexpr (debug) + printf("\tTopoId %d has diff2 = %f LESS than tolerance %f (terminating!)\n", topoId, diff2, tol2); + } + } + alpaka::syncBlockThreads(acc); // all threads call sync + } while (notDone); // shared variable ensures synchronization is well defined + if (once_per_block(acc)) + for (int s = alpaka::getIdx(acc)[0u]; s < nSeeds; s += stride) { + int rhIdx = pfClusteringVars[s + pfClusteringVars[topoId].topoSeedOffsets()].topoSeedList(); + int seedIdx = pfClusteringVars[rhIdx].rhIdxToSeedIdx(); + clusterView[seedIdx].energy() = pfRecHits[s].energy(); + clusterView[seedIdx].x() = pfRecHits[s].x(); + clusterView[seedIdx].y() = pfRecHits[s].y(); + clusterView[seedIdx].z() = pfRecHits[s].z(); + } + } + + // Seeding using local energy maxima + class SeedingTopoThresh { + public: + template >> + ALPAKA_FN_ACC void operator()(const TAcc& acc, + reco::PFClusteringVarsDeviceCollection::View pfClusteringVars, + const reco::PFClusterParamsDeviceCollection::ConstView pfClusParams, + const reco::PFRecHitHostCollection::ConstView pfRecHits, + reco::PFClusterDeviceCollection::View clusterView, + reco::PFRecHitFractionDeviceCollection::View fracView, + uint32_t* __restrict__ nSeeds) const { + const int nRH = pfRecHits.size(); + + if (once_per_grid(acc)) { + clusterView.size() = nRH; + } + + for (auto i : elements_with_stride(acc, nRH)) { + // Initialize arrays + pfClusteringVars[i].pfrh_isSeed() = 0; + pfClusteringVars[i].rhCount() = 0; + pfClusteringVars[i].topoSeedCount() = 0; + pfClusteringVars[i].topoRHCount() = 0; + pfClusteringVars[i].seedFracOffsets() = -1; + pfClusteringVars[i].topoSeedOffsets() = -1; + pfClusteringVars[i].topoSeedList() = -1; + clusterView[i].seedRHIdx() = -1; + + int layer = pfRecHits[i].layer(); + int depthOffset = pfRecHits[i].depth() - 1; + float energy = pfRecHits[i].energy(); + Position3 pos = Position3{pfRecHits[i].x(), pfRecHits[i].y(), pfRecHits[i].z()}; + + // cmssdt.cern.ch/lxr/source/DataFormats/ParticleFlowReco/interface/PFRecHit.h#0108 + float pt2 = energy * energy * (pos.x * pos.x + pos.y * pos.y) / (pos.x * pos.x + pos.y * pos.y + pos.z * pos.z); + + // Seeding threshold test + if ((layer == PFLayer::HCAL_BARREL1 && energy > pfClusParams.seedEThresholdEB_vec()[depthOffset] && + pt2 > pfClusParams.seedPt2ThresholdEB()) || + (layer == PFLayer::HCAL_ENDCAP && energy > pfClusParams.seedEThresholdEE_vec()[depthOffset] && + pt2 > pfClusParams.seedPt2ThresholdEE())) { + pfClusteringVars[i].pfrh_isSeed() = 1; + for (int k = 0; k < 4; k++) { // Does this seed candidate have a higher energy than four neighbours + if (pfRecHits[i].neighbours()(k) < 0) + continue; + if (energy < pfRecHits[pfRecHits[i].neighbours()(k)].energy()) { + pfClusteringVars[i].pfrh_isSeed() = 0; + break; + } + } + if (pfClusteringVars[i].pfrh_isSeed()) + alpaka::atomicAdd(acc, nSeeds, 1u); + } + // Topo clustering threshold test + if ((layer == PFLayer::HCAL_ENDCAP && energy > pfClusParams.topoEThresholdEE_vec()[depthOffset]) || + (layer == PFLayer::HCAL_BARREL1 && energy > pfClusParams.topoEThresholdEB_vec()[depthOffset])) { + pfClusteringVars[i].pfrh_passTopoThresh() = true; + pfClusteringVars[i].pfrh_topoId() = i; + } else { + pfClusteringVars[i].pfrh_passTopoThresh() = false; + pfClusteringVars[i].pfrh_topoId() = -1; + } + } + } + }; + + // Preparation of topo inputs. Initializing topoId, egdeIdx, nEdges, edgeList + class PrepareTopoInputs { + public: + template >> + ALPAKA_FN_ACC void operator()(const TAcc& acc, + const reco::PFRecHitHostCollection::ConstView pfRecHits, + reco::PFClusteringVarsDeviceCollection::View pfClusteringVars, + reco::PFClusteringEdgeVarsDeviceCollection::View pfClusteringEdgeVars, + uint32_t* __restrict__ nSeeds) const { + const int nRH = pfRecHits.size(); + + if (once_per_grid(acc)) { + pfClusteringVars.nEdges() = nRH * 8; + pfClusteringEdgeVars[nRH].pfrh_edgeIdx() = nRH * 8; + } + for (uint32_t i : cms::alpakatools::elements_with_stride(acc, nRH)) { + pfClusteringEdgeVars[i].pfrh_edgeIdx() = i * 8; + pfClusteringVars[i].pfrh_topoId() = 0; + for (int j = 0; j < 8; j++) { // checking if neighbours exist and assigning neighbours as edges + if (pfRecHits[i].neighbours()(j) == -1) + pfClusteringEdgeVars[i * 8 + j].pfrh_edgeList() = i; + else + pfClusteringEdgeVars[i * 8 + j].pfrh_edgeList() = pfRecHits[i].neighbours()(j); + } + } + + return; + } + }; + + // Contraction in a single block + class TopoClusterContraction { + public: + template >> + ALPAKA_FN_ACC void operator()(const TAcc& acc, + const reco::PFRecHitHostCollection::ConstView pfRecHits, + reco::PFClusteringVarsDeviceCollection::View pfClusteringVars, + reco::PFClusterDeviceCollection::View clusterView, + uint32_t* __restrict__ nSeeds) const { + const int nRH = pfRecHits.size(); + int& totalSeedOffset = alpaka::declareSharedVar(acc); + int& totalSeedFracOffset = alpaka::declareSharedVar(acc); + + // rhCount, topoRHCount, topoSeedCount initialized earlier + if (once_per_block(acc)) { + pfClusteringVars.nTopos() = 0; + pfClusteringVars.nRHFracs() = 0; + totalSeedOffset = 0; + totalSeedFracOffset = 0; + pfClusteringVars.pcrhFracSize() = 0; + } + + alpaka::syncBlockThreads(acc); // all threads call sync + + // Now determine the number of seeds and rechits in each topo cluster [topoRHCount, topoSeedCount] + // Also get the list of topoIds (smallest rhIdx of each topo cluser) + for (int rhIdx = alpaka::getIdx(acc)[0u]; rhIdx < nRH; + rhIdx += alpaka::getWorkDiv(acc)[0u]) { + pfClusteringVars[rhIdx].rhIdxToSeedIdx() = -1; + int topoId = pfClusteringVars[rhIdx].pfrh_topoId(); + if (topoId > -1) { + // Valid topo cluster + alpaka::atomicAdd(acc, &pfClusteringVars[topoId].topoRHCount(), 1); + // Valid topoId not counted yet + if (topoId == rhIdx) { // For every topo cluster, there is one rechit that meets this condition. + int topoIdx = alpaka::atomicAdd(acc, &pfClusteringVars.nTopos(), 1); + pfClusteringVars[topoIdx].topoIds() = + topoId; // topoId: the smallest index of rechits that belong to a topo cluster. + } + // This is a cluster seed + if (pfClusteringVars[rhIdx].pfrh_isSeed()) { // # of seeds in this topo cluster + alpaka::atomicAdd(acc, &pfClusteringVars[topoId].topoSeedCount(), 1); + } + } + } + + alpaka::syncBlockThreads(acc); // all threads call sync + + // Determine offsets for topo ID seed array [topoSeedOffsets] + for (int topoId = alpaka::getIdx(acc)[0u]; topoId < nRH; + topoId += alpaka::getWorkDiv(acc)[0u]) { + if (pfClusteringVars[topoId].topoSeedCount() > 0) { + // This is a valid topo ID + int offset = alpaka::atomicAdd(acc, &totalSeedOffset, pfClusteringVars[topoId].topoSeedCount()); + pfClusteringVars[topoId].topoSeedOffsets() = offset; + } + } + alpaka::syncBlockThreads(acc); // all threads call sync + + // Fill arrays of rechit indicies for each seed [topoSeedList] and rhIdx->seedIdx conversion for each seed [rhIdxToSeedIdx] + // Also fill seedRHIdx, topoId, depth + for (int rhIdx = alpaka::getIdx(acc)[0u]; rhIdx < nRH; + rhIdx += alpaka::getWorkDiv(acc)[0u]) { + int topoId = pfClusteringVars[rhIdx].pfrh_topoId(); + if (pfClusteringVars[rhIdx].pfrh_isSeed()) { + // Valid topo cluster and this rhIdx corresponds to a seed + int k = alpaka::atomicAdd(acc, &pfClusteringVars[topoId].rhCount(), 1); + int seedIdx = pfClusteringVars[topoId].topoSeedOffsets() + k; + if ((unsigned int)seedIdx >= *nSeeds) + printf("Warning(contraction) %8d > %8d should not happen, check topoId: %d has %d rh\n", + seedIdx, + *nSeeds, + topoId, + k); + pfClusteringVars[seedIdx].topoSeedList() = rhIdx; + pfClusteringVars[rhIdx].rhIdxToSeedIdx() = seedIdx; + clusterView[seedIdx].topoId() = topoId; + clusterView[seedIdx].seedRHIdx() = rhIdx; + clusterView[seedIdx].depth() = pfRecHits[rhIdx].depth(); + } + } + + alpaka::syncBlockThreads(acc); // all threads call sync + + // Determine seed offsets for rechit fraction array + for (int rhIdx = alpaka::getIdx(acc)[0u]; rhIdx < nRH; + rhIdx += alpaka::getWorkDiv(acc)[0u]) { + pfClusteringVars[rhIdx].rhCount() = 1; // Reset this counter array + + int topoId = pfClusteringVars[rhIdx].pfrh_topoId(); + if (pfClusteringVars[rhIdx].pfrh_isSeed() && topoId > -1) { + // Allot the total number of rechits for this topo cluster for rh fractions + int offset = alpaka::atomicAdd(acc, &totalSeedFracOffset, pfClusteringVars[topoId].topoRHCount()); + + // Add offset for this PF cluster seed + pfClusteringVars[rhIdx].seedFracOffsets() = offset; + + // Store recHitFraction offset & size information for each seed + clusterView[pfClusteringVars[rhIdx].rhIdxToSeedIdx()].rhfracOffset() = + pfClusteringVars[rhIdx].seedFracOffsets(); + clusterView[pfClusteringVars[rhIdx].rhIdxToSeedIdx()].rhfracSize() = + pfClusteringVars[topoId].topoRHCount() - pfClusteringVars[topoId].topoSeedCount() + 1; + } + } + + alpaka::syncBlockThreads(acc); // all threads call sync + + if (once_per_block(acc)) { + pfClusteringVars.pcrhFracSize() = totalSeedFracOffset; + pfClusteringVars.nRHFracs() = totalSeedFracOffset; + clusterView.nRHFracs() = totalSeedFracOffset; + clusterView.nSeeds() = *nSeeds; + clusterView.nTopos() = pfClusteringVars.nTopos(); + + if (pfClusteringVars.pcrhFracSize() > 200000) // Warning in case the fraction is too large + printf("At the end of topoClusterContraction, found large *pcrhFracSize = %d\n", + pfClusteringVars.pcrhFracSize()); + } + } + }; + + // Prefill the rechit index for all PFCluster fractions + // Optimized for GPU parallel, but works on any backend + class FillRhfIndex { + public: + template >> + ALPAKA_FN_ACC void operator()(const TAcc& acc, + const reco::PFRecHitHostCollection::ConstView pfRecHits, + reco::PFClusteringVarsDeviceCollection::View pfClusteringVars, + reco::PFRecHitFractionDeviceCollection::View fracView) const { + const int nRH = pfRecHits.size(); + + for (auto index : elements_with_stride_nd(acc, {nRH, nRH})) { + const int i = index[0u]; // i is a seed index + const int j = index[1u]; // j is NOT a seed + int topoId = pfClusteringVars[i].pfrh_topoId(); + if (topoId > -1 && pfClusteringVars[i].pfrh_isSeed() && topoId == pfClusteringVars[j].pfrh_topoId()) { + if (!pfClusteringVars[j].pfrh_isSeed()) { // NOT a seed + int k = alpaka::atomicAdd( + acc, &pfClusteringVars[i].rhCount(), 1); // Increment the number of rechit fractions for this seed + auto fraction = fracView[pfClusteringVars[i].seedFracOffsets() + k]; + fraction.pfrhIdx() = j; + fraction.pfcIdx() = pfClusteringVars[i].rhIdxToSeedIdx(); + } else if (i == j) { // i==j is a seed rechit index + auto seed = fracView[pfClusteringVars[i].seedFracOffsets()]; + seed.pfrhIdx() = j; + seed.frac() = 1; + seed.pfcIdx() = pfClusteringVars[i].rhIdxToSeedIdx(); + } + } + } + } + }; + + class FastCluster { + public: + template >> + ALPAKA_FN_ACC void operator()(const TAcc& acc, + const reco::PFRecHitHostCollection::ConstView pfRecHits, + const reco::PFClusterParamsDeviceCollection::ConstView pfClusParams, + reco::PFClusteringVarsDeviceCollection::View pfClusteringVars, + reco::PFClusterDeviceCollection::View clusterView, + reco::PFRecHitFractionDeviceCollection::View fracView) const { + const int nRH = pfRecHits.size(); + int& topoId = alpaka::declareSharedVar(acc); + int& nRHTopo = alpaka::declareSharedVar(acc); + int& nSeeds = alpaka::declareSharedVar(acc); + + if (once_per_block(acc)) { + topoId = alpaka::getIdx(acc)[0u]; + nRHTopo = pfClusteringVars[topoId].topoRHCount(); + nSeeds = pfClusteringVars[topoId].topoSeedCount(); + } + + alpaka::syncBlockThreads(acc); // all threads call sync + + if (topoId < nRH && nRHTopo > 0 && nSeeds > 0) { + if (nRHTopo == nSeeds) { + // PF cluster is isolated seed. No iterations needed + if (once_per_block(acc)) { + // Fill PFCluster-level information + int rhIdx = pfClusteringVars[pfClusteringVars[topoId].topoSeedOffsets()].topoSeedList(); + int seedIdx = pfClusteringVars[rhIdx].rhIdxToSeedIdx(); + clusterView[seedIdx].energy() = pfRecHits[rhIdx].energy(); + clusterView[seedIdx].x() = pfRecHits[rhIdx].x(); + clusterView[seedIdx].y() = pfRecHits[rhIdx].y(); + clusterView[seedIdx].z() = pfRecHits[rhIdx].z(); + } + } else if constexpr (!std::is_same_v) { + // singleSeed and multiSeedParallel functions work only for GPU backend + if (nSeeds == 1) { + // Single seed cluster + hcalFastCluster_singleSeed( + acc, pfClusParams, topoId, nRHTopo, pfRecHits, pfClusteringVars, clusterView, fracView); + } else if (nSeeds <= 100 && nRHTopo - nSeeds < threadsPerBlockForClustering) { + hcalFastCluster_multiSeedParallel( + acc, pfClusParams, topoId, nSeeds, nRHTopo, pfRecHits, pfClusteringVars, clusterView, fracView); + } + } else if (nSeeds <= 400 && nRHTopo - nSeeds <= 1500) { + hcalFastCluster_multiSeedIterative( + acc, pfClusParams, topoId, nSeeds, nRHTopo, pfRecHits, pfClusteringVars, clusterView, fracView); + } else { + if constexpr (debug) { + if (once_per_block(acc)) + printf("Topo cluster %d has %d seeds and %d rechits. Will be processed in next kernel.\n", + topoId, + nSeeds, + nRHTopo); + } + } + } + } + }; + + // Process very large, exotic topo clusters + class FastClusterExotic { + public: + template >> + ALPAKA_FN_ACC void operator()(const TAcc& acc, + const reco::PFRecHitHostCollection::ConstView pfRecHits, + const reco::PFClusterParamsDeviceCollection::ConstView pfClusParams, + reco::PFClusteringVarsDeviceCollection::View pfClusteringVars, + reco::PFClusterDeviceCollection::View clusterView, + reco::PFRecHitFractionDeviceCollection::View fracView, + Position4* __restrict__ globalClusterPos, + Position4* __restrict__ globalPrevClusterPos, + float* __restrict__ globalClusterEnergy, + float* __restrict__ globalRhFracSum, + int* __restrict__ globalSeeds, + int* __restrict__ globalRechits) const { + const int nRH = pfRecHits.size(); + for (int topoId = alpaka::getIdx(acc)[0u]; topoId < nRH; + topoId += blocksForExoticClusters) { + int nRHTopo = pfClusteringVars[topoId].topoRHCount(); + int nSeeds = pfClusteringVars[topoId].topoSeedCount(); + + if (nRHTopo > 0 && nSeeds > 400 && nRHTopo - nSeeds > 1500) { + hcalFastCluster_exotic(acc, + pfClusParams, + topoId, + nSeeds, + nRHTopo, + pfRecHits, + pfClusteringVars, + clusterView, + fracView, + globalClusterPos, + globalPrevClusterPos, + globalClusterEnergy, + globalRhFracSum, + globalSeeds, + globalRechits); + } + alpaka::syncBlockThreads(acc); // all threads call sync + } + } + }; + + PFClusterProducerKernel::PFClusterProducerKernel(Queue& queue, const reco::PFRecHitHostCollection& pfRecHits) + : nSeeds(cms::alpakatools::make_device_buffer(queue)), + globalClusterPos( + cms::alpakatools::make_device_buffer(queue, blocksForExoticClusters * maxTopoInput)), + globalPrevClusterPos( + cms::alpakatools::make_device_buffer(queue, blocksForExoticClusters * maxTopoInput)), + globalClusterEnergy( + cms::alpakatools::make_device_buffer(queue, blocksForExoticClusters * maxTopoInput)), + globalRhFracSum(cms::alpakatools::make_device_buffer(queue, blocksForExoticClusters * maxTopoInput)), + globalSeeds(cms::alpakatools::make_device_buffer(queue, blocksForExoticClusters * maxTopoInput)), + globalRechits(cms::alpakatools::make_device_buffer(queue, blocksForExoticClusters * maxTopoInput)) { + alpaka::memset(queue, nSeeds, 0x00); // Reset nSeeds + } + + void PFClusterProducerKernel::execute(Queue& queue, + const reco::PFClusterParamsDeviceCollection& params, + reco::PFClusteringVarsDeviceCollection& pfClusteringVars, + reco::PFClusteringEdgeVarsDeviceCollection& pfClusteringEdgeVars, + const reco::PFRecHitHostCollection& pfRecHits, + reco::PFClusterDeviceCollection& pfClusters, + reco::PFRecHitFractionDeviceCollection& pfrhFractions) { + const int nRH = pfRecHits->size(); + const int threadsPerBlock = 256; + const int blocks = divide_up_by(nRH, threadsPerBlock); + + // seedingTopoThresh + alpaka::exec(queue, + make_workdiv(blocks, threadsPerBlock), + SeedingTopoThresh{}, + pfClusteringVars.view(), + params.view(), + pfRecHits.view(), + pfClusters.view(), + pfrhFractions.view(), + nSeeds.data()); + // prepareTopoInputs + alpaka::exec(queue, + make_workdiv(blocks, threadsPerBlock), + PrepareTopoInputs{}, + pfRecHits.view(), + pfClusteringVars.view(), + pfClusteringEdgeVars.view(), + nSeeds.data()); + // ECLCC + alpaka::exec(queue, + make_workdiv(blocks, threadsPerBlock), + ECLCCInit{}, + pfRecHits.view(), + pfClusteringVars.view(), + pfClusteringEdgeVars.view()); + alpaka::exec(queue, + make_workdiv(blocks, threadsPerBlock), + ECLCCCompute1{}, + pfRecHits.view(), + pfClusteringVars.view(), + pfClusteringEdgeVars.view()); + alpaka::exec(queue, + make_workdiv(blocks, threadsPerBlock), + ECLCCFlatten{}, + pfRecHits.view(), + pfClusteringVars.view(), + pfClusteringEdgeVars.view()); + // topoClusterContraction + alpaka::exec(queue, + make_workdiv(1, threadsPerBlockForClustering), + TopoClusterContraction{}, + pfRecHits.view(), + pfClusteringVars.view(), + pfClusters.view(), + nSeeds.data()); + + // fillRhfIndex + alpaka::exec(queue, + make_workdiv({divide_up_by(nRH, 32), divide_up_by(nRH, 32)}, {32, 32}), + FillRhfIndex{}, + pfRecHits.view(), + pfClusteringVars.view(), + pfrhFractions.view()); + + // Run fastCluster + alpaka::exec(queue, + make_workdiv(nRH, threadsPerBlockForClustering), + FastCluster{}, + pfRecHits.view(), + params.view(), + pfClusteringVars.view(), + pfClusters.view(), + pfrhFractions.view()); + // exotic clustering kernel + alpaka::exec(queue, + make_workdiv(blocksForExoticClusters, + threadsPerBlockForClustering), // uses 4 blocks to minimize memory usage + FastClusterExotic{}, + pfRecHits.view(), + params.view(), + pfClusteringVars.view(), + pfClusters.view(), + pfrhFractions.view(), + globalClusterPos.data(), + globalPrevClusterPos.data(), + globalClusterEnergy.data(), + globalRhFracSum.data(), + globalSeeds.data(), + globalRechits.data()); + } + +} // namespace ALPAKA_ACCELERATOR_NAMESPACE diff --git a/RecoParticleFlow/PFClusterProducer/plugins/alpaka/PFClusterSoAProducerKernel.h b/RecoParticleFlow/PFClusterProducer/plugins/alpaka/PFClusterSoAProducerKernel.h new file mode 100644 index 0000000000000..8ebafbd366efe --- /dev/null +++ b/RecoParticleFlow/PFClusterProducer/plugins/alpaka/PFClusterSoAProducerKernel.h @@ -0,0 +1,61 @@ +#ifndef RecoParticleFlow_PFClusterProducer_PFClusterProducerAlpakaKernel_h +#define RecoParticleFlow_PFClusterProducer_PFClusterProducerAlpakaKernel_h + +#include "DataFormats/ParticleFlowReco/interface/alpaka/PFRecHitDeviceCollection.h" +#include "DataFormats/ParticleFlowReco/interface/PFRecHitHostCollection.h" +#include "DataFormats/ParticleFlowReco/interface/alpaka/PFClusterDeviceCollection.h" +#include "DataFormats/ParticleFlowReco/interface/alpaka/PFRecHitFractionDeviceCollection.h" +#include "RecoParticleFlow/PFClusterProducer/interface/alpaka/PFClusterParamsDeviceCollection.h" +#include "RecoParticleFlow/PFClusterProducer/interface/alpaka/PFClusteringVarsDeviceCollection.h" +#include "RecoParticleFlow/PFClusterProducer/interface/alpaka/PFClusteringEdgeVarsDeviceCollection.h" +#include "HeterogeneousCore/AlpakaInterface/interface/config.h" + +namespace ALPAKA_ACCELERATOR_NAMESPACE { + + namespace reco::pfClustering { + struct Position4 { + float x; + float y; + float z; + float w; + }; + + struct Position3 { + float x; + float y; + float z; + }; + + struct Neighbours4 { + int x; + int y; + int z; + int w; + }; + } // namespace reco::pfClustering + + class PFClusterProducerKernel { + public: + PFClusterProducerKernel(Queue& queue, const reco::PFRecHitHostCollection& pfRecHits); + + void execute(Queue& queue, + const reco::PFClusterParamsDeviceCollection& params, + reco::PFClusteringVarsDeviceCollection& pfClusteringVars, + reco::PFClusteringEdgeVarsDeviceCollection& pfClusteringEdgeVars, + const reco::PFRecHitHostCollection& pfRecHits, + reco::PFClusterDeviceCollection& pfClusters, + reco::PFRecHitFractionDeviceCollection& pfrhFractions); + + private: + cms::alpakatools::device_buffer nSeeds; + cms::alpakatools::device_buffer globalClusterPos; + cms::alpakatools::device_buffer globalPrevClusterPos; + cms::alpakatools::device_buffer globalClusterEnergy; + cms::alpakatools::device_buffer globalRhFracSum; + cms::alpakatools::device_buffer globalSeeds; + cms::alpakatools::device_buffer globalRechits; + }; + +} // namespace ALPAKA_ACCELERATOR_NAMESPACE + +#endif diff --git a/RecoParticleFlow/PFClusterProducer/python/particleFlowCluster_cff.py b/RecoParticleFlow/PFClusterProducer/python/particleFlowCluster_cff.py index 415c3b55d4b45..6971ce8a0052a 100644 --- a/RecoParticleFlow/PFClusterProducer/python/particleFlowCluster_cff.py +++ b/RecoParticleFlow/PFClusterProducer/python/particleFlowCluster_cff.py @@ -26,7 +26,7 @@ pfClusteringECALTask = cms.Task(particleFlowRecHitECAL, particleFlowClusterECALUncorrected, particleFlowClusterECALTask) -pfClusteringECAL = cms.Sequence(pfClusteringECALTask) +pfClusteringECAL = cms.Sequence(pfClusteringECALTask) pfClusteringPSTask = cms.Task(particleFlowRecHitPS,particleFlowClusterPS) pfClusteringPS = cms.Sequence(pfClusteringPSTask) @@ -85,3 +85,12 @@ _phase2_timing_particleFlowClusterECALTask) phase2_timing.toModify(particleFlowClusterECAL, inputECAL = 'particleFlowTimeAssignerECAL') + +# Replace HBHE rechit and clustering with Alpaka modules + +from Configuration.ProcessModifiers.alpaka_cff import alpaka + +def _addProcessPFClusterAlpaka(process): + process.load("RecoParticleFlow.PFClusterProducerAlpaka.pfClusterHBHEAlpaka_cff") + +modifyConfigurationPFClusterAlpaka_ = alpaka.makeProcessModifier(_addProcessPFClusterAlpaka) diff --git a/RecoParticleFlow/PFClusterProducer/python/pfClusterHBHEAlpaka_cff.py b/RecoParticleFlow/PFClusterProducer/python/pfClusterHBHEAlpaka_cff.py new file mode 100644 index 0000000000000..8053333f0769d --- /dev/null +++ b/RecoParticleFlow/PFClusterProducer/python/pfClusterHBHEAlpaka_cff.py @@ -0,0 +1,96 @@ +import FWCore.ParameterSet.Config as cms + +from Configuration.ProcessModifiers.alpaka_cff import alpaka + +from RecoParticleFlow.PFRecHitProducer.hcalRecHitSoAProducer_cfi import hcalRecHitSoAProducer as _hcalRecHitSoAProducer +from RecoParticleFlow.PFRecHitProducer.pfRecHitHCALParamsESProducer_cfi import pfRecHitHCALParamsESProducer as _pfRecHitHCALParamsESProducer +from RecoParticleFlow.PFRecHitProducer.pfRecHitHCALTopologyESProducer_cfi import pfRecHitHCALTopologyESProducer as _pfRecHitHCALTopologyESProducer +from RecoParticleFlow.PFRecHitProducer.pfRecHitSoAProducerHCAL_cfi import pfRecHitSoAProducerHCAL as _pfRecHitSoAProducerHCAL +from RecoParticleFlow.PFRecHitProducer.legacyPFRecHitProducer_cfi import legacyPFRecHitProducer as _legacyPFRecHitProducer +from RecoParticleFlow.PFClusterProducer.pfClusterParamsESProducer_cfi import pfClusterParamsESProducer as _pfClusterParamsESProducer +from RecoParticleFlow.PFClusterProducer.pfClusterSoAProducer_cfi import pfClusterSoAProducer as _pfClusterSoAProducer +from RecoParticleFlow.PFClusterProducer.legacyPFClusterProducer_cfi import legacyPFClusterProducer as _legacyPFClusterProducer + +from RecoParticleFlow.PFClusterProducer.particleFlowCluster_cff import pfClusteringHBHEHFTask, particleFlowClusterHBHE, particleFlowRecHitHBHE, particleFlowClusterHCAL + +_alpaka_pfClusteringHBHEHFTask = pfClusteringHBHEHFTask.copy() + +pfRecHitHCALParamsRecordSource = cms.ESSource('EmptyESSource', + recordName = cms.string('PFRecHitHCALParamsRecord'), + iovIsRunNotTime = cms.bool(True), + firstValid = cms.vuint32(1) + ) + +pfRecHitHCALTopologyRecordSource = cms.ESSource('EmptyESSource', + recordName = cms.string('PFRecHitHCALTopologyRecord'), + iovIsRunNotTime = cms.bool(True), + firstValid = cms.vuint32(1) + ) + +pfClusterParamsRecordSource = cms.ESSource('EmptyESSource', + recordName = cms.string('JobConfigurationGPURecord'), + iovIsRunNotTime = cms.bool(True), + firstValid = cms.vuint32(1) + ) + +hbheRecHitToSoA = _hcalRecHitSoAProducer.clone( + src = "hbhereco" + ) + +pfRecHitHCALParamsESProducer = _pfRecHitHCALParamsESProducer.clone( + energyThresholdsHB = cms.vdouble( 0.1, 0.2, 0.3, 0.3 ), + energyThresholdsHE = cms.vdouble( 0.1, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2 ) + ) + +pfRecHitHCALTopologyESProducer = _pfRecHitHCALTopologyESProducer.clone() +pfRecHitSoAProducerHCAL = _pfRecHitSoAProducerHCAL.clone( + producers = cms.VPSet( + cms.PSet( + src = cms.InputTag("hbheRecHitToSoA"), + params = cms.ESInputTag("pfRecHitHCALParamsESProducer:"), + ) + ), + topology = "pfRecHitHCALTopologyESProducer:", + synchronise = cms.untracked.bool(False) + ) + +legacyPFRecHitProducer = _legacyPFRecHitProducer.clone( + src = "pfRecHitSoAProducerHCAL" + ) + +pfClusterParamsESProducer = _pfClusterParamsESProducer.clone() +pfClusterSoAProducer = _pfClusterSoAProducer.clone( + pfRecHits = 'pfRecHitSoAProducerHCAL', + pfClusterParams = 'pfClusterParamsESProducer:', + synchronise = cms.bool(False) + ) + + +legacyPFClusterProducer = _legacyPFClusterProducer.clone( + src = 'pfClusterProducerAlpaka', + pfClusterParams = 'pfClusterParamsESProducer:', + pfClusterBuilder = particleFlowClusterHBHE.pfClusterBuilder, + recHitsSource = 'legacyPFRecHitProducer', + PFRecHitsLabelIn = 'pfRecHitSoAProducerHCAL' + ) + + +_alpaka_pfClusteringHBHEHFTask.add(pfRecHitHCALParamsRecordSource) +_alpaka_pfClusteringHBHEHFTask.add(pfRecHitHCALTopologyRecordSource) +_alpaka_pfClusteringHBHEHFTask.add(pfClusterParamsRecordSource) +_alpaka_pfClusteringHBHEHFTask.add(hbheRecHitToSoA) +_alpaka_pfClusteringHBHEHFTask.add(pfRecHitHCALParamsESProducer) +_alpaka_pfClusteringHBHEHFTask.add(pfRecHitSoAProducerHCAL) +_alpaka_pfClusteringHBHEHFTask.add(legacyPFRecHitProducer) +_alpaka_pfClusteringHBHEHFTask.add(pfClusterParamsESProducer) +_alpaka_pfClusteringHBHEHFTask.add(pfClusterSoAProducer) + +_alpaka_pfClusteringHBHEHFTask.remove(particleFlowRecHitHBHE) +_alpaka_pfClusteringHBHEHFTask.remove(particleFlowClusterHBHE) +_alpaka_pfClusteringHBHEHFTask.remove(particleFlowClusterHCAL) +_alpaka_pfClusteringHBHEHFTask.add(particleFlowClusterHBHE) +_alpaka_pfClusteringHBHEHFTask.add(particleFlowClusterHCAL) + +alpaka.toReplaceWith(particleFlowClusterHBHE, legacyPFClusterProducer) + +alpaka.toReplaceWith(pfClusteringHBHEHFTask, _alpaka_pfClusteringHBHEHFTask) diff --git a/RecoParticleFlow/PFClusterProducer/src/PFClusterParamsHostCollection.cc b/RecoParticleFlow/PFClusterProducer/src/PFClusterParamsHostCollection.cc new file mode 100644 index 0000000000000..4d88391c4b5c1 --- /dev/null +++ b/RecoParticleFlow/PFClusterProducer/src/PFClusterParamsHostCollection.cc @@ -0,0 +1,4 @@ +#include "FWCore/Utilities/interface/typelookup.h" +#include "RecoParticleFlow/PFClusterProducer/interface/PFClusterParamsHostCollection.h" + +TYPELOOKUP_DATA_REG(reco::PFClusterParamsHostCollection); diff --git a/RecoParticleFlow/PFClusterProducer/src/alpaka/PFClusterParamsDeviceCollection.cc b/RecoParticleFlow/PFClusterProducer/src/alpaka/PFClusterParamsDeviceCollection.cc new file mode 100644 index 0000000000000..54a63b04ad9c0 --- /dev/null +++ b/RecoParticleFlow/PFClusterProducer/src/alpaka/PFClusterParamsDeviceCollection.cc @@ -0,0 +1,4 @@ +#include "HeterogeneousCore/AlpakaCore/interface/alpaka/typelookup.h" + +#include "RecoParticleFlow/PFClusterProducer/interface/alpaka/PFClusterParamsDeviceCollection.h" +TYPELOOKUP_ALPAKA_DATA_REG(reco::PFClusterParamsDeviceCollection); diff --git a/RecoParticleFlow/PFClusterProducer/test/test_PFRecHitAndClusterSoA.py b/RecoParticleFlow/PFClusterProducer/test/test_PFRecHitAndClusterSoA.py new file mode 100644 index 0000000000000..4b7e7753cad4e --- /dev/null +++ b/RecoParticleFlow/PFClusterProducer/test/test_PFRecHitAndClusterSoA.py @@ -0,0 +1,491 @@ +# Auto generated configuration file +# using: +# Revision: 1.19 +# Source: /local/reps/CMSSW/CMSSW/Configuration/Applications/python/ConfigBuilder.py,v +# with command line options: reHLT --processName reHLT -s HLT:@relval2021 --conditions auto:phase1_2021_realistic --datatier GEN-SIM-DIGI-RAW -n 5 --eventcontent FEVTDEBUGHLT --geometry DB:Extended --era Run3 --customise=HLTrigger/Configuration/customizeHLTforPatatrack.customizeHLTforPatatrack --filein /store/relval/CMSSW_12_3_0_pre5/RelValTTbar_14TeV/GEN-SIM-DIGI-RAW/123X_mcRun3_2021_realistic_v6-v1/10000/2639d8f2-aaa6-4a78-b7c2-9100a6717e6c.root +import FWCore.ParameterSet.Config as cms + +from Configuration.Eras.Era_Run3_cff import Run3 + +process = cms.Process('rereHLT',Run3) + +_thresholdsHB = cms.vdouble(0.8, 0.8, 0.8, 0.8) +_thresholdsHE = cms.vdouble(0.8, 0.8, 0.8, 0.8, 0.8, 0.8, 0.8) +_thresholdsHBphase1 = cms.vdouble(0.1, 0.2, 0.3, 0.3) +_thresholdsHEphase1 = cms.vdouble(0.1, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2) +_seedingThresholdsHB = cms.vdouble(1.0, 1.0, 1.0, 1.0) +_seedingThresholdsHE = cms.vdouble(1.1, 1.1, 1.1, 1.1, 1.1, 1.1, 1.1) +_seedingThresholdsHBphase1 = cms.vdouble(0.125, 0.25, 0.35, 0.35) +_seedingThresholdsHEphase1 = cms.vdouble(0.1375, 0.275, 0.275, 0.275, 0.275, 0.275, 0.275) +#updated HB RecHit threshold for 2023 +_thresholdsHBphase1_2023 = cms.vdouble(0.4, 0.3, 0.3, 0.3) +#updated HB seeding threshold for 2023 +_seedingThresholdsHBphase1_2023 = cms.vdouble(0.6, 0.5, 0.5, 0.5) + +# import of standard configurations +process.load('Configuration.StandardSequences.Services_cff') +process.load('SimGeneral.HepPDTESSource.pythiapdt_cfi') +process.load('FWCore.MessageService.MessageLogger_cfi') +process.load('Configuration.EventContent.EventContent_cff') +process.load('SimGeneral.MixingModule.mixNoPU_cfi') +process.load('Configuration.StandardSequences.GeometryRecoDB_cff') +process.load('Configuration.StandardSequences.MagneticField_cff') +process.load('HLTrigger.Configuration.HLT_GRun_cff') +process.load('Configuration.StandardSequences.EndOfProcess_cff') +process.load('Configuration.StandardSequences.FrontierConditions_GlobalTag_cff') + +process.load('Configuration.StandardSequences.Accelerators_cff') +process.load('HeterogeneousCore.AlpakaCore.ProcessAcceleratorAlpaka_cfi') + +process.maxEvents = cms.untracked.PSet( + #input = cms.untracked.int32(1), + #input = cms.untracked.int32(100), + input = cms.untracked.int32(1000), + output = cms.optional.untracked.allowed(cms.int32,cms.PSet) +) + +# Input source +# Need to use a file that contains HCAL/ECAL hits. Verify using: +# root root://eoscms.cern.ch//eos/cms/store/relval/CMSSW_13_0_0/RelValTTbar_14TeV/GEN-SIM-DIGI-RAW/PU_130X_mcRun3_2022_realistic_v2_HS-v4/2590000/0088b51b-0cda-40f2-95fc-590f446624ee.root -e 'Events->Print()' -q | grep -E "hltHbhereco|hltEcalRecHit" +process.source = cms.Source("PoolSource", + #fileNames = cms.untracked.vstring('/store/relval/CMSSW_13_0_0/RelValTTbar_14TeV/GEN-SIM-DIGI-RAW/PU_130X_mcRun3_2022_realistic_v2_HS-v4/2590000/0088b51b-0cda-40f2-95fc-590f446624ee.root'), + fileNames = cms.untracked.vstring('/store/relval/CMSSW_13_0_8/RelValQCD_FlatPt_15_3000HS_14/GEN-SIM-DIGI-RAW/130X_mcRun3_2022_realistic_v3_2022-v1/2580000/0e63ba30-251b-4034-93ca-4d400aaa399e.root'), + secondaryFileNames = cms.untracked.vstring(), + #skipEvents = cms.untracked.uint32(999) +) + +process.options = cms.untracked.PSet( + IgnoreCompletely = cms.untracked.vstring(), + Rethrow = cms.untracked.vstring(), + allowUnscheduled = cms.obsolete.untracked.bool, + canDeleteEarly = cms.untracked.vstring(), + deleteNonConsumedUnscheduledModules = cms.untracked.bool(True), + dumpOptions = cms.untracked.bool(False), + emptyRunLumiMode = cms.obsolete.untracked.string, + eventSetup = cms.untracked.PSet( + forceNumberOfConcurrentIOVs = cms.untracked.PSet( + allowAnyLabel_=cms.required.untracked.uint32 + ), + numberOfConcurrentIOVs = cms.untracked.uint32(0) + ), + fileMode = cms.untracked.string('FULLMERGE'), + forceEventSetupCacheClearOnNewRun = cms.untracked.bool(False), + makeTriggerResults = cms.obsolete.untracked.bool, + numberOfConcurrentLuminosityBlocks = cms.untracked.uint32(0), + numberOfConcurrentRuns = cms.untracked.uint32(1), + numberOfStreams = cms.untracked.uint32(0), + numberOfThreads = cms.untracked.uint32(1), + printDependencies = cms.untracked.bool(False), + sizeOfStackForThreadsInKB = cms.optional.untracked.uint32, + throwIfIllegalParameter = cms.untracked.bool(True), + wantSummary = cms.untracked.bool(False) +) + +# Production Info +process.configurationMetadata = cms.untracked.PSet( + annotation = cms.untracked.string('reHLT nevts:5'), + name = cms.untracked.string('Applications'), + version = cms.untracked.string('$Revision: 1.19 $') +) + +# Output definition +process.FEVTDEBUGHLToutput = cms.OutputModule("PoolOutputModule", + dataset = cms.untracked.PSet( + dataTier = cms.untracked.string('GEN-SIM-DIGI-RAW'), + filterName = cms.untracked.string('') + ), + fileName = cms.untracked.string('reHLT_HLT.root'), + outputCommands = process.FEVTDEBUGHLTEventContent.outputCommands, + splitLevel = cms.untracked.int32(0) +) + +# Other statements +from HLTrigger.Configuration.CustomConfigs import ProcessName +process = ProcessName(process) + +from Configuration.AlCa.GlobalTag import GlobalTag +process.GlobalTag = GlobalTag(process.GlobalTag, 'auto:phase1_2022_realistic', '') + +# Path and EndPath definitions +process.endjob_step = cms.EndPath(process.endOfProcess) +process.FEVTDEBUGHLToutput_step = cms.EndPath(process.FEVTDEBUGHLToutput) + +# Schedule definition +# process.schedule imported from cff in HLTrigger.Configuration +process.schedule.extend([process.endjob_step,process.FEVTDEBUGHLToutput_step]) +from PhysicsTools.PatAlgos.tools.helpers import associatePatAlgosToolsTask +associatePatAlgosToolsTask(process) + +# customisation of the process +from HLTrigger.Configuration.customizeHLTforMC import customizeHLTforMC +process = customizeHLTforMC(process) + +# Add early deletion of temporary data products to reduce peak memory need +from Configuration.StandardSequences.earlyDeleteSettings_cff import customiseEarlyDelete +process = customiseEarlyDelete(process) + +process.load( "HLTrigger.Timer.FastTimerService_cfi" ) +if 'MessageLogger' in process.__dict__: + process.MessageLogger.TriggerSummaryProducerAOD = cms.untracked.PSet() + process.MessageLogger.L1GtTrigReport = cms.untracked.PSet() + process.MessageLogger.L1TGlobalSummary = cms.untracked.PSet() + process.MessageLogger.HLTrigReport = cms.untracked.PSet() + process.MessageLogger.FastReport = cms.untracked.PSet() + process.MessageLogger.ThroughputService = cms.untracked.PSet() + process.MessageLogger.cerr.FastReport = cms.untracked.PSet( limit = cms.untracked.int32( 10000000 ) ) + + +##################################### +## Read command-line arguments ## +##################################### +import sys +import argparse +parser = argparse.ArgumentParser(prog=f"{sys.argv[0]} {sys.argv[1]} --", description='Test and validation of PFRecHitProducer with Alpaka') +parser.add_argument('-c', '--cal', type=str, default='HCAL', + help='Calorimeter type. Possible options: HCAL, ECAL. Default: HCAL') +parser.add_argument('-b', '--backend', type=str, default='auto', + help='Alpaka backend. Possible options: CPU, GPU, auto. Default: auto') +parser.add_argument('-s', '--synchronise', action='store_true', default=False, + help='Put synchronisation point at the end of Alpaka modules (for benchmarking performance)') +parser.add_argument('-t', '--threads', type=int, default=8, + help='Number of threads. Default: 8') +parser.add_argument('-d', '--debug', type=int, default=0, const=1, nargs="?", + help='Dump PFRecHits for first event (n>0) or first error (n<0). This applies to the n-th validation (1: Legacy vs Alpaka, 2: Legacy vs Legacy-from-Alpaka, 3: Alpaka vs Legacy-from-Alpaka). Default: 0') +args = parser.parse_args() + +if(args.debug and args.threads != 1): + args.threads = 1 + print("Number of threads set to 1 for debugging") + +assert args.cal.lower() in ["hcal", "ecal", "h", "e"], "Invalid calorimeter type" +hcal = args.cal.lower() in ["hcal", "h"] +CAL = "HCAL" if hcal else "ECAL" + +alpaka_backends = { + "cpu": "alpaka_serial_sync::%s", # Execute on CPU + "gpu": "alpaka_cuda_async::%s", # Execute using CUDA + "cuda": "alpaka_cuda_async::%s", # Execute using CUDA + "auto": "%s@alpaka" # Let framework choose +} +assert args.backend.lower() in alpaka_backends, "Invalid backend" +alpaka_backend_str = alpaka_backends[args.backend.lower()] + + + +######################################## +## Legacy HBHE PFRecHit producer ## +######################################## +process.hltParticleFlowRecHitHBHE = cms.EDProducer("PFRecHitProducer", + navigator = cms.PSet( + hcalEnums = cms.vint32(1, 2), + name = cms.string('PFRecHitHCALDenseIdNavigator') + ), + producers = cms.VPSet(cms.PSet( + name = cms.string('PFHBHERecHitCreator'), + qualityTests = cms.VPSet( + cms.PSet( + usePFThresholdsFromDB = cms.bool(False), + cuts = cms.VPSet( + cms.PSet( + depth = cms.vint32(1, 2, 3, 4), + detectorEnum = cms.int32(1), + threshold = cms.vdouble(0.4, 0.3, 0.3, 0.3) + ), + cms.PSet( + depth = cms.vint32(1, 2, 3, 4, 5, 6, 7), + detectorEnum = cms.int32(2), + threshold = cms.vdouble(0.1, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2) + ) + ), + name = cms.string('PFRecHitQTestHCALThresholdVsDepth') + ), + cms.PSet( + cleaningThresholds = cms.vdouble(0.0), + flags = cms.vstring('Standard'), + maxSeverities = cms.vint32(11), + name = cms.string('PFRecHitQTestHCALChannel') + ) + ), + src = cms.InputTag("hltHbherecoLegacy") + )) +) + + +##################################### +## Legacy PFRecHit producer ## +##################################### +if hcal: + process.hltParticleFlowRecHit = cms.EDProducer("PFRecHitProducer", + navigator = cms.PSet( + hcalEnums = cms.vint32(1, 2), + name = cms.string('PFRecHitHCALDenseIdNavigator') + ), + producers = cms.VPSet(cms.PSet( + name = cms.string('PFHBHERecHitCreator'), + qualityTests = cms.VPSet( + cms.PSet( + usePFThresholdsFromDB = cms.bool(False), + cuts = cms.VPSet( + cms.PSet( + depth = cms.vint32(1, 2, 3, 4), + detectorEnum = cms.int32(1), + threshold = cms.vdouble(0.4, 0.3, 0.3, 0.3) + ), + cms.PSet( + depth = cms.vint32(1, 2, 3, 4, 5, 6, 7), + detectorEnum = cms.int32(2), + threshold = cms.vdouble(0.1, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2) + ) + ), + name = cms.string('PFRecHitQTestHCALThresholdVsDepth') + ), + cms.PSet( + cleaningThresholds = cms.vdouble(0.0), + flags = cms.vstring('Standard'), + maxSeverities = cms.vint32(11), + name = cms.string('PFRecHitQTestHCALChannel') + ) + ), + src = cms.InputTag("hltHbherecoLegacy") + )) + ) +else: # ecal + qualityTestsECAL = cms.VPSet( + cms.PSet( + name = cms.string("PFRecHitQTestDBThreshold"), + applySelectionsToAllCrystals=cms.bool(True), + ), + cms.PSet( + name = cms.string("PFRecHitQTestECAL"), + cleaningThreshold = cms.double(2.0), + timingCleaning = cms.bool(True), + topologicalCleaning = cms.bool(True), + skipTTRecoveredHits = cms.bool(True) + ) + ) + process.hltParticleFlowRecHit = cms.EDProducer("PFRecHitProducer", + navigator = cms.PSet( + name = cms.string("PFRecHitECALNavigator"), + barrel = cms.PSet( ), + endcap = cms.PSet( ) + ), + producers = cms.VPSet( + cms.PSet( + name = cms.string("PFEBRecHitCreator"), + src = cms.InputTag("hltEcalRecHit","EcalRecHitsEB"), + srFlags = cms.InputTag(""), + qualityTests = qualityTestsECAL + ), + cms.PSet( + name = cms.string("PFEERecHitCreator"), + src = cms.InputTag("hltEcalRecHit","EcalRecHitsEE"), + srFlags = cms.InputTag(""), + qualityTests = qualityTestsECAL + ) + ) + ) + + +##################################### +## Alpaka PFRecHit producer ## +##################################### +# Convert legacy CaloRecHits to CaloRecHitSoA +if hcal: + process.hltParticleFlowRecHitToSoA = cms.EDProducer(alpaka_backend_str % "HCALRecHitSoAProducer", + src = cms.InputTag("hltHbherecoLegacy"), + synchronise = cms.untracked.bool(args.synchronise) + ) +else: # ecal + process.hltParticleFlowRecHitEBToSoA = cms.EDProducer(alpaka_backend_str % "ECALRecHitSoAProducer", + src = cms.InputTag("hltEcalRecHit","EcalRecHitsEB"), + synchronise = cms.untracked.bool(args.synchronise) + ) + process.hltParticleFlowRecHitEEToSoA = cms.EDProducer(alpaka_backend_str % "ECALRecHitSoAProducer", + src = cms.InputTag("hltEcalRecHit","EcalRecHitsEE"), + synchronise = cms.untracked.bool(args.synchronise) + ) + +# Construct topology and cut parameter information +process.pfRecHitTopologyRecordSource = cms.ESSource('EmptyESSource', + recordName = cms.string(f'PFRecHit{CAL}TopologyRecord'), + iovIsRunNotTime = cms.bool(True), + firstValid = cms.vuint32(1) +) +process.pfRecHitParamsRecordSource = cms.ESSource('EmptyESSource', + recordName = cms.string(f'PFRecHit{CAL}ParamsRecord'), + iovIsRunNotTime = cms.bool(True), + firstValid = cms.vuint32(1) +) +process.hltParticleFlowRecHitTopologyESProducer = cms.ESProducer(alpaka_backend_str % f"PFRecHit{CAL}TopologyESProducer") +if hcal: + process.hltParticleFlowRecHitParamsESProducer = cms.ESProducer(alpaka_backend_str % "PFRecHitHCALParamsESProducer", + energyThresholdsHB = cms.vdouble( 0.4, 0.3, 0.3, 0.3 ), + energyThresholdsHE = cms.vdouble( 0.1, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2 ) + ) +else: # ecal + process.hltParticleFlowRecHitParamsESProducer = cms.ESProducer(alpaka_backend_str % "PFRecHitECALParamsESProducer") + +# Construct PFRecHitSoA +if hcal: + process.hltParticleFlowPFRecHitAlpaka = cms.EDProducer(alpaka_backend_str % "PFRecHitSoAProducerHCAL", + producers = cms.VPSet( + cms.PSet( + src = cms.InputTag("hltParticleFlowRecHitToSoA"), + params = cms.ESInputTag("hltParticleFlowRecHitParamsESProducer:"), + ) + ), + topology = cms.ESInputTag("hltParticleFlowRecHitTopologyESProducer:"), + synchronise = cms.untracked.bool(args.synchronise) + ) +else: # ecal + process.hltParticleFlowPFRecHitAlpaka = cms.EDProducer(alpaka_backend_str % "PFRecHitSoAProducerECAL", + producers = cms.VPSet( + cms.PSet( + src = cms.InputTag("hltParticleFlowRecHitEBToSoA"), + params = cms.ESInputTag("hltParticleFlowRecHitParamsESProducer:") + ), + cms.PSet( + src = cms.InputTag("hltParticleFlowRecHitEEToSoA"), + params = cms.ESInputTag("hltParticleFlowRecHitParamsESProducer:") + ) + ), + topology = cms.ESInputTag("hltParticleFlowRecHitTopologyESProducer:"), + synchronise = cms.bool(args.synchronise) + ) + +# Convert Alpaka PFRecHits to legacy format (for validation) +process.hltParticleFlowAlpakaToLegacyPFRecHits = cms.EDProducer("LegacyPFRecHitProducer", + src = cms.InputTag("hltParticleFlowPFRecHitAlpaka") +) + + +##################################### +## PFRecHit validation ## +##################################### +# Validate legacy format from legacy module vs SoA format from Alpaka module +# This is the main Alpaka vs legacy test +from DQMServices.Core.DQMEDAnalyzer import DQMEDAnalyzer +process.hltParticleFlowPFRecHitComparison = DQMEDAnalyzer("PFRecHitProducerTest", + #caloRecHits = cms.untracked.InputTag("hltParticleFlowRecHitToSoA"), + pfRecHitsSource1 = cms.untracked.InputTag("hltParticleFlowRecHit"), + pfRecHitsSource2 = cms.untracked.InputTag("hltParticleFlowPFRecHitAlpaka"), + pfRecHitsType1 = cms.untracked.string("legacy"), + pfRecHitsType2 = cms.untracked.string("alpaka"), + title = cms.untracked.string("Legacy vs Alpaka"), + dumpFirstEvent = cms.untracked.bool(args.debug == 1), + dumpFirstError = cms.untracked.bool(args.debug == -1), + strictCompare = cms.untracked.bool(True) +) + +# Validate legacy format from legacy module vs legacy format from Alpaka module +process.hltParticleFlowAlpakaToLegacyPFRecHitsComparison1 = DQMEDAnalyzer("PFRecHitProducerTest", + pfRecHitsSource1 = cms.untracked.InputTag("hltParticleFlowRecHitHBHE"), + pfRecHitsSource2 = cms.untracked.InputTag("hltParticleFlowAlpakaToLegacyPFRecHits"), + pfRecHitsType1 = cms.untracked.string("legacy"), + pfRecHitsType2 = cms.untracked.string("legacy"), + title = cms.untracked.string("Legacy vs Legacy-from-Alpaka"), + dumpFirstEvent = cms.untracked.bool(args.debug == 2), + dumpFirstError = cms.untracked.bool(args.debug == -2), + strictCompare = cms.untracked.bool(True) +) + +# Validate SoA format from Alpaka module vs legacy format from Alpaka module +# This tests the SoA-to-legacy conversion module +process.hltParticleFlowAlpakaToLegacyPFRecHitsComparison2 = DQMEDAnalyzer("PFRecHitProducerTest", + pfRecHitsSource1 = cms.untracked.InputTag("hltParticleFlowPFRecHitAlpaka"), + pfRecHitsSource2 = cms.untracked.InputTag("hltParticleFlowAlpakaToLegacyPFRecHits"), + pfRecHitsType1 = cms.untracked.string("alpaka"), + pfRecHitsType2 = cms.untracked.string("legacy"), + title = cms.untracked.string("Alpaka vs Legacy-from-Alpaka"), + dumpFirstEvent = cms.untracked.bool(args.debug == 3), + dumpFirstError = cms.untracked.bool(args.debug == -3), + strictCompare = cms.untracked.bool(True) +) + +#Move Onto Clustering + +process.pfClusterParamsAlpakaESRcdSource = cms.ESSource('EmptyESSource', + recordName = cms.string('JobConfigurationGPURecord'), + iovIsRunNotTime = cms.bool(True), + firstValid = cms.vuint32(1) +) + +from RecoParticleFlow.PFClusterProducer.pfClusterParamsESProducer_cfi import pfClusterParamsESProducer as _pfClusterParamsESProducer + +process.hltParticleFlowClusterParamsESProducer = _pfClusterParamsESProducer.clone() +process.hltParticleFlowClusterParamsESProducer.pfClusterBuilder.maxIterations = 5 + +for idx, x in enumerate(process.hltParticleFlowClusterParamsESProducer.initialClusteringStep.thresholdsByDetector): + for idy, y in enumerate(process.hltParticleFlowClusterHBHE.initialClusteringStep.thresholdsByDetector): + if x.detector == y.detector: + x.gatheringThreshold = y.gatheringThreshold +for idx, x in enumerate(process.hltParticleFlowClusterParamsESProducer.pfClusterBuilder.recHitEnergyNorms): + for idy, y in enumerate(process.hltParticleFlowClusterHBHE.pfClusterBuilder.recHitEnergyNorms): + if x.detector == y.detector: + x.recHitEnergyNorm = y.recHitEnergyNorm +for idx, x in enumerate(process.hltParticleFlowClusterParamsESProducer.seedFinder.thresholdsByDetector): + for idy, y in enumerate(process.hltParticleFlowClusterHBHE.seedFinder.thresholdsByDetector): + if x.detector == y.detector: + x.seedingThreshold = y.seedingThreshold + +process.hltParticleFlowPFClusterAlpaka = cms.EDProducer(alpaka_backend_str % "PFClusterSoAProducer", + pfClusterParams = cms.ESInputTag("hltParticleFlowClusterParamsESProducer:"), + synchronise = cms.bool(args.synchronise)) +process.hltParticleFlowPFClusterAlpaka.pfRecHits = cms.InputTag("hltParticleFlowPFRecHitAlpaka") + +# Create legacy PFClusters + +process.hltParticleFlowAlpakaToLegacyPFClusters = cms.EDProducer("LegacyPFClusterProducer", + src = cms.InputTag("hltParticleFlowPFClusterAlpaka"), + pfClusterParams = cms.ESInputTag("hltParticleFlowClusterParamsESProducer:"), + pfClusterBuilder = process.hltParticleFlowClusterHBHE.pfClusterBuilder, + recHitsSource = cms.InputTag("hltParticleFlowAlpakaToLegacyPFRecHits")) +process.hltParticleFlowAlpakaToLegacyPFClusters.PFRecHitsLabelIn = cms.InputTag("hltParticleFlowPFRecHitAlpaka") + +process.hltParticleFlowClusterHBHE.pfClusterBuilder.maxIterations = 5 +process.hltParticleFlowClusterHBHE.usePFThresholdsFromDB = cms.bool(False) + +# Additional customization +process.FEVTDEBUGHLToutput.outputCommands = cms.untracked.vstring('drop *_*_*_*') +process.FEVTDEBUGHLToutput.outputCommands.append('keep *_hltParticleFlowRecHitToSoA_*_*') +process.FEVTDEBUGHLToutput.outputCommands.append('keep *_hltParticleFlowPFRecHitAlpaka_*_*') +process.FEVTDEBUGHLToutput.outputCommands.append('keep *_hltParticleFlowAlpakaToLegacyPFClusters_*_*') +process.FEVTDEBUGHLToutput.outputCommands.append('keep *_hltParticleFlowClusterHBHE_*_*') + +# Path/sequence definitions +path = process.hltHcalDigis +path += process.hltHbherecoLegacy +path += process.hltParticleFlowRecHit # Construct PFRecHits on CPU +if hcal: + path += process.hltParticleFlowRecHitToSoA # Convert legacy calorimeter hits to SoA (HCAL barrel+endcap) +else: # ecal + path += process.hltParticleFlowRecHitEBToSoA # Convert legacy calorimeter hits to SoA (ECAL barrel) + path += process.hltParticleFlowRecHitEEToSoA # Convert legacy calorimeter hits to SoA (ECAL endcap) +path += process.hltParticleFlowPFRecHitAlpaka # Construct PFRecHits SoA +path += process.hltParticleFlowRecHitHBHE # Construct Legacy PFRecHits +path += process.hltParticleFlowClusterHBHE +path += process.hltParticleFlowPFRecHitComparison # Validate Alpaka vs CPU +path += process.hltParticleFlowAlpakaToLegacyPFRecHits # Convert Alpaka PFRecHits SoA to legacy format +path += process.hltParticleFlowAlpakaToLegacyPFRecHitsComparison1 # Validate legacy-format-from-alpaka vs regular legacy format +path += process.hltParticleFlowAlpakaToLegacyPFRecHitsComparison2 # Validate Alpaka format vs legacy-format-from-alpaka + +path += process.hltParticleFlowPFClusterAlpaka +path += process.hltParticleFlowAlpakaToLegacyPFClusters + +process.PFClusterAlpakaValidationTask = cms.EndPath(path) +process.schedule = cms.Schedule(process.PFClusterAlpakaValidationTask) +process.schedule.extend([process.endjob_step,process.FEVTDEBUGHLToutput_step]) +process.options.numberOfThreads = cms.untracked.uint32(args.threads) + +# Save DQM output +process.DQMoutput = cms.OutputModule("DQMRootOutputModule", + dataset = cms.untracked.PSet( + dataTier = cms.untracked.string('DQMIO'), + filterName = cms.untracked.string('') + ), + fileName = cms.untracked.string('file:DQMIO.root'), + outputCommands = process.DQMEventContent.outputCommands, + splitLevel = cms.untracked.int32(0) +) +process.DQMTask = cms.EndPath(process.DQMoutput) +process.schedule.append(process.DQMTask) diff --git a/Validation/RecoParticleFlow/plugins/PFCaloGPUComparisonTask.cc b/Validation/RecoParticleFlow/plugins/PFCaloGPUComparisonTask.cc new file mode 100644 index 0000000000000..404c27f715773 --- /dev/null +++ b/Validation/RecoParticleFlow/plugins/PFCaloGPUComparisonTask.cc @@ -0,0 +1,168 @@ +#include "DQMServices/Core/interface/DQMEDAnalyzer.h" +#include "DQMServices/Core/interface/DQMStore.h" +#include "DataFormats/CaloRecHit/interface/CaloCluster.h" +#include "DataFormats/CaloRecHit/interface/CaloClusterFwd.h" +#include "DataFormats/CaloTowers/interface/CaloTowerCollection.h" +#include "DataFormats/CaloTowers/interface/CaloTowerDetId.h" +#include "DataFormats/Common/interface/Handle.h" +#include "DataFormats/DetId/interface/DetId.h" +#include "DataFormats/EcalDetId/interface/EcalSubdetector.h" +#include "DataFormats/HcalDetId/interface/HcalDetId.h" +#include "DataFormats/HcalDetId/interface/HcalSubdetector.h" +#include "DataFormats/Math/interface/Vector3D.h" +#include "DataFormats/Math/interface/deltaR.h" +#include "DataFormats/ParticleFlowCandidate/interface/PFCandidate.h" +#include "DataFormats/ParticleFlowReco/interface/PFBlock.h" +#include "DataFormats/ParticleFlowReco/interface/PFBlockElementCluster.h" +#include "DataFormats/ParticleFlowReco/interface/PFBlockElementTrack.h" +#include "DataFormats/ParticleFlowReco/interface/PFCluster.h" +#include "DataFormats/ParticleFlowReco/interface/PFClusterFwd.h" +#include "DataFormats/ParticleFlowReco/interface/PFLayer.h" +#include "DataFormats/ParticleFlowReco/interface/PFRecHit.h" +#include "DataFormats/ParticleFlowReco/interface/PFRecHitFraction.h" +#include "FWCore/Framework/interface/Event.h" +#include "FWCore/Framework/interface/EventSetup.h" +#include "FWCore/Framework/interface/Frameworkfwd.h" +#include "FWCore/MessageLogger/interface/MessageLogger.h" +#include "FWCore/ParameterSet/interface/ParameterSet.h" +#include "FWCore/PluginManager/interface/ModuleDef.h" +#include "FWCore/ParameterSet/interface/ConfigurationDescriptions.h" +#include "SimDataFormats/GeneratorProducts/interface/HepMCProduct.h" + +#include +#include +#include +#include +#include +#include + +#ifdef PFLOW_DEBUG +#define LOGVERB(x) edm::LogVerbatim(x) +#else +#define LOGVERB(x) LogTrace(x) +#endif + +class PFCaloGPUComparisonTask : public DQMEDAnalyzer { +public: + PFCaloGPUComparisonTask(edm::ParameterSet const& conf); + ~PFCaloGPUComparisonTask() override = default; + void analyze(edm::Event const& e, edm::EventSetup const& c) override; + void bookHistograms(DQMStore::IBooker&, edm::Run const&, edm::EventSetup const&) override; + +private: + edm::EDGetTokenT pfClusterTok_ref_; + edm::EDGetTokenT pfClusterTok_target_; + + MonitorElement* pfCluster_Multiplicity_GPUvsCPU_; + MonitorElement* pfCluster_Energy_GPUvsCPU_; + MonitorElement* pfCluster_RecHitMultiplicity_GPUvsCPU_; + MonitorElement* pfCluster_Layer_GPUvsCPU_; + MonitorElement* pfCluster_Depth_GPUvsCPU_; + MonitorElement* pfCluster_Eta_GPUvsCPU_; + MonitorElement* pfCluster_Phi_GPUvsCPU_; + MonitorElement* pfCluster_DuplicateMatches_GPUvsCPU_; + + std::string pfCaloGPUCompDir_; +}; + +PFCaloGPUComparisonTask::PFCaloGPUComparisonTask(const edm::ParameterSet& conf) + : pfClusterTok_ref_{consumes( + conf.getUntrackedParameter("pfClusterToken_ref"))}, + pfClusterTok_target_{ + consumes(conf.getUntrackedParameter("pfClusterToken_target"))}, + pfCaloGPUCompDir_{conf.getUntrackedParameter("pfCaloGPUCompDir")} {} + +void PFCaloGPUComparisonTask::bookHistograms(DQMStore::IBooker& ibooker, + edm::Run const& irun, + edm::EventSetup const& isetup) { + const char* histo; + + ibooker.setCurrentFolder("ParticleFlow/" + pfCaloGPUCompDir_); + + histo = "pfCluster_Multiplicity_GPUvsCPU"; + pfCluster_Multiplicity_GPUvsCPU_ = ibooker.book2D(histo, histo, 100, 0, 2000, 100, 0, 2000); + + histo = "pfCluster_Energy_GPUvsCPU"; + pfCluster_Energy_GPUvsCPU_ = ibooker.book2D(histo, histo, 100, 0, 500, 100, 0, 500); + + histo = "pfCluster_RecHitMultiplicity_GPUvsCPU"; + pfCluster_RecHitMultiplicity_GPUvsCPU_ = ibooker.book2D(histo, histo, 100, 0, 100, 100, 0, 100); + + histo = "pfCluster_Layer_GPUvsCPU"; + pfCluster_Layer_GPUvsCPU_ = ibooker.book2D(histo, histo, 100, 0, 100, 100, 0, 100); + + histo = "pfCluster_Depth_GPUvsCPU"; + pfCluster_Depth_GPUvsCPU_ = ibooker.book2D(histo, histo, 100, 0, 100, 100, 0, 100); + + histo = "pfCluster_Eta_GPUvsCPU"; + pfCluster_Eta_GPUvsCPU_ = ibooker.book2D(histo, histo, 100, 0, 100, 100, 0, 100); + + histo = "pfCluster_Phi_GPUvsCPU"; + pfCluster_Phi_GPUvsCPU_ = ibooker.book2D(histo, histo, 100, 0, 100, 100, 0, 100); + + histo = "pfCluster_DuplicateMatches_GPUvsCPU"; + pfCluster_DuplicateMatches_GPUvsCPU_ = ibooker.book1D(histo, histo, 100, 0., 1000); +} +void PFCaloGPUComparisonTask::analyze(edm::Event const& event, edm::EventSetup const& c) { + edm::Handle pfClusters_ref; + event.getByToken(pfClusterTok_ref_, pfClusters_ref); + + edm::Handle pfClusters_target; + event.getByToken(pfClusterTok_target_, pfClusters_target); + + // + // Compare per-event PF cluster multiplicity + + if (pfClusters_ref->size() != pfClusters_target->size()) + LOGVERB("PFCaloGPUComparisonTask") << " PFCluster multiplicity " << pfClusters_ref->size() << " " + << pfClusters_target->size(); + pfCluster_Multiplicity_GPUvsCPU_->Fill((float)pfClusters_ref->size(), (float)pfClusters_target->size()); + + // + // Find matching PF cluster pairs + std::vector matched_idx; + matched_idx.reserve(pfClusters_ref->size()); + for (unsigned i = 0; i < pfClusters_ref->size(); ++i) { + bool matched = false; + for (unsigned j = 0; j < pfClusters_target->size(); ++j) { + if (pfClusters_ref->at(i).seed() == pfClusters_target->at(j).seed()) { + if (!matched) { + matched = true; + matched_idx.push_back((int)j); + } else { + edm::LogWarning("PFCaloGPUComparisonTask") << "Found duplicate match"; + pfCluster_DuplicateMatches_GPUvsCPU_->Fill((int)j); + } + } + } + if (!matched) + matched_idx.push_back(-1); // if you don't find a match, put a dummy number + } + + // + // Plot matching PF cluster variables + for (unsigned i = 0; i < pfClusters_ref->size(); ++i) { + if (matched_idx[i] >= 0) { + unsigned int j = matched_idx[i]; + int ref_energy_bin = pfCluster_Energy_GPUvsCPU_->getTH2F()->GetXaxis()->FindBin(pfClusters_ref->at(i).energy()); + int target_energy_bin = + pfCluster_Energy_GPUvsCPU_->getTH2F()->GetXaxis()->FindBin(pfClusters_target->at(j).energy()); + if (ref_energy_bin != target_energy_bin) + edm::LogPrint("PFCaloGPUComparisonTask") + << "Off-diagonal energy bin entries: " << pfClusters_ref->at(i).energy() << " " + << pfClusters_ref->at(i).eta() << " " << pfClusters_ref->at(i).phi() << " " + << pfClusters_target->at(j).energy() << " " << pfClusters_target->at(j).eta() << " " + << pfClusters_target->at(j).phi() << std::endl; + pfCluster_Energy_GPUvsCPU_->Fill(pfClusters_ref->at(i).energy(), pfClusters_target->at(j).energy()); + pfCluster_Layer_GPUvsCPU_->Fill(pfClusters_ref->at(i).layer(), pfClusters_target->at(j).layer()); + pfCluster_Eta_GPUvsCPU_->Fill(pfClusters_ref->at(i).eta(), pfClusters_target->at(j).eta()); + pfCluster_Phi_GPUvsCPU_->Fill(pfClusters_ref->at(i).phi(), pfClusters_target->at(j).phi()); + pfCluster_Depth_GPUvsCPU_->Fill(pfClusters_ref->at(i).depth(), pfClusters_target->at(j).depth()); + pfCluster_RecHitMultiplicity_GPUvsCPU_->Fill((float)pfClusters_ref->at(i).recHitFractions().size(), + (float)pfClusters_target->at(j).recHitFractions().size()); + } + } +} + +#include "FWCore/Framework/interface/MakerMacros.h" +DEFINE_FWK_MODULE(PFCaloGPUComparisonTask);