diff --git a/CalibTracker/Records/interface/SiPixelGainCalibrationForHLTSoARcd.h b/CalibTracker/Records/interface/SiPixelGainCalibrationForHLTSoARcd.h new file mode 100644 index 0000000000000..f0f2e5f5103ab --- /dev/null +++ b/CalibTracker/Records/interface/SiPixelGainCalibrationForHLTSoARcd.h @@ -0,0 +1,14 @@ +#ifndef CalibTracker_Records_SiPixelGainCalibrationForHLTSoARcd_h +#define CalibTracker_Records_SiPixelGainCalibrationForHLTSoARcd_h + +#include "CondFormats/DataRecord/interface/SiPixelGainCalibrationForHLTRcd.h" +#include "FWCore/Framework/interface/DependentRecordImplementation.h" +#include "FWCore/Framework/interface/EventSetupRecordImplementation.h" +#include "Geometry/Records/interface/TrackerDigiGeometryRecord.h" + +class SiPixelGainCalibrationForHLTSoARcd + : public edm::eventsetup::DependentRecordImplementation< + SiPixelGainCalibrationForHLTSoARcd, + edm::mpl::Vector> {}; + +#endif // CalibTracker_Records_SiPixelGainCalibrationForHLTSoARcd_h diff --git a/CalibTracker/Records/interface/SiPixelMappingSoARecord.h b/CalibTracker/Records/interface/SiPixelMappingSoARecord.h new file mode 100644 index 0000000000000..e86d110f008f2 --- /dev/null +++ b/CalibTracker/Records/interface/SiPixelMappingSoARecord.h @@ -0,0 +1,17 @@ +#ifndef CalibTracker_Records_interface_SiPixelMappingSoARecord_h +#define CalibTracker_Records_interface_SiPixelMappingSoARecord_h + +#include "FWCore/Framework/interface/EventSetupRecordImplementation.h" +#include "FWCore/Framework/interface/DependentRecordImplementation.h" +#include "CondFormats/DataRecord/interface/SiPixelGainCalibrationForHLTRcd.h" +#include "Geometry/Records/interface/TrackerDigiGeometryRecord.h" +#include "RecoTracker/Record/interface/CkfComponentsRecord.h" +#include "CondFormats/DataRecord/interface/SiPixelFedCablingMapRcd.h" +#include "CondFormats/DataRecord/interface/SiPixelQualityRcd.h" + +class SiPixelMappingSoARecord : + public edm::eventsetup::DependentRecordImplementation< + SiPixelMappingSoARecord, + 
edm::mpl::Vector> {}; + +#endif \ No newline at end of file diff --git a/CalibTracker/Records/src/SiPixelGainCalibrationForHLTSoARcd.cc b/CalibTracker/Records/src/SiPixelGainCalibrationForHLTSoARcd.cc new file mode 100644 index 0000000000000..6634cee007301 --- /dev/null +++ b/CalibTracker/Records/src/SiPixelGainCalibrationForHLTSoARcd.cc @@ -0,0 +1,5 @@ +#include "CalibTracker/Records/interface/SiPixelGainCalibrationForHLTSoARcd.h" +#include "FWCore/Framework/interface/eventsetuprecord_registration_macro.h" +#include "FWCore/Utilities/interface/typelookup.h" + +EVENTSETUP_RECORD_REG(SiPixelGainCalibrationForHLTSoARcd); diff --git a/CalibTracker/Records/src/SiPixelMappingSoARcd.cc b/CalibTracker/Records/src/SiPixelMappingSoARcd.cc new file mode 100644 index 0000000000000..fea2c978c1539 --- /dev/null +++ b/CalibTracker/Records/src/SiPixelMappingSoARcd.cc @@ -0,0 +1,5 @@ +#include "CalibTracker/Records/interface/SiPixelMappingSoARecord.h" +#include "FWCore/Framework/interface/eventsetuprecord_registration_macro.h" +#include "FWCore/Utilities/interface/typelookup.h" + +EVENTSETUP_RECORD_REG(SiPixelMappingSoARecord); diff --git a/CalibTracker/SiPixelESProducers/plugins/BuildFile.xml b/CalibTracker/SiPixelESProducers/plugins/BuildFile.xml index 05446593b6229..0cd075b5d9498 100644 --- a/CalibTracker/SiPixelESProducers/plugins/BuildFile.xml +++ b/CalibTracker/SiPixelESProducers/plugins/BuildFile.xml @@ -1,16 +1,38 @@ - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/CalibTracker/SiPixelESProducers/plugins/alpaka/SiPixelCablingSoAESProducer.cc b/CalibTracker/SiPixelESProducers/plugins/alpaka/SiPixelCablingSoAESProducer.cc new file mode 100644 index 0000000000000..fa2b5eb8c11c0 --- /dev/null +++ b/CalibTracker/SiPixelESProducers/plugins/alpaka/SiPixelCablingSoAESProducer.cc @@ -0,0 +1,147 @@ +#include "CondFormats/DataRecord/interface/SiPixelFedCablingMapRcd.h" +#include 
"CondFormats/DataRecord/interface/SiPixelQualityRcd.h" +#include "CondFormats/SiPixelObjects/interface/SiPixelFedCablingMap.h" +#include "CalibTracker/Records/interface/SiPixelMappingSoARecord.h" +#include "CondFormats/SiPixelObjects/interface/SiPixelFedCablingTree.h" +#include "CondFormats/SiPixelObjects/interface/SiPixelQuality.h" +#include "DataFormats/SiPixelClusterSoA/interface/ClusteringConstants.h" +#include "CondFormats/SiPixelObjects/interface/SiPixelMappingHost.h" +#include "CondFormats/SiPixelObjects/interface/alpaka/SiPixelMappingDevice.h" +#include "FWCore/Framework/interface/ESTransientHandle.h" +#include "FWCore/Framework/interface/EventSetup.h" +#include "FWCore/MessageLogger/interface/MessageLogger.h" +#include "FWCore/ParameterSet/interface/ParameterSet.h" +#include "FWCore/Utilities/interface/ESGetToken.h" + +#include "Geometry/CommonDetUnit/interface/GeomDetType.h" +#include "Geometry/Records/interface/TrackerDigiGeometryRecord.h" +#include "Geometry/TrackerGeometryBuilder/interface/TrackerGeometry.h" + +#include "HeterogeneousCore/AlpakaCore/interface/alpaka/ESProducer.h" +#include "HeterogeneousCore/AlpakaCore/interface/alpaka/ModuleFactory.h" +#include "HeterogeneousCore/AlpakaInterface/interface/config.h" +#include "HeterogeneousCore/AlpakaInterface/interface/memory.h" + +#include "RecoTracker/Record/interface/CkfComponentsRecord.h" + +namespace ALPAKA_ACCELERATOR_NAMESPACE { + + using namespace cms::alpakatools; + + class SiPixelCablingSoAESProducer : public ESProducer { + public: + SiPixelCablingSoAESProducer(edm::ParameterSet const& iConfig) + : ESProducer(iConfig), useQuality_(iConfig.getParameter("UseQualityInfo")) { + auto const& component = iConfig.getParameter("appendToDataLabel"); + auto cc = setWhatProduced(this, component); + cablingMapToken_ = cc.consumes(edm::ESInputTag{"", iConfig.getParameter("CablingMapLabel")}); + if (useQuality_) { + qualityToken_ = cc.consumes(); + } + geometryToken_ = cc.consumes(); + } + + static void 
fillDescriptions(edm::ConfigurationDescriptions& descriptions) { + edm::ParameterSetDescription desc; + desc.add("appendToDataLabel", ""); + desc.add("CablingMapLabel", "")->setComment("CablingMap label"); + desc.add("UseQualityInfo", false); + descriptions.addWithDefaultLabel(desc); + } + + std::optional produce(const SiPixelMappingSoARecord& iRecord) { + auto cablingMap = iRecord.getTransientHandle(cablingMapToken_); + const SiPixelQuality* quality = nullptr; + if (useQuality_) { + auto qualityInfo = iRecord.getTransientHandle(qualityToken_); + quality = qualityInfo.product(); + } + bool hasQuality = quality != nullptr; + auto geom = iRecord.getTransientHandle(geometryToken_); + SiPixelMappingHost product(pixelgpudetails::MAX_SIZE, cms::alpakatools::host()); + std::vector const& fedIds = cablingMap->fedIds(); + std::unique_ptr const& cabling = cablingMap->cablingTree(); + + unsigned int startFed = fedIds.front(); + unsigned int endFed = fedIds.back(); + + sipixelobjects::CablingPathToDetUnit path; + int index = 1; + + auto mapView = product.view(); + + mapView.hasQuality() = hasQuality; + for (unsigned int fed = startFed; fed <= endFed; fed++) { + for (unsigned int link = 1; link <= pixelgpudetails::MAX_LINK; link++) { + for (unsigned int roc = 1; roc <= pixelgpudetails::MAX_ROC; roc++) { + path = {fed, link, roc}; + const sipixelobjects::PixelROC* pixelRoc = cabling->findItem(path); + mapView[index].fed() = fed; + mapView[index].link() = link; + mapView[index].roc() = roc; + if (pixelRoc != nullptr) { + mapView[index].rawId() = pixelRoc->rawId(); + mapView[index].rocInDet() = pixelRoc->idInDetUnit(); + mapView[index].modToUnpDefault() = false; + if (quality != nullptr) + mapView[index].badRocs() = quality->IsRocBad(pixelRoc->rawId(), pixelRoc->idInDetUnit()); + else + mapView[index].badRocs() = false; + } else { // store some dummy number + mapView[index].rawId() = pixelClustering::invalidModuleId; + mapView[index].rocInDet() = pixelClustering::invalidModuleId; 
+ mapView[index].badRocs() = true; + mapView[index].modToUnpDefault() = true; + } + index++; + } + } + } // end of FED loop + // Given FedId, Link and idinLnk; use the following formula + // to get the rawId and idinDU + // index = (FedID-1200) * MAX_LINK* MAX_ROC + (Link-1)* MAX_ROC + idinLnk; + // where, MAX_LINK = 48, MAX_ROC = 8 + // FedID varies between 1200 to 1338 (In total 108 FED's) + // Link varies between 1 to 48 + // idinLnk varies between 1 to 8 + + auto trackerGeom = iRecord.getTransientHandle(geometryToken_); + + for (int i = 1; i < index; i++) { + if (mapView[i].rawId() == pixelClustering::invalidModuleId) { + mapView[i].moduleId() = pixelClustering::invalidModuleId; + } else { + auto gdet = trackerGeom->idToDetUnit(mapView[i].rawId()); + if (!gdet) { + LogDebug("SiPixelCablingSoAESProducer") << " Not found: " << mapView[i].rawId() << std::endl; + continue; + } + mapView[i].moduleId() = gdet->index(); + } + LogDebug("SiPixelCablingSoAESProducer") + << "----------------------------------------------------------------------------" << std::endl; + LogDebug("SiPixelCablingSoAESProducer") << i << std::setw(20) << mapView[i].fed() << std::setw(20) + << mapView[i].link() << std::setw(20) << mapView[i].roc() << std::endl; + LogDebug("SiPixelCablingSoAESProducer") + << i << std::setw(20) << mapView[i].rawId() << std::setw(20) << mapView[i].rocInDet() << std::setw(20) + << mapView[i].moduleId() << std::endl; + LogDebug("SiPixelCablingSoAESProducer") + << i << std::setw(20) << mapView[i].badRocs() << std::setw(20) << std::endl; + LogDebug("SiPixelCablingSoAESProducer") + << "----------------------------------------------------------------------------" << std::endl; + } + + mapView.size() = index - 1; + + return product; + } + + private: + edm::ESGetToken cablingMapToken_; + edm::ESGetToken qualityToken_; + edm::ESGetToken geometryToken_; + bool useQuality_; + }; +} // namespace ALPAKA_ACCELERATOR_NAMESPACE + 
+DEFINE_FWK_EVENTSETUP_ALPAKA_MODULE(SiPixelCablingSoAESProducer); diff --git a/CalibTracker/SiPixelESProducers/plugins/alpaka/SiPixelGainCalibrationForHLTSoAESProducer.cc b/CalibTracker/SiPixelESProducers/plugins/alpaka/SiPixelGainCalibrationForHLTSoAESProducer.cc new file mode 100644 index 0000000000000..7b4d06be2b76c --- /dev/null +++ b/CalibTracker/SiPixelESProducers/plugins/alpaka/SiPixelGainCalibrationForHLTSoAESProducer.cc @@ -0,0 +1,129 @@ +#include "CondFormats/SiPixelObjects/interface/SiPixelGainCalibrationForHLTHost.h" +#include "CalibTracker/Records/interface/SiPixelGainCalibrationForHLTSoARcd.h" +#include "CondFormats/DataRecord/interface/SiPixelGainCalibrationForHLTRcd.h" +#include "CondFormats/SiPixelObjects/interface/SiPixelGainCalibrationForHLT.h" +#include "DataFormats/Portable/interface/alpaka/PortableCollection.h" +#include "FWCore/Framework/interface/ESProducer.h" +#include "FWCore/Framework/interface/EventSetup.h" +#include "FWCore/Framework/interface/ESHandle.h" +#include "FWCore/Framework/interface/ModuleFactory.h" +#include "FWCore/ParameterSet/interface/ParameterSet.h" +#include "Geometry/TrackerGeometryBuilder/interface/TrackerGeometry.h" +#include "Geometry/Records/interface/TrackerDigiGeometryRecord.h" +#include "Geometry/CommonDetUnit/interface/GeomDetType.h" + +#include "HeterogeneousCore/AlpakaCore/interface/alpaka/ESProducer.h" +#include "HeterogeneousCore/AlpakaCore/interface/alpaka/ModuleFactory.h" +#include "HeterogeneousCore/AlpakaInterface/interface/config.h" +#include "HeterogeneousCore/AlpakaInterface/interface/memory.h" +#include "FWCore/MessageLogger/interface/MessageLogger.h" + +#include + +namespace ALPAKA_ACCELERATOR_NAMESPACE { + + class SiPixelGainCalibrationForHLTSoAESProducer : public ESProducer { + public: + explicit SiPixelGainCalibrationForHLTSoAESProducer(const edm::ParameterSet& iConfig); + std::unique_ptr produce(const SiPixelGainCalibrationForHLTSoARcd& iRecord); + + static void 
fillDescriptions(edm::ConfigurationDescriptions& descriptions); + + private: + edm::ESGetToken gainsToken_; + edm::ESGetToken geometryToken_; + }; + + SiPixelGainCalibrationForHLTSoAESProducer::SiPixelGainCalibrationForHLTSoAESProducer(const edm::ParameterSet& iConfig) + : ESProducer(iConfig) { + auto cc = setWhatProduced(this); + gainsToken_ = cc.consumes(); + geometryToken_ = cc.consumes(); + } + + void SiPixelGainCalibrationForHLTSoAESProducer::fillDescriptions(edm::ConfigurationDescriptions& descriptions) { + edm::ParameterSetDescription desc; + descriptions.addWithDefaultLabel(desc); + } + + std::unique_ptr SiPixelGainCalibrationForHLTSoAESProducer::produce( + const SiPixelGainCalibrationForHLTSoARcd& iRecord) { + + auto const& gains= iRecord.get(gainsToken_); + auto const& geom = iRecord.get(geometryToken_); + + auto product = std::make_unique(gains.data().size(), cms::alpakatools::host()); + + // bizarre logic (looking for the first strip-det): n_detectors ends up as the smallest det-unit offset that points at a strip det + auto const& dus = geom.detUnits(); + unsigned int n_detectors = dus.size(); + for (unsigned int i = 1; i < 7; ++i) { + const auto offset = geom.offsetDU(GeomDetEnumerators::tkDetEnum[i]); + if (offset != dus.size() && dus[offset]->type().isTrackerStrip()) { + if (n_detectors > offset) + n_detectors = offset; + } + } + + LogDebug("SiPixelGainCalibrationForHLTSoA") + << "caching calibs for " << n_detectors << " pixel detectors of size " << gains.data().size() << '\n' + << "sizes " << sizeof(char) << ' ' << sizeof(uint8_t) << ' ' << sizeof(siPixelGainsSoA::DecodingStructure); + + for (size_t i = 0; i < gains.data().size(); i = i + 2) { + product->view().v_pedestals()[i / 2].gain = gains.data()[i]; + product->view().v_pedestals()[i / 2].ped = gains.data()[i + 1]; + } + + //std::copy here + // do not read back from the (possibly write-combined) memory buffer + auto minPed = gains.getPedLow(); + auto maxPed = gains.getPedHigh(); + auto minGain = gains.getGainLow(); + auto maxGain = gains.getGainHigh(); + auto 
nBinsToUseForEncoding = 253; + + // we will simplify later (not everything is needed....) + product->view().minPed() = minPed; + product->view().maxPed() = maxPed; + product->view().minGain() = minGain; + product->view().maxGain() = maxGain; + + product->view().numberOfRowsAveragedOver() = 80; + product->view().nBinsToUseForEncoding() = nBinsToUseForEncoding; + product->view().deadFlag() = 255; + product->view().noisyFlag() = 254; + + product->view().pedPrecision() = static_cast(maxPed - minPed) / nBinsToUseForEncoding; + product->view().gainPrecision() = static_cast(maxGain - minGain) / nBinsToUseForEncoding; + + LogDebug("SiPixelGainCalibrationForHLTSoA") + << "precisions g " << product->view().pedPrecision() << ' ' << product->view().gainPrecision(); + + // fill the index map + auto const& ind = gains.getIndexes(); + LogDebug("SiPixelGainCalibrationForHLTSoA") << ind.size() << " " << n_detectors; + + for (auto i = 0U; i < n_detectors; ++i) { + auto p = std::lower_bound( + ind.begin(), ind.end(), dus[i]->geographicalId().rawId(), SiPixelGainCalibrationForHLT::StrictWeakOrdering()); + assert(p != ind.end() && p->detid == dus[i]->geographicalId()); + assert(p->iend <= gains.data().size()); + assert(p->iend >= p->ibegin); + assert(0 == p->ibegin % 2); + assert(0 == p->iend % 2); + assert(p->ibegin != p->iend); + assert(p->ncols > 0); + + product->view().modStarts()[i] = p->ibegin; + product->view().modEnds()[i] = p->iend; + product->view().modCols()[i] = p->ncols; + + if (ind[i].detid != dus[i]->geographicalId()) + LogDebug("SiPixelGainCalibrationForHLTSoA") << ind[i].detid << "!=" << dus[i]->geographicalId(); + } + + return product; + } + +} // namespace ALPAKA_ACCELERATOR_NAMESPACE +DEFINE_FWK_EVENTSETUP_ALPAKA_MODULE(SiPixelGainCalibrationForHLTSoAESProducer); diff --git a/CondFormats/SiPixelObjects/BuildFile.xml b/CondFormats/SiPixelObjects/BuildFile.xml index 1d9b8d6b19f53..ddd87c956d217 100644 --- a/CondFormats/SiPixelObjects/BuildFile.xml +++ 
b/CondFormats/SiPixelObjects/BuildFile.xml @@ -1,3 +1,4 @@ + @@ -12,6 +13,9 @@ + + + diff --git a/CondFormats/SiPixelObjects/interface/SiPixelGainCalibrationForHLTHost.h b/CondFormats/SiPixelObjects/interface/SiPixelGainCalibrationForHLTHost.h new file mode 100644 index 0000000000000..28361ab184073 --- /dev/null +++ b/CondFormats/SiPixelObjects/interface/SiPixelGainCalibrationForHLTHost.h @@ -0,0 +1,9 @@ +#ifndef CondFormats_SiPixelObjects_SiPixelGainCalibrationForHLTHost_h +#define CondFormats_SiPixelObjects_SiPixelGainCalibrationForHLTHost_h + +#include "DataFormats/Portable/interface/PortableHostCollection.h" +#include "CondFormats/SiPixelObjects/interface/SiPixelGainCalibrationForHLTLayout.h" + +using SiPixelGainCalibrationForHLTHost = PortableHostCollection; + +#endif // CondFormats_SiPixelObjects_SiPixelGainCalibrationForHLTHost_h diff --git a/CondFormats/SiPixelObjects/interface/SiPixelGainCalibrationForHLTLayout.h b/CondFormats/SiPixelObjects/interface/SiPixelGainCalibrationForHLTLayout.h new file mode 100644 index 0000000000000..03c1c37c61046 --- /dev/null +++ b/CondFormats/SiPixelObjects/interface/SiPixelGainCalibrationForHLTLayout.h @@ -0,0 +1,42 @@ +#ifndef CondFormats_SiPixelObjects_interface_SiPixelGainCalibrationForHLTLayout_h +#define CondFormats_SiPixelObjects_interface_SiPixelGainCalibrationForHLTLayout_h + +#include +#include "DataFormats/SoATemplate/interface/SoALayout.h" +#include "Geometry/CommonTopologies/interface/SimplePixelTopology.h" + +namespace siPixelGainsSoA { + struct DecodingStructure { + uint8_t gain; + uint8_t ped; + }; + + using Ranges = std::array; + using Cols = std::array; +} // namespace siPixelGainsSoA + +GENERATE_SOA_LAYOUT(SiPixelGainCalibrationForHLTLayout, + SOA_COLUMN(siPixelGainsSoA::DecodingStructure, v_pedestals), + + SOA_SCALAR(siPixelGainsSoA::Ranges, modStarts), + SOA_SCALAR(siPixelGainsSoA::Ranges, modEnds), + SOA_SCALAR(siPixelGainsSoA::Cols, modCols), + + SOA_SCALAR(float, minPed), + SOA_SCALAR(float, maxPed), 
+ SOA_SCALAR(float, minGain), + SOA_SCALAR(float, maxGain), + SOA_SCALAR(float, pedPrecision), + SOA_SCALAR(float, gainPrecision), + + SOA_SCALAR(unsigned int, numberOfRowsAveragedOver), + SOA_SCALAR(unsigned int, nBinsToUseForEncoding), + SOA_SCALAR(unsigned int, deadFlag), + SOA_SCALAR(unsigned int, noisyFlag), + SOA_SCALAR(float, link)) + +using SiPixelGainCalibrationForHLTSoA = SiPixelGainCalibrationForHLTLayout<>; +using SiPixelGainCalibrationForHLTSoAView = SiPixelGainCalibrationForHLTSoA::View; +using SiPixelGainCalibrationForHLTSoAConstView = SiPixelGainCalibrationForHLTSoA::ConstView; + +#endif // CondFormats_SiPixelObjects_interface_SiPixelGainCalibrationForHLTLayout_h diff --git a/CondFormats/SiPixelObjects/interface/SiPixelMappingHost.h b/CondFormats/SiPixelObjects/interface/SiPixelMappingHost.h new file mode 100644 index 0000000000000..772a7a97e267b --- /dev/null +++ b/CondFormats/SiPixelObjects/interface/SiPixelMappingHost.h @@ -0,0 +1,10 @@ +#ifndef CondFormats_SiPixelObjects_SiPixelMappingHost_h +#define CondFormats_SiPixelObjects_SiPixelMappingHost_h + +#include +#include "DataFormats/Portable/interface/PortableHostCollection.h" +#include "CondFormats/SiPixelObjects/interface/SiPixelMappingLayout.h" + +using SiPixelMappingHost = PortableHostCollection; + +#endif // CondFormats_SiPixelObjects_SiPixelMappingHost_h diff --git a/CondFormats/SiPixelObjects/interface/SiPixelMappingLayout.h b/CondFormats/SiPixelObjects/interface/SiPixelMappingLayout.h new file mode 100644 index 0000000000000..ef123d443c795 --- /dev/null +++ b/CondFormats/SiPixelObjects/interface/SiPixelMappingLayout.h @@ -0,0 +1,24 @@ +#ifndef CondFormats_SiPixelObjects_interface_SiPixelMappingLayout_h +#define CondFormats_SiPixelObjects_interface_SiPixelMappingLayout_h + +#include +#include "DataFormats/SoATemplate/interface/SoALayout.h" +#include "CondFormats/SiPixelObjects/interface/SiPixelROCsStatusAndMapping.h" + +GENERATE_SOA_LAYOUT(SiPixelMappingLayout, + SOA_COLUMN(unsigned int, 
fed), + SOA_COLUMN(unsigned int, link), + SOA_COLUMN(unsigned int, roc), + SOA_COLUMN(unsigned int, rawId), + SOA_COLUMN(unsigned int, rocInDet), + SOA_COLUMN(unsigned int, moduleId), + SOA_COLUMN(bool, badRocs), + SOA_COLUMN(unsigned char, modToUnpDefault), + SOA_SCALAR(unsigned int, size), + SOA_SCALAR(bool, hasQuality)) + +using SiPixelMappingSoA = SiPixelMappingLayout<>; +using SiPixelMappingSoAView = SiPixelMappingSoA::View; +using SiPixelMappingSoAConstView = SiPixelMappingSoA::ConstView; + +#endif // CondFormats_SiPixelObjects_interface_SiPixelMappingLayout_h diff --git a/CondFormats/SiPixelObjects/interface/alpaka/SiPixelGainCalibrationForHLTDevice.h b/CondFormats/SiPixelObjects/interface/alpaka/SiPixelGainCalibrationForHLTDevice.h new file mode 100644 index 0000000000000..3c5e7094654c6 --- /dev/null +++ b/CondFormats/SiPixelObjects/interface/alpaka/SiPixelGainCalibrationForHLTDevice.h @@ -0,0 +1,13 @@ +#ifndef CondFormats_SiPixelObjects_interface_alpaka_SiPixelGainCalibrationForHLTDevice_h +#define CondFormats_SiPixelObjects_interface_alpaka_SiPixelGainCalibrationForHLTDevice_h + +#include +#include "DataFormats/Portable/interface/alpaka/PortableCollection.h" +#include "CondFormats/SiPixelObjects/interface/SiPixelGainCalibrationForHLTLayout.h" + +namespace ALPAKA_ACCELERATOR_NAMESPACE { + + using SiPixelGainCalibrationForHLTDevice = PortableCollection; + +} // namespace ALPAKA_ACCELERATOR_NAMESPACE +#endif // CondFormats_SiPixelObjects_interface_alpaka_SiPixelGainCalibrationForHLTDevice_h diff --git a/CondFormats/SiPixelObjects/interface/alpaka/SiPixelGainCalibrationForHLTUtilities.h b/CondFormats/SiPixelObjects/interface/alpaka/SiPixelGainCalibrationForHLTUtilities.h new file mode 100644 index 0000000000000..1fbce15dbe231 --- /dev/null +++ b/CondFormats/SiPixelObjects/interface/alpaka/SiPixelGainCalibrationForHLTUtilities.h @@ -0,0 +1,41 @@ +#ifndef CondFormats_SiPixelObjects_interface_alpaka_SiPixelGainCalibrationForHLTUtilities_h +#define 
CondFormats_SiPixelObjects_interface_alpaka_SiPixelGainCalibrationForHLTUtilities_h + +#include +#include +#include "CondFormats/SiPixelObjects/interface/SiPixelGainCalibrationForHLTLayout.h" + +struct SiPixelGainUtilities { + ALPAKA_FN_HOST_ACC ALPAKA_FN_ACC ALPAKA_FN_INLINE static std::pair getPedAndGain( + const SiPixelGainCalibrationForHLTSoAConstView& view, + uint32_t moduleInd, + int col, + int row, + bool& isDeadColumn, + bool& isNoisyColumn) { + auto start = view.modStarts()[moduleInd]; + auto end = view.modEnds()[moduleInd]; + auto nCols = view.modCols()[moduleInd]; + // determine which averaged data block we are in (there should be 1 or 2 of these, depending on whether the plaquette is 1 by X or 2 by X) + unsigned int lengthOfColumnData = (end - start) / nCols; + unsigned int lengthOfAveragedDataInEachColumn = 2; // we always have exactly two values per column-averaged block + unsigned int numberOfDataBlocksToSkip = row / view.numberOfRowsAveragedOver(); + + auto offset = start + col * lengthOfColumnData + lengthOfAveragedDataInEachColumn * numberOfDataBlocksToSkip; + assert(offset < end); + assert(offset < 3088384); + assert(0 == offset % 2); + + auto lp = view.v_pedestals(); + auto s = lp[offset / 2]; + + isDeadColumn = (s.ped & 0xFF) == view.deadFlag(); + isNoisyColumn = (s.ped & 0xFF) == view.noisyFlag(); + float decodeGain = float(s.gain & 0xFF) * view.gainPrecision() + view.minGain(); + float decodePed = float(s.ped & 0xFF) * view.pedPrecision() + view.minPed(); + + return std::make_pair(decodePed, decodeGain); + }; +}; + +#endif //CondFormats_SiPixelObjects_interface_alpaka_SiPixelGainCalibrationForHLTUtilities_h \ No newline at end of file diff --git a/CondFormats/SiPixelObjects/interface/alpaka/SiPixelMappingDevice.h b/CondFormats/SiPixelObjects/interface/alpaka/SiPixelMappingDevice.h new file mode 100644 index 0000000000000..8a16caa0d7368 --- /dev/null +++ b/CondFormats/SiPixelObjects/interface/alpaka/SiPixelMappingDevice.h @@ -0,0 +1,17 @@ +#ifndef 
CondFormats_SiPixelObjects_interface_alpaka_SiPixelMappingDevice_h +#define CondFormats_SiPixelObjects_interface_alpaka_SiPixelMappingDevice_h + +#include +#include +#include "DataFormats/Portable/interface/alpaka/PortableCollection.h" +#include "CondFormats/SiPixelObjects/interface/SiPixelMappingLayout.h" +#include "HeterogeneousCore/AlpakaCore/interface/alpaka/ESProducer.h" +#include "DataFormats/Portable/interface/PortableHostCollection.h" + +namespace ALPAKA_ACCELERATOR_NAMESPACE { + + using SiPixelMappingDevice = PortableCollection; + +} // namespace ALPAKA_ACCELERATOR_NAMESPACE + +#endif // CondFormats_SiPixelObjects_interface_alpaka_SiPixelMappingDevice_h diff --git a/CondFormats/SiPixelObjects/interface/alpaka/SiPixelMappingUtilities.h b/CondFormats/SiPixelObjects/interface/alpaka/SiPixelMappingUtilities.h new file mode 100644 index 0000000000000..56547cdb27f4d --- /dev/null +++ b/CondFormats/SiPixelObjects/interface/alpaka/SiPixelMappingUtilities.h @@ -0,0 +1,53 @@ +#ifndef CondFormats_SiPixelObjects_interface_alpaka_SiPixelMappingUtilities_h +#define CondFormats_SiPixelObjects_interface_alpaka_SiPixelMappingUtilities_h + +#include +#include +#include "CondFormats/SiPixelObjects/interface/SiPixelMappingLayout.h" +#include "CondFormats/SiPixelObjects/interface/SiPixelFedCablingMap.h" +#include "CondFormats/SiPixelObjects/interface/SiPixelFedCablingTree.h" + +namespace ALPAKA_ACCELERATOR_NAMESPACE { + + struct SiPixelMappingUtilities { + ALPAKA_FN_HOST_ACC ALPAKA_FN_ACC ALPAKA_FN_INLINE static bool hasQuality( + const SiPixelMappingSoAConstView& view) { + return view.hasQuality(); + } + + ALPAKA_FN_HOST_ACC ALPAKA_FN_ACC ALPAKA_FN_INLINE static cms::alpakatools::device_buffer + getModToUnpRegionalAsync(std::set const& modules, + const SiPixelFedCablingTree* cabling, + std::vector const& fedIds, + Queue& queue) { + auto modToUnpDevice = cms::alpakatools::make_device_buffer(queue, pixelgpudetails::MAX_SIZE); + auto modToUnpHost = 
cms::alpakatools::make_host_buffer(queue, pixelgpudetails::MAX_SIZE); + + unsigned int startFed = fedIds.front(); + unsigned int endFed = fedIds.back() - 1; + + sipixelobjects::CablingPathToDetUnit path; + int index = 1; + + for (unsigned int fed = startFed; fed <= endFed; fed++) { + for (unsigned int link = 1; link <= pixelgpudetails::MAX_LINK; link++) { + for (unsigned int roc = 1; roc <= pixelgpudetails::MAX_ROC; roc++) { + path = {fed, link, roc}; + const sipixelobjects::PixelROC* pixelRoc = cabling->findItem(path); + if (pixelRoc != nullptr) { + modToUnpHost[index] = (not modules.empty()) and (modules.find(pixelRoc->rawId()) == modules.end()); + } else { // store some dummy number + modToUnpHost[index] = true; + } + index++; + } + } + } + + alpaka::memcpy(queue, modToUnpDevice, modToUnpHost); + + return modToUnpDevice; + } + }; +} // namespace ALPAKA_ACCELERATOR_NAMESPACE +#endif //CondFormats_SiPixelObjects_interface_alpaka_SiPixelMappingUtilities_h \ No newline at end of file diff --git a/CondFormats/SiPixelObjects/src/T_EventSetup_SiPixelGainCalibrationForHLTHost.cc b/CondFormats/SiPixelObjects/src/T_EventSetup_SiPixelGainCalibrationForHLTHost.cc new file mode 100644 index 0000000000000..be54c23dd8df6 --- /dev/null +++ b/CondFormats/SiPixelObjects/src/T_EventSetup_SiPixelGainCalibrationForHLTHost.cc @@ -0,0 +1,4 @@ +#include "CondFormats/SiPixelObjects/interface/SiPixelGainCalibrationForHLTHost.h" +#include "FWCore/Utilities/interface/typelookup.h" + +TYPELOOKUP_DATA_REG(SiPixelGainCalibrationForHLTHost); diff --git a/CondFormats/SiPixelObjects/src/T_EventSetup_SiPixelMappingHost.cc b/CondFormats/SiPixelObjects/src/T_EventSetup_SiPixelMappingHost.cc new file mode 100644 index 0000000000000..27201b65add22 --- /dev/null +++ b/CondFormats/SiPixelObjects/src/T_EventSetup_SiPixelMappingHost.cc @@ -0,0 +1,4 @@ +#include "CondFormats/SiPixelObjects/interface/SiPixelMappingHost.h" +#include "FWCore/Utilities/interface/typelookup.h" + 
+TYPELOOKUP_DATA_REG(SiPixelMappingHost); \ No newline at end of file diff --git a/CondFormats/SiPixelObjects/src/alpaka/T_EventSetup_SiPixelGainCalibrationForHLTDevice.cc b/CondFormats/SiPixelObjects/src/alpaka/T_EventSetup_SiPixelGainCalibrationForHLTDevice.cc new file mode 100644 index 0000000000000..fec7ca3ba1c52 --- /dev/null +++ b/CondFormats/SiPixelObjects/src/alpaka/T_EventSetup_SiPixelGainCalibrationForHLTDevice.cc @@ -0,0 +1,4 @@ +#include "CondFormats/SiPixelObjects/interface/alpaka/SiPixelGainCalibrationForHLTDevice.h" +#include "HeterogeneousCore/AlpakaCore/interface/alpaka/typelookup.h" + +TYPELOOKUP_ALPAKA_DATA_REG(SiPixelGainCalibrationForHLTDevice); \ No newline at end of file diff --git a/CondFormats/SiPixelObjects/src/alpaka/T_EventSetup_SiPixelMappingDevice.cc b/CondFormats/SiPixelObjects/src/alpaka/T_EventSetup_SiPixelMappingDevice.cc new file mode 100644 index 0000000000000..0b86fdf64978b --- /dev/null +++ b/CondFormats/SiPixelObjects/src/alpaka/T_EventSetup_SiPixelMappingDevice.cc @@ -0,0 +1,4 @@ +#include "CondFormats/SiPixelObjects/interface/alpaka/SiPixelMappingDevice.h" +#include "HeterogeneousCore/AlpakaCore/interface/alpaka/typelookup.h" + +TYPELOOKUP_ALPAKA_DATA_REG(SiPixelMappingDevice); diff --git a/Configuration/ProcessModifiers/python/alpakaValidationPixel_cff.py b/Configuration/ProcessModifiers/python/alpakaValidationPixel_cff.py new file mode 100644 index 0000000000000..ebdb7d9e6981a --- /dev/null +++ b/Configuration/ProcessModifiers/python/alpakaValidationPixel_cff.py @@ -0,0 +1,6 @@ +import FWCore.ParameterSet.Config as cms + +# This modifier chain is for turning on DQM modules used for alpaka device/host validation for pixels + +alpakaValidationPixel = cms.Modifier() + diff --git a/Configuration/ProcessModifiers/python/alpakaValidation_cff.py b/Configuration/ProcessModifiers/python/alpakaValidation_cff.py new file mode 100644 index 0000000000000..3399bdda7c4df --- /dev/null +++ 
b/Configuration/ProcessModifiers/python/alpakaValidation_cff.py @@ -0,0 +1,11 @@ +import FWCore.ParameterSet.Config as cms + +from Configuration.ProcessModifiers.alpaka_cff import * +from Configuration.ProcessModifiers.alpakaValidationPixel_cff import * + +# This modifier chain is for turning on DQM modules used for alpaka device/host validation + +alpakaValidation = cms.ModifierChain( + alpaka, + alpakaValidationPixel +) diff --git a/Configuration/PyReleaseValidation/python/upgradeWorkflowComponents.py b/Configuration/PyReleaseValidation/python/upgradeWorkflowComponents.py index ae80f67eeedc3..d0ddd64b7230a 100644 --- a/Configuration/PyReleaseValidation/python/upgradeWorkflowComponents.py +++ b/Configuration/PyReleaseValidation/python/upgradeWorkflowComponents.py @@ -885,6 +885,7 @@ def setup_(self, step, stepName, stepDict, k, properties): # - HLT on CPU # - Pixel-only reconstruction on CPU, with DQM and validation # - harvesting + upgradeWFs['PatatrackPixelOnlyCPU'] = PatatrackWorkflow( digi = { # the HLT menu is already set up for using GPUs if available and if the "gpu" modifier is enabled @@ -1504,6 +1505,53 @@ def setup_(self, step, stepName, stepDict, k, properties): offset = 0.597, ) + +#Alpaka workflows + +upgradeWFs['PatatrackPixelOnlyAlpaka'] = PatatrackWorkflow( + digi = { + '--procModifiers': 'alpaka' + }, + reco = { + '-s': 'RAW2DIGI:RawToDigi_pixelOnly,RECO:reconstruction_pixelTrackingOnly,VALIDATION:@pixelTrackingOnlyValidation,DQM:@pixelTrackingOnlyDQM', + '--procModifiers': 'alpaka' + }, + harvest = { + '-s': 'HARVESTING:@trackingOnlyValidation+@pixelTrackingOnlyDQM' + }, + suffix = 'Patatrack_PixelOnlyAlpaka', + offset = 0.55, +) + +upgradeWFs['PatatrackPixelOnlyAlpakaProfiling'] = PatatrackWorkflow( + digi = { + '--procModifiers': 'alpaka' + }, + reco = { + '-s': 'RAW2DIGI:RawToDigi_pixelOnly,RECO:reconstruction_pixelTrackingOnly', + '--procModifiers': 'alpaka', + '--customise' : 
'RecoTracker/Configuration/customizePixelOnlyForProfiling.customizePixelOnlyForProfilingGPUOnly' + }, + harvest = None, + suffix = 'Patatrack_PixelOnlyAlpaka_Profiling', + offset = 0.554, +) + +upgradeWFs['PatatrackPixelOnlyAlpakaValidation'] = PatatrackWorkflow( + digi = { + # the HLT menu is already set up for using GPUs if available and if the "gpu" modifier is enabled + }, + reco = { + '-s': 'RAW2DIGI:RawToDigi_pixelOnly,RECO:reconstruction_pixelTrackingOnly,VALIDATION:@pixelTrackingOnlyValidation,DQM:@pixelTrackingOnlyDQM', + '--procModifiers': 'alpakaValidation' + }, + harvest = { + '-s': 'HARVESTING:@trackingOnlyValidation+@pixelTrackingOnlyDQM' + }, + suffix = 'Patatrack_PixelOnlyAlpakaValidation', + offset = 0.557, +) + # end of Patatrack workflows class UpgradeWorkflow_ProdLike(UpgradeWorkflow): diff --git a/Configuration/StandardSequences/python/Services_cff.py b/Configuration/StandardSequences/python/Services_cff.py index d7530b51f0ed9..f081a8817f361 100644 --- a/Configuration/StandardSequences/python/Services_cff.py +++ b/Configuration/StandardSequences/python/Services_cff.py @@ -13,11 +13,17 @@ def _addProcessAccelerators(process): process.load("Configuration.StandardSequences.Accelerators_cff") +def _addProcessAcceleratorsAlpaka(process): + process.load("HeterogeneousCore.AlpakaCore.ProcessAcceleratorAlpaka_cfi") + from Configuration.ProcessModifiers.gpu_cff import gpu from Configuration.ProcessModifiers.pixelNtupletFit_cff import pixelNtupletFit from Configuration.ProcessModifiers.alpaka_cff import alpaka -modifyConfigurationStandardSequencesServicesAddProcessAccelerators_ = (gpu | pixelNtupletFit | alpaka).makeProcessModifier(_addProcessAccelerators) +modifyConfigurationStandardSequencesServicesAddProcessAccelerators_ = (gpu | pixelNtupletFit ).makeProcessModifier(_addProcessAccelerators) + +modifyConfigurationStandardSequencesServicesAddProcessAcceleratorsAlpaka_ = alpaka.makeProcessModifier(_addProcessAcceleratorsAlpaka) + # load TritonService 
when SONIC workflow is enabled def _addTritonService(process): diff --git a/DQM/SiPixelHeterogeneous/plugins/BuildFile.xml b/DQM/SiPixelHeterogeneous/plugins/BuildFile.xml index 66adf1666762e..8cf73f08268d7 100644 --- a/DQM/SiPixelHeterogeneous/plugins/BuildFile.xml +++ b/DQM/SiPixelHeterogeneous/plugins/BuildFile.xml @@ -5,8 +5,11 @@ + + + + - diff --git a/DQM/SiPixelHeterogeneous/plugins/SiPixelCompareRecHitsSoAAlpaka.cc b/DQM/SiPixelHeterogeneous/plugins/SiPixelCompareRecHitsSoAAlpaka.cc new file mode 100644 index 0000000000000..5a7ba189ab440 --- /dev/null +++ b/DQM/SiPixelHeterogeneous/plugins/SiPixelCompareRecHitsSoAAlpaka.cc @@ -0,0 +1,244 @@ +#include "DQMServices/Core/interface/MonitorElement.h" +#include "DQMServices/Core/interface/DQMEDAnalyzer.h" +#include "DQMServices/Core/interface/DQMStore.h" +#include "DataFormats/Math/interface/approx_atan2.h" +#include "DataFormats/SiPixelDetId/interface/PixelSubdetector.h" +#include "DataFormats/TrackerCommon/interface/TrackerTopology.h" +#include "DataFormats/TrackingRecHitSoA/interface/TrackingRecHitSoAHost.h" +#include "DataFormats/TrackingRecHitSoA/interface/TrackingRecHitsLayout.h" +#include "FWCore/Framework/interface/Event.h" +#include "FWCore/Framework/interface/Frameworkfwd.h" +#include "FWCore/MessageLogger/interface/MessageLogger.h" +#include "FWCore/ParameterSet/interface/ParameterSet.h" +#include "FWCore/ParameterSet/interface/ParameterSetDescription.h" +#include "FWCore/ParameterSet/interface/ConfigurationDescriptions.h" +#include "Geometry/CommonDetUnit/interface/PixelGeomDetUnit.h" +#include "Geometry/CommonTopologies/interface/PixelTopology.h" +#include "Geometry/Records/interface/TrackerDigiGeometryRecord.h" +#include "Geometry/TrackerGeometryBuilder/interface/TrackerGeometry.h" + +template +class SiPixelCompareRecHitsSoAAlpaka : public DQMEDAnalyzer { +public: + using HitsOnHost = TrackingRecHitHost; + + explicit SiPixelCompareRecHitsSoAAlpaka(const edm::ParameterSet&); + 
~SiPixelCompareRecHitsSoAAlpaka() override = default; + void dqmBeginRun(const edm::Run&, const edm::EventSetup&) override; + void bookHistograms(DQMStore::IBooker& ibooker, edm::Run const& iRun, edm::EventSetup const& iSetup) override; + void analyze(const edm::Event& iEvent, const edm::EventSetup& iSetup) override; + static void fillDescriptions(edm::ConfigurationDescriptions& descriptions); + +private: + const edm::ESGetToken geomToken_; + const edm::ESGetToken topoToken_; + const edm::EDGetTokenT tokenSoAHitsHost_; //these two are both on Host but originally they have been + const edm::EDGetTokenT tokenSoAHitsDevice_; //produced on Host or on Device + const std::string topFolderName_; + const float mind2cut_; + static constexpr uint32_t invalidHit_ = std::numeric_limits::max(); + static constexpr float micron_ = 10000.; + const TrackerGeometry* tkGeom_ = nullptr; + const TrackerTopology* tTopo_ = nullptr; + MonitorElement* hnHits_; + MonitorElement* hBchargeL_[4]; // max 4 barrel hits + MonitorElement* hBsizexL_[4]; + MonitorElement* hBsizeyL_[4]; + MonitorElement* hBposxL_[4]; + MonitorElement* hBposyL_[4]; + MonitorElement* hFchargeD_[2][12]; // max 12 endcap disks + MonitorElement* hFsizexD_[2][12]; + MonitorElement* hFsizeyD_[2][12]; + MonitorElement* hFposxD_[2][12]; + MonitorElement* hFposyD_[2][12]; + //differences + MonitorElement* hBchargeDiff_; + MonitorElement* hFchargeDiff_; + MonitorElement* hBsizeXDiff_; + MonitorElement* hFsizeXDiff_; + MonitorElement* hBsizeYDiff_; + MonitorElement* hFsizeYDiff_; + MonitorElement* hBposXDiff_; + MonitorElement* hFposXDiff_; + MonitorElement* hBposYDiff_; + MonitorElement* hFposYDiff_; +}; + +// +// constructors +// +template +SiPixelCompareRecHitsSoAAlpaka::SiPixelCompareRecHitsSoAAlpaka(const edm::ParameterSet& iConfig) + : geomToken_(esConsumes()), + topoToken_(esConsumes()), + tokenSoAHitsHost_(consumes(iConfig.getParameter("pixelHitsSrcCPU"))), + 
tokenSoAHitsDevice_(consumes(iConfig.getParameter("pixelHitsSrcGPU"))), + topFolderName_(iConfig.getParameter("topFolderName")), + mind2cut_(iConfig.getParameter("minD2cut")) {} + +// +// Begin Run +// +template +void SiPixelCompareRecHitsSoAAlpaka::dqmBeginRun(const edm::Run& iRun, const edm::EventSetup& iSetup) { + tkGeom_ = &iSetup.getData(geomToken_); + tTopo_ = &iSetup.getData(topoToken_); +} + +// +// -- Analyze +// +template +void SiPixelCompareRecHitsSoAAlpaka::analyze(const edm::Event& iEvent, const edm::EventSetup& iSetup) { + const auto& rhsoaHandleHost = iEvent.getHandle(tokenSoAHitsHost_); + const auto& rhsoaHandleDevice = iEvent.getHandle(tokenSoAHitsDevice_); + if (not rhsoaHandleHost or not rhsoaHandleDevice) { + edm::LogWarning out("SiPixelCompareRecHitsSoAAlpaka"); + if (not rhsoaHandleHost) { + out << "reference (Host) rechits not found; "; + } + if (not rhsoaHandleDevice) { + out << "target (Device) rechits not found; "; + } + out << "the comparison will not run."; + return; + } + + auto const& rhsoaHost = *rhsoaHandleHost; + auto const& rhsoaDevice = *rhsoaHandleDevice; + + auto const& soa2dHost = rhsoaHost.const_view(); + auto const& soa2dDevice = rhsoaDevice.const_view(); + + uint32_t nHitsHost = soa2dHost.metadata().size(); + uint32_t nHitsDevice = soa2dDevice.metadata().size(); + + hnHits_->Fill(nHitsHost, nHitsDevice); + auto detIds = tkGeom_->detUnitIds(); + for (uint32_t i = 0; i < nHitsHost; i++) { + float minD = mind2cut_; + uint32_t matchedHit = invalidHit_; + uint16_t indHost = soa2dHost[i].detectorIndex(); + float xLocalHost = soa2dHost[i].xLocal(); + float yLocalHost = soa2dHost[i].yLocal(); + for (uint32_t j = 0; j < nHitsDevice; j++) { + if (soa2dDevice.detectorIndex(j) == indHost) { + float dx = xLocalHost - soa2dDevice[j].xLocal(); + float dy = yLocalHost - soa2dDevice[j].yLocal(); + float distance = dx * dx + dy * dy; + if (distance < minD) { + minD = distance; + matchedHit = j; + } + } + } + DetId id = detIds[indHost]; + 
uint32_t chargeHost = soa2dHost[i].chargeAndStatus().charge; + int16_t sizeXHost = std::ceil(float(std::abs(soa2dHost[i].clusterSizeX()) / 8.)); + int16_t sizeYHost = std::ceil(float(std::abs(soa2dHost[i].clusterSizeY()) / 8.)); + uint32_t chargeDevice = 0; + int16_t sizeXDevice = -99; + int16_t sizeYDevice = -99; + float xLocalDevice = -999.; + float yLocalDevice = -999.; + if (matchedHit != invalidHit_) { + chargeDevice = soa2dDevice[matchedHit].chargeAndStatus().charge; + sizeXDevice = std::ceil(float(std::abs(soa2dDevice[matchedHit].clusterSizeX()) / 8.)); + sizeYDevice = std::ceil(float(std::abs(soa2dDevice[matchedHit].clusterSizeY()) / 8.)); + xLocalDevice = soa2dDevice[matchedHit].xLocal(); + yLocalDevice = soa2dDevice[matchedHit].yLocal(); + } + switch (id.subdetId()) { + case PixelSubdetector::PixelBarrel: + hBchargeL_[tTopo_->pxbLayer(id) - 1]->Fill(chargeHost, chargeDevice); + hBsizexL_[tTopo_->pxbLayer(id) - 1]->Fill(sizeXHost, sizeXDevice); + hBsizeyL_[tTopo_->pxbLayer(id) - 1]->Fill(sizeYHost, sizeYDevice); + hBposxL_[tTopo_->pxbLayer(id) - 1]->Fill(xLocalHost, xLocalDevice); + hBposyL_[tTopo_->pxbLayer(id) - 1]->Fill(yLocalHost, yLocalDevice); + hBchargeDiff_->Fill(chargeHost - chargeDevice); + hBsizeXDiff_->Fill(sizeXHost - sizeXDevice); + hBsizeYDiff_->Fill(sizeYHost - sizeYDevice); + hBposXDiff_->Fill(micron_ * (xLocalHost - xLocalDevice)); + hBposYDiff_->Fill(micron_ * (yLocalHost - yLocalDevice)); + break; + case PixelSubdetector::PixelEndcap: + hFchargeD_[tTopo_->pxfSide(id) - 1][tTopo_->pxfDisk(id) - 1]->Fill(chargeHost, chargeDevice); + hFsizexD_[tTopo_->pxfSide(id) - 1][tTopo_->pxfDisk(id) - 1]->Fill(sizeXHost, sizeXDevice); + hFsizeyD_[tTopo_->pxfSide(id) - 1][tTopo_->pxfDisk(id) - 1]->Fill(sizeYHost, sizeYDevice); + hFposxD_[tTopo_->pxfSide(id) - 1][tTopo_->pxfDisk(id) - 1]->Fill(xLocalHost, xLocalDevice); + hFposyD_[tTopo_->pxfSide(id) - 1][tTopo_->pxfDisk(id) - 1]->Fill(yLocalHost, yLocalDevice); + hFchargeDiff_->Fill(chargeHost - 
chargeDevice); + hFsizeXDiff_->Fill(sizeXHost - sizeXDevice); + hFsizeYDiff_->Fill(sizeYHost - sizeYDevice); + hFposXDiff_->Fill(micron_ * (xLocalHost - xLocalDevice)); + hFposYDiff_->Fill(micron_ * (yLocalHost - yLocalDevice)); + break; + } + } +} + +// +// -- Book Histograms +// +template +void SiPixelCompareRecHitsSoAAlpaka::bookHistograms(DQMStore::IBooker& iBook, + edm::Run const& iRun, + edm::EventSetup const& iSetup) { + iBook.cd(); + iBook.setCurrentFolder(topFolderName_); + + // clang-format off + //Global + hnHits_ = iBook.book2I("nHits", "HostvsDevice RecHits per event;#Host RecHits;#Device RecHits", 200, 0, 5000,200, 0, 5000); + //Barrel Layer + for(unsigned int il=0;ilnumberOfLayers(PixelSubdetector::PixelBarrel);il++){ + hBchargeL_[il] = iBook.book2I(Form("recHitsBLay%dCharge",il+1), Form("HostvsDevice RecHits Charge Barrel Layer%d;Host Charge;Device Charge",il+1), 250, 0, 100000, 250, 0, 100000); + hBsizexL_[il] = iBook.book2I(Form("recHitsBLay%dSizex",il+1), Form("HostvsDevice RecHits SizeX Barrel Layer%d;Host SizeX;Device SizeX",il+1), 30, 0, 30, 30, 0, 30); + hBsizeyL_[il] = iBook.book2I(Form("recHitsBLay%dSizey",il+1), Form("HostvsDevice RecHits SizeY Barrel Layer%d;Host SizeY;Device SizeY",il+1), 30, 0, 30, 30, 0, 30); + hBposxL_[il] = iBook.book2D(Form("recHitsBLay%dPosx",il+1), Form("HostvsDevice RecHits x-pos in Barrel Layer%d;Host pos x;Device pos x",il+1), 200, -5, 5, 200,-5,5); + hBposyL_[il] = iBook.book2D(Form("recHitsBLay%dPosy",il+1), Form("HostvsDevice RecHits y-pos in Barrel Layer%d;Host pos y;Device pos y",il+1), 200, -5, 5, 200,-5,5); + } + //Endcaps + //Endcaps Disk + for(int is=0;is<2;is++){ + int sign=is==0? 
-1:1; + for(unsigned int id=0;idnumberOfLayers(PixelSubdetector::PixelEndcap);id++){ + hFchargeD_[is][id] = iBook.book2I(Form("recHitsFDisk%+dCharge",id*sign+sign), Form("HostvsDevice RecHits Charge Endcaps Disk%+d;Host Charge;Device Charge",id*sign+sign), 250, 0, 100000, 250, 0, 100000); + hFsizexD_[is][id] = iBook.book2I(Form("recHitsFDisk%+dSizex",id*sign+sign), Form("HostvsDevice RecHits SizeX Endcaps Disk%+d;Host SizeX;Device SizeX",id*sign+sign), 30, 0, 30, 30, 0, 30); + hFsizeyD_[is][id] = iBook.book2I(Form("recHitsFDisk%+dSizey",id*sign+sign), Form("HostvsDevice RecHits SizeY Endcaps Disk%+d;Host SizeY;Device SizeY",id*sign+sign), 30, 0, 30, 30, 0, 30); + hFposxD_[is][id] = iBook.book2D(Form("recHitsFDisk%+dPosx",id*sign+sign), Form("HostvsDevice RecHits x-pos Endcaps Disk%+d;Host pos x;Device pos x",id*sign+sign), 200, -5, 5, 200, -5, 5); + hFposyD_[is][id] = iBook.book2D(Form("recHitsFDisk%+dPosy",id*sign+sign), Form("HostvsDevice RecHits y-pos Endcaps Disk%+d;Host pos y;Device pos y",id*sign+sign), 200, -5, 5, 200, -5, 5); + } + } + //1D differences + hBchargeDiff_ = iBook.book1D("rechitChargeDiffBpix","Charge differnce of rechits in BPix; rechit charge difference (Host - Device)", 101, -50.5, 50.5); + hFchargeDiff_ = iBook.book1D("rechitChargeDiffFpix","Charge differnce of rechits in FPix; rechit charge difference (Host - Device)", 101, -50.5, 50.5); + hBsizeXDiff_ = iBook.book1D("rechitsizeXDiffBpix","SizeX difference of rechits in BPix; rechit sizex difference (Host - Device)", 21, -10.5, 10.5); + hFsizeXDiff_ = iBook.book1D("rechitsizeXDiffFpix","SizeX difference of rechits in FPix; rechit sizex difference (Host - Device)", 21, -10.5, 10.5); + hBsizeYDiff_ = iBook.book1D("rechitsizeYDiffBpix","SizeY difference of rechits in BPix; rechit sizey difference (Host - Device)", 21, -10.5, 10.5); + hFsizeYDiff_ = iBook.book1D("rechitsizeYDiffFpix","SizeY difference of rechits in FPix; rechit sizey difference (Host - Device)", 21, -10.5, 10.5); + hBposXDiff_ 
= iBook.book1D("rechitsposXDiffBpix","x-position difference of rechits in BPix; rechit x-pos difference (Host - Device)", 1000, -10, 10); + hFposXDiff_ = iBook.book1D("rechitsposXDiffFpix","x-position difference of rechits in FPix; rechit x-pos difference (Host - Device)", 1000, -10, 10); + hBposYDiff_ = iBook.book1D("rechitsposYDiffBpix","y-position difference of rechits in BPix; rechit y-pos difference (Host - Device)", 1000, -10, 10); + hFposYDiff_ = iBook.book1D("rechitsposYDiffFpix","y-position difference of rechits in FPix; rechit y-pos difference (Host - Device)", 1000, -10, 10); +} + +template +void SiPixelCompareRecHitsSoAAlpaka::fillDescriptions(edm::ConfigurationDescriptions& descriptions) { + // monitorpixelRecHitsSoAAlpaka + edm::ParameterSetDescription desc; + desc.add("pixelHitsSrcCPU", edm::InputTag("siPixelRecHitsPreSplittingAlpakaSerial")); + desc.add("pixelHitsSrcGPU", edm::InputTag("siPixelRecHitsPreSplittingAlpaka")); + desc.add("topFolderName", "SiPixelHeterogeneous/PixelRecHitsCompareDeviceVSHost"); + desc.add("minD2cut", 0.0001); + descriptions.addWithDefaultLabel(desc); +} + +using SiPixelPhase1CompareRecHitsSoAAlpaka = SiPixelCompareRecHitsSoAAlpaka; +using SiPixelPhase2CompareRecHitsSoAAlpaka = SiPixelCompareRecHitsSoAAlpaka; +using SiPixelHIonPhase1CompareRecHitsSoAAlpaka = SiPixelCompareRecHitsSoAAlpaka; + +#include "FWCore/Framework/interface/MakerMacros.h" +DEFINE_FWK_MODULE(SiPixelPhase1CompareRecHitsSoAAlpaka); +DEFINE_FWK_MODULE(SiPixelPhase2CompareRecHitsSoAAlpaka); +DEFINE_FWK_MODULE(SiPixelHIonPhase1CompareRecHitsSoAAlpaka); diff --git a/DQM/SiPixelHeterogeneous/plugins/SiPixelCompareTrackSoAAlpaka.cc b/DQM/SiPixelHeterogeneous/plugins/SiPixelCompareTrackSoAAlpaka.cc new file mode 100644 index 0000000000000..92ca7cae6d3e1 --- /dev/null +++ b/DQM/SiPixelHeterogeneous/plugins/SiPixelCompareTrackSoAAlpaka.cc @@ -0,0 +1,318 @@ +// -*- C++ -*- +// Package: SiPixelCompareTrackSoAAlpaka +// Class: SiPixelCompareTrackSoAAlpaka +// 
+/**\class SiPixelCompareTrackSoAAlpaka SiPixelCompareTrackSoAAlpaka.cc +*/ +// +// Author: Suvankar Roy Chowdhury +// + +// for string manipulations +#include +#include "DataFormats/Common/interface/Handle.h" +#include "DataFormats/Math/interface/deltaR.h" +#include "DataFormats/Math/interface/deltaPhi.h" +#include "FWCore/Framework/interface/Event.h" +#include "FWCore/Framework/interface/Frameworkfwd.h" +#include "FWCore/Framework/interface/MakerMacros.h" +#include "FWCore/MessageLogger/interface/MessageLogger.h" +#include "FWCore/ParameterSet/interface/ParameterSet.h" +#include "FWCore/Utilities/interface/InputTag.h" +// DQM Histograming +#include "DQMServices/Core/interface/MonitorElement.h" +#include "DQMServices/Core/interface/DQMEDAnalyzer.h" +#include "DQMServices/Core/interface/DQMStore.h" +// DataFormats +#include "DataFormats/TrackSoA/interface/TrackSoAHost.h" +#include "DataFormats/TrackSoA/interface/alpaka/TrackUtilities.h" + +namespace { + // same logic used for the MTV: + // cf https://github.com/cms-sw/cmssw/blob/master/Validation/RecoTrack/src/MTVHistoProducerAlgoForTracker.cc + typedef dqm::reco::DQMStore DQMStore; + + void setBinLog(TAxis* axis) { + int bins = axis->GetNbins(); + float from = axis->GetXmin(); + float to = axis->GetXmax(); + float width = (to - from) / bins; + std::vector new_bins(bins + 1, 0); + for (int i = 0; i <= bins; i++) { + new_bins[i] = TMath::Power(10, from + i * width); + } + axis->Set(bins, new_bins.data()); + } + + void setBinLogX(TH1* h) { + TAxis* axis = h->GetXaxis(); + setBinLog(axis); + } + void setBinLogY(TH1* h) { + TAxis* axis = h->GetYaxis(); + setBinLog(axis); + } + + template + dqm::reco::MonitorElement* make2DIfLog(DQMStore::IBooker& ibook, bool logx, bool logy, Args&&... 
args) { + auto h = std::make_unique(std::forward(args)...); + if (logx) + setBinLogX(h.get()); + if (logy) + setBinLogY(h.get()); + const auto& name = h->GetName(); + return ibook.book2I(name, h.release()); + } +} // namespace + +template +class SiPixelCompareTrackSoAAlpaka : public DQMEDAnalyzer { +public: + using PixelTrackSoA = TrackSoAHost; + + explicit SiPixelCompareTrackSoAAlpaka(const edm::ParameterSet&); + ~SiPixelCompareTrackSoAAlpaka() override = default; + void bookHistograms(DQMStore::IBooker& ibooker, edm::Run const& iRun, edm::EventSetup const& iSetup) override; + void analyze(const edm::Event& iEvent, const edm::EventSetup& iSetup) override; + static void fillDescriptions(edm::ConfigurationDescriptions& descriptions); + +private: + const edm::EDGetTokenT tokenSoATrackCPU_; + const edm::EDGetTokenT tokenSoATrackGPU_; + const std::string topFolderName_; + const bool useQualityCut_; + const pixelTrack::Quality minQuality_; + const float dr2cut_; + MonitorElement* hnTracks_; + MonitorElement* hnLooseAndAboveTracks_; + MonitorElement* hnLooseAndAboveTracks_matched_; + MonitorElement* hnHits_; + MonitorElement* hnHitsVsPhi_; + MonitorElement* hnHitsVsEta_; + MonitorElement* hnLayers_; + MonitorElement* hnLayersVsPhi_; + MonitorElement* hnLayersVsEta_; + MonitorElement* hCharge_; + MonitorElement* hchi2_; + MonitorElement* hChi2VsPhi_; + MonitorElement* hChi2VsEta_; + MonitorElement* hpt_; + MonitorElement* hptLogLog_; + MonitorElement* heta_; + MonitorElement* hphi_; + MonitorElement* hz_; + MonitorElement* htip_; + MonitorElement* hquality_; + //1D differences + MonitorElement* hptdiffMatched_; + MonitorElement* hCurvdiffMatched_; + MonitorElement* hetadiffMatched_; + MonitorElement* hphidiffMatched_; + MonitorElement* hzdiffMatched_; + MonitorElement* htipdiffMatched_; + + //for matching eff vs region: derive the ratio at harvesting + MonitorElement* hpt_eta_tkAllCPU_; + MonitorElement* hpt_eta_tkAllCPUMatched_; + MonitorElement* hphi_z_tkAllCPU_; + 
MonitorElement* hphi_z_tkAllCPUMatched_; +}; + +// +// constructors +// + +template +SiPixelCompareTrackSoAAlpaka::SiPixelCompareTrackSoAAlpaka(const edm::ParameterSet& iConfig) + : tokenSoATrackCPU_(consumes(iConfig.getParameter("pixelTrackSrcCPU"))), + tokenSoATrackGPU_(consumes(iConfig.getParameter("pixelTrackSrcGPU"))), + topFolderName_(iConfig.getParameter("topFolderName")), + useQualityCut_(iConfig.getParameter("useQualityCut")), + minQuality_(pixelTrack::qualityByName(iConfig.getParameter("minQuality"))), + dr2cut_(iConfig.getParameter("deltaR2cut")) {} + +// +// -- Analyze +// +template +void SiPixelCompareTrackSoAAlpaka::analyze(const edm::Event& iEvent, const edm::EventSetup& iSetup) { + using helper = TracksUtilities; + const auto& tsoaHandleCPU = iEvent.getHandle(tokenSoATrackCPU_); + const auto& tsoaHandleGPU = iEvent.getHandle(tokenSoATrackGPU_); + if (not tsoaHandleCPU or not tsoaHandleGPU) { + edm::LogWarning out("SiPixelCompareTrackSoAAlpaka"); + if (not tsoaHandleCPU) { + out << "reference (cpu) tracks not found; "; + } + if (not tsoaHandleGPU) { + out << "target (gpu) tracks not found; "; + } + out << "the comparison will not run."; + return; + } + + auto const& tsoaCPU = *tsoaHandleCPU; + auto const& tsoaGPU = *tsoaHandleGPU; + auto maxTracksCPU = tsoaCPU.view().metadata().size(); //this should be same for both? + auto maxTracksGPU = tsoaGPU.view().metadata().size(); //this should be same for both? + auto const* qualityCPU = tsoaCPU.view().quality(); + auto const* qualityGPU = tsoaGPU.view().quality(); + int32_t nTracksCPU = 0; + int32_t nTracksGPU = 0; + int32_t nLooseAndAboveTracksCPU = 0; + int32_t nLooseAndAboveTracksCPU_matchedGPU = 0; + int32_t nLooseAndAboveTracksGPU = 0; + + //Loop over GPU tracks and store the indices of the loose tracks. Whats happens if useQualityCut_ is false? 
+ std::vector looseTrkidxGPU; + for (int32_t jt = 0; jt < maxTracksGPU; ++jt) { + if (helper::nHits(tsoaGPU.view(), jt) == 0) + break; // this is a guard + if (!(tsoaGPU.view()[jt].pt() > 0.)) + continue; + nTracksGPU++; + if (useQualityCut_ && qualityGPU[jt] < minQuality_) + continue; + nLooseAndAboveTracksGPU++; + looseTrkidxGPU.emplace_back(jt); + } + + //Now loop over CPU tracks//nested loop for loose gPU tracks + for (int32_t it = 0; it < maxTracksCPU; ++it) { + int nHitsCPU = helper::nHits(tsoaCPU.view(), it); + + if (nHitsCPU == 0) + break; // this is a guard + + float ptCPU = tsoaCPU.view()[it].pt(); + float etaCPU = tsoaCPU.view()[it].eta(); + float phiCPU = helper::phi(tsoaCPU.view(), it); + float zipCPU = helper::zip(tsoaCPU.view(), it); + float tipCPU = helper::tip(tsoaCPU.view(), it); + + if (!(ptCPU > 0.)) + continue; + nTracksCPU++; + if (useQualityCut_ && qualityCPU[it] < minQuality_) + continue; + nLooseAndAboveTracksCPU++; + //Now loop over loose GPU trk and find the closest in DeltaR//do we need pt cut? 
+ const int32_t notFound = -1; + int32_t closestTkidx = notFound; + float mindr2 = dr2cut_; + + for (auto gid : looseTrkidxGPU) { + float etaGPU = tsoaGPU.view()[gid].eta(); + float phiGPU = helper::phi(tsoaGPU.view(), gid); + float dr2 = reco::deltaR2(etaCPU, phiCPU, etaGPU, phiGPU); + if (dr2 > dr2cut_) + continue; // this is arbitrary + if (mindr2 > dr2) { + mindr2 = dr2; + closestTkidx = gid; + } + } + + hpt_eta_tkAllCPU_->Fill(etaCPU, ptCPU); //all CPU tk + hphi_z_tkAllCPU_->Fill(phiCPU, zipCPU); + if (closestTkidx == notFound) + continue; + nLooseAndAboveTracksCPU_matchedGPU++; + + hchi2_->Fill(tsoaCPU.view()[it].chi2(), tsoaGPU.view()[closestTkidx].chi2()); + hCharge_->Fill(helper::charge(tsoaCPU.view(), it), helper::charge(tsoaGPU.view(), closestTkidx)); + hnHits_->Fill(helper::nHits(tsoaCPU.view(), it), helper::nHits(tsoaGPU.view(), closestTkidx)); + hnLayers_->Fill(tsoaCPU.view()[it].nLayers(), tsoaGPU.view()[closestTkidx].nLayers()); + hpt_->Fill(tsoaCPU.view()[it].pt(), tsoaGPU.view()[closestTkidx].pt()); + hptLogLog_->Fill(tsoaCPU.view()[it].pt(), tsoaGPU.view()[closestTkidx].pt()); + heta_->Fill(etaCPU, tsoaGPU.view()[closestTkidx].eta()); + hphi_->Fill(phiCPU, helper::phi(tsoaGPU.view(), closestTkidx)); + hz_->Fill(zipCPU, helper::zip(tsoaGPU.view(), closestTkidx)); + htip_->Fill(tipCPU, helper::tip(tsoaGPU.view(), closestTkidx)); + hptdiffMatched_->Fill(ptCPU - tsoaGPU.view()[closestTkidx].pt()); + hCurvdiffMatched_->Fill((helper::charge(tsoaCPU.view(), it) / tsoaCPU.view()[it].pt()) - + (helper::charge(tsoaGPU.view(), closestTkidx) / tsoaGPU.view()[closestTkidx].pt())); + hetadiffMatched_->Fill(etaCPU - tsoaGPU.view()[closestTkidx].eta()); + hphidiffMatched_->Fill(reco::deltaPhi(phiCPU, helper::phi(tsoaGPU.view(), closestTkidx))); + hzdiffMatched_->Fill(zipCPU - helper::zip(tsoaGPU.view(), closestTkidx)); + htipdiffMatched_->Fill(tipCPU - helper::tip(tsoaGPU.view(), closestTkidx)); + hpt_eta_tkAllCPUMatched_->Fill(etaCPU, tsoaCPU.view()[it].pt()); 
//matched to gpu + hphi_z_tkAllCPUMatched_->Fill(phiCPU, zipCPU); // x axis is phi (not eta), same quantities as the hphi_z_tkAllCPU_ fill + } + hnTracks_->Fill(nTracksCPU, nTracksGPU); + hnLooseAndAboveTracks_->Fill(nLooseAndAboveTracksCPU, nLooseAndAboveTracksGPU); + hnLooseAndAboveTracks_matched_->Fill(nLooseAndAboveTracksCPU, nLooseAndAboveTracksCPU_matchedGPU); +} + +// +// -- Book Histograms +// +template +void SiPixelCompareTrackSoAAlpaka::bookHistograms(DQMStore::IBooker& iBook, + edm::Run const& iRun, + edm::EventSetup const& iSetup) { + iBook.cd(); + iBook.setCurrentFolder(topFolderName_); + + // clang-format off + std::string toRep = "Number of tracks"; + // FIXME: all the 2D correlation plots are quite heavy in terms of memory consumption, so as soon as DQM supports THnSparse + // these should be moved to a less resource consuming format + hnTracks_ = iBook.book2I("nTracks", fmt::sprintf("%s per event; CPU; GPU",toRep), 501, -0.5, 500.5, 501, -0.5, 500.5); + hnLooseAndAboveTracks_ = iBook.book2I("nLooseAndAboveTracks", fmt::sprintf("%s (quality #geq loose) per event; CPU; GPU",toRep), 501, -0.5, 500.5, 501, -0.5, 500.5); + hnLooseAndAboveTracks_matched_ = iBook.book2I("nLooseAndAboveTracks_matched", fmt::sprintf("%s (quality #geq loose) per event; CPU; GPU",toRep), 501, -0.5, 500.5, 501, -0.5, 500.5); + + toRep = "Number of all RecHits per track (quality #geq loose)"; + hnHits_ = iBook.book2I("nRecHits", fmt::sprintf("%s;CPU;GPU",toRep), 15, -0.5, 14.5, 15, -0.5, 14.5); + + toRep = "Number of all layers per track (quality #geq loose)"; + hnLayers_ = iBook.book2I("nLayers", fmt::sprintf("%s;CPU;GPU",toRep), 15, -0.5, 14.5, 15, -0.5, 14.5); + + toRep = "Track (quality #geq loose) #chi^{2}/ndof"; + hchi2_ = iBook.book2I("nChi2ndof", fmt::sprintf("%s;CPU;GPU",toRep), 40, 0., 20., 40, 0., 20.); + + toRep = "Track (quality #geq loose) charge"; + hCharge_ = iBook.book2I("charge",fmt::sprintf("%s;CPU;GPU",toRep),3, -1.5, 1.5, 3, -1.5, 1.5); + + hpt_ = iBook.book2I("pt", "Track (quality #geq loose) p_{T} [GeV];CPU;GPU", 200, 0., 
200., 200, 0., 200.); + hptLogLog_ = make2DIfLog(iBook, true, true, "ptLogLog", "Track (quality #geq loose) p_{T} [GeV];CPU;GPU", 200, log10(0.5), log10(200.), 200, log10(0.5), log10(200.)); + heta_ = iBook.book2I("eta", "Track (quality #geq loose) #eta;CPU;GPU", 30, -3., 3., 30, -3., 3.); + hphi_ = iBook.book2I("phi", "Track (quality #geq loose) #phi;CPU;GPU", 30, -M_PI, M_PI, 30, -M_PI, M_PI); + hz_ = iBook.book2I("z", "Track (quality #geq loose) z [cm];CPU;GPU", 30, -30., 30., 30, -30., 30.); + htip_ = iBook.book2I("tip", "Track (quality #geq loose) TIP [cm];CPU;GPU", 100, -0.5, 0.5, 100, -0.5, 0.5); + //1D difference plots + hptdiffMatched_ = iBook.book1D("ptdiffmatched", " p_{T} diff [GeV] between matched tracks; #Delta p_{T} [GeV]", 60, -30., 30.); + hCurvdiffMatched_ = iBook.book1D("curvdiffmatched", "q/p_{T} diff [GeV] between matched tracks; #Delta q/p_{T} [GeV]", 60, -30., 30.); + hetadiffMatched_ = iBook.book1D("etadiffmatched", " #eta diff between matched tracks; #Delta #eta", 160, -0.04 ,0.04); + hphidiffMatched_ = iBook.book1D("phidiffmatched", " #phi diff between matched tracks; #Delta #phi", 160, -0.04 ,0.04); + hzdiffMatched_ = iBook.book1D("zdiffmatched", " z diff between matched tracks; #Delta z [cm]", 300, -1.5, 1.5); + htipdiffMatched_ = iBook.book1D("tipdiffmatched", " TIP diff between matched tracks; #Delta TIP [cm]", 300, -1.5, 1.5); + //2D plots for eff + hpt_eta_tkAllCPU_ = iBook.book2I("ptetatrkAllCPU", "Track (quality #geq loose) on CPU; #eta; p_{T} [GeV];", 30, -M_PI, M_PI, 200, 0., 200.); + hpt_eta_tkAllCPUMatched_ = iBook.book2I("ptetatrkAllCPUmatched", "Track (quality #geq loose) on CPU matched to GPU track; #eta; p_{T} [GeV];", 30, -M_PI, M_PI, 200, 0., 200.); + + hphi_z_tkAllCPU_ = iBook.book2I("phiztrkAllCPU", "Track (quality #geq loose) on CPU; #phi; z [cm];", 30, -M_PI, M_PI, 30, -30., 30.); + hphi_z_tkAllCPUMatched_ = iBook.book2I("phiztrkAllCPUmatched", "Track (quality #geq loose) on CPU; #phi; z [cm];", 30, -M_PI, M_PI, 30, 
-30., 30.); + +} + +template +void SiPixelCompareTrackSoAAlpaka::fillDescriptions(edm::ConfigurationDescriptions& descriptions) { + // monitorpixelTrackSoA + edm::ParameterSetDescription desc; + desc.add("pixelTrackSrcCPU", edm::InputTag("pixelTracksAlpakaSerial")); + desc.add("pixelTrackSrcGPU", edm::InputTag("pixelTracksAlpaka")); + desc.add("topFolderName", "SiPixelHeterogeneous/PixelTrackCompareDeviceVSHost"); + desc.add("useQualityCut", true); + desc.add("minQuality", "loose"); + desc.add("deltaR2cut", 0.04); + descriptions.addWithDefaultLabel(desc); +} + +using SiPixelPhase1CompareTrackSoAAlpaka = SiPixelCompareTrackSoAAlpaka; +using SiPixelPhase2CompareTrackSoAAlpaka = SiPixelCompareTrackSoAAlpaka; +using SiPixelHIonPhase1CompareTrackSoAAlpaka = SiPixelCompareTrackSoAAlpaka; + +DEFINE_FWK_MODULE(SiPixelPhase1CompareTrackSoAAlpaka); +DEFINE_FWK_MODULE(SiPixelPhase2CompareTrackSoAAlpaka); +DEFINE_FWK_MODULE(SiPixelHIonPhase1CompareTrackSoAAlpaka); diff --git a/DQM/SiPixelHeterogeneous/plugins/SiPixelCompareVertexSoAAlpaka.cc b/DQM/SiPixelHeterogeneous/plugins/SiPixelCompareVertexSoAAlpaka.cc new file mode 100644 index 0000000000000..1706cf54741bc --- /dev/null +++ b/DQM/SiPixelHeterogeneous/plugins/SiPixelCompareVertexSoAAlpaka.cc @@ -0,0 +1,186 @@ +// -*- C++ -*- +// Package: SiPixelCompareVertexSoAAlpaka +// Class: SiPixelCompareVertexSoAAlpaka +// +/**\class SiPixelCompareVertexSoAAlpaka SiPixelCompareVertexSoAAlpaka.cc +*/ +// +// Author: Suvankar Roy Chowdhury +// +#include "FWCore/Framework/interface/Frameworkfwd.h" +#include "FWCore/Framework/interface/Event.h" +#include "FWCore/Framework/interface/MakerMacros.h" +#include "FWCore/MessageLogger/interface/MessageLogger.h" +#include "FWCore/ParameterSet/interface/ParameterSet.h" +#include "FWCore/Utilities/interface/InputTag.h" +#include "DataFormats/Common/interface/Handle.h" +// DQM Histograming +#include "DQMServices/Core/interface/MonitorElement.h" +#include 
"DQMServices/Core/interface/DQMEDAnalyzer.h" +#include "DQMServices/Core/interface/DQMStore.h" +#include "DataFormats/Vertex/interface/ZVertexSoAHost.h" +#include "DataFormats/BeamSpot/interface/BeamSpot.h" + +class SiPixelCompareVertexSoAAlpaka : public DQMEDAnalyzer { +public: + using IndToEdm = std::vector; + explicit SiPixelCompareVertexSoAAlpaka(const edm::ParameterSet&); + ~SiPixelCompareVertexSoAAlpaka() override = default; + void bookHistograms(DQMStore::IBooker& ibooker, edm::Run const& iRun, edm::EventSetup const& iSetup) override; + void analyze(const edm::Event& iEvent, const edm::EventSetup& iSetup) override; + static void fillDescriptions(edm::ConfigurationDescriptions& descriptions); + +private: + const edm::EDGetTokenT tokenSoAVertexCPU_; + const edm::EDGetTokenT tokenSoAVertexGPU_; + const edm::EDGetTokenT tokenBeamSpot_; + const std::string topFolderName_; + const float dzCut_; + MonitorElement* hnVertex_; + MonitorElement* hx_; + MonitorElement* hy_; + MonitorElement* hz_; + MonitorElement* hchi2_; + MonitorElement* hchi2oNdof_; + MonitorElement* hptv2_; + MonitorElement* hntrks_; + MonitorElement* hxdiff_; + MonitorElement* hydiff_; + MonitorElement* hzdiff_; +}; + +// +// constructors +// + +// Note tokenSoAVertexGPU_ contains data copied from device to host, hence is a HostCollection +SiPixelCompareVertexSoAAlpaka::SiPixelCompareVertexSoAAlpaka(const edm::ParameterSet& iConfig) + : tokenSoAVertexCPU_(consumes(iConfig.getParameter("pixelVertexSrcCPU"))), + tokenSoAVertexGPU_(consumes(iConfig.getParameter("pixelVertexSrcGPU"))), + tokenBeamSpot_(consumes(iConfig.getParameter("beamSpotSrc"))), + topFolderName_(iConfig.getParameter("topFolderName")), + dzCut_(iConfig.getParameter("dzCut")) {} + +// +// -- Analyze +// +void SiPixelCompareVertexSoAAlpaka::analyze(const edm::Event& iEvent, const edm::EventSetup& iSetup) { + const auto& vsoaHandleCPU = iEvent.getHandle(tokenSoAVertexCPU_); + const auto& vsoaHandleGPU = 
iEvent.getHandle(tokenSoAVertexGPU_); + if (not vsoaHandleCPU or not vsoaHandleGPU) { + edm::LogWarning out("SiPixelCompareVertexSoAAlpaka"); + if (not vsoaHandleCPU) { + out << "reference (cpu) vertices not found; "; + } + if (not vsoaHandleGPU) { + out << "target (gpu) vertices not found; "; + } + out << "the comparison will not run."; + return; + } + + auto const& vsoaCPU = *vsoaHandleCPU; + int nVerticesCPU = vsoaCPU.view().nvFinal(); + auto const& vsoaGPU = *vsoaHandleGPU; + int nVerticesGPU = vsoaGPU.view().nvFinal(); + + auto bsHandle = iEvent.getHandle(tokenBeamSpot_); + float x0 = 0., y0 = 0., z0 = 0., dxdz = 0., dydz = 0.; + if (!bsHandle.isValid()) { + edm::LogWarning("SiPixelCompareVertexSoAAlpaka") << "No beamspot found. returning vertexes with (0,0,Z) "; + } else { + const reco::BeamSpot& bs = *bsHandle; + x0 = bs.x0(); + y0 = bs.y0(); + z0 = bs.z0(); + dxdz = bs.dxdz(); + dydz = bs.dydz(); + } + + for (int ivc = 0; ivc < nVerticesCPU; ivc++) { + auto sic = vsoaCPU.view()[ivc].sortInd(); + auto zc = vsoaCPU.view()[sic].zv(); + auto xc = x0 + dxdz * zc; + auto yc = y0 + dydz * zc; + zc += z0; + + auto ndofCPU = vsoaCPU.view()[sic].ndof(); + auto chi2CPU = vsoaCPU.view()[sic].chi2(); + + const int32_t notFound = -1; + int32_t closestVtxidx = notFound; + float mindz = dzCut_; + + for (int ivg = 0; ivg < nVerticesGPU; ivg++) { + auto sig = vsoaGPU.view()[ivg].sortInd(); + auto zgc = vsoaGPU.view()[sig].zv() + z0; + auto zDist = std::abs(zc - zgc); + //insert some matching condition + if (zDist > dzCut_) + continue; + if (mindz > zDist) { + mindz = zDist; + closestVtxidx = sig; // keep the sortInd value: it is used directly as the index below + } + } + if (closestVtxidx == notFound) + continue; + + auto zg = vsoaGPU.view()[closestVtxidx].zv(); + auto xg = x0 + dxdz * zg; + auto yg = y0 + dydz * zg; + zg += z0; + auto ndofGPU = vsoaGPU.view()[closestVtxidx].ndof(); + auto chi2GPU = vsoaGPU.view()[closestVtxidx].chi2(); + + hx_->Fill(xc - x0, xg - x0); + hy_->Fill(yc - y0, yg - y0); + hz_->Fill(zc, zg); + hxdiff_->Fill(xc - 
xg); + hydiff_->Fill(yc - yg); + hzdiff_->Fill(zc - zg); + hchi2_->Fill(chi2CPU, chi2GPU); + hchi2oNdof_->Fill(chi2CPU / ndofCPU, chi2GPU / ndofGPU); + hptv2_->Fill(vsoaCPU.view()[sic].ptv2(), vsoaGPU.view()[closestVtxidx].ptv2()); + hntrks_->Fill(ndofCPU + 1, ndofGPU + 1); + } + hnVertex_->Fill(nVerticesCPU, nVerticesGPU); +} + +// +// -- Book Histograms +// +void SiPixelCompareVertexSoAAlpaka::bookHistograms(DQMStore::IBooker& ibooker, + edm::Run const& iRun, + edm::EventSetup const& iSetup) { + ibooker.cd(); + ibooker.setCurrentFolder(topFolderName_); + + // FIXME: all the 2D correlation plots are quite heavy in terms of memory consumption, so a as soon as DQM supports either TH2I or THnSparse + // these should be moved to a less resource consuming format + hnVertex_ = ibooker.book2I("nVertex", "# of Vertices;CPU;GPU", 101, -0.5, 100.5, 101, -0.5, 100.5); + hx_ = ibooker.book2I("vx", "Vertez x - Beamspot x;CPU;GPU", 50, -0.1, 0.1, 50, -0.1, 0.1); + hy_ = ibooker.book2I("vy", "Vertez y - Beamspot y;CPU;GPU", 50, -0.1, 0.1, 50, -0.1, 0.1); + hz_ = ibooker.book2I("vz", "Vertez z;CPU;GPU", 30, -30., 30., 30, -30., 30.); + hchi2_ = ibooker.book2I("chi2", "Vertex chi-squared;CPU;GPU", 40, 0., 20., 40, 0., 20.); + hchi2oNdof_ = ibooker.book2I("chi2oNdof", "Vertex chi-squared/Ndof;CPU;GPU", 40, 0., 20., 40, 0., 20.); + hptv2_ = ibooker.book2I("ptsq", "Vertex #sum (p_{T})^{2};CPU;GPU", 200, 0., 200., 200, 0., 200.); + hntrks_ = ibooker.book2I("ntrk", "#tracks associated;CPU;GPU", 100, -0.5, 99.5, 100, -0.5, 99.5); + hntrks_ = ibooker.book2I("ntrk", "#tracks associated;CPU;GPU", 100, -0.5, 99.5, 100, -0.5, 99.5); + hxdiff_ = ibooker.book1D("vxdiff", ";Vertex x difference (CPU - GPU);#entries", 100, -0.001, 0.001); + hydiff_ = ibooker.book1D("vydiff", ";Vertex y difference (CPU - GPU);#entries", 100, -0.001, 0.001); + hzdiff_ = ibooker.book1D("vzdiff", ";Vertex z difference (CPU - GPU);#entries", 100, -2.5, 2.5); +} + +void 
SiPixelCompareVertexSoAAlpaka::fillDescriptions(edm::ConfigurationDescriptions& descriptions) { + // monitorpixelVertexSoA + edm::ParameterSetDescription desc; + desc.add("pixelVertexSrcCPU", edm::InputTag("pixelVerticesAlpakaSerial")); + desc.add("pixelVertexSrcGPU", edm::InputTag("pixelVerticesAlpaka")); + desc.add("beamSpotSrc", edm::InputTag("offlineBeamSpot")); + desc.add("topFolderName", "SiPixelHeterogeneous/PixelVertexCompareSoADeviceVSHost"); + desc.add("dzCut", 1.); + descriptions.addWithDefaultLabel(desc); +} + +DEFINE_FWK_MODULE(SiPixelCompareVertexSoAAlpaka); diff --git a/DQM/SiPixelHeterogeneous/plugins/SiPixelMonitorRecHitsSoAAlpaka.cc b/DQM/SiPixelHeterogeneous/plugins/SiPixelMonitorRecHitsSoAAlpaka.cc new file mode 100644 index 0000000000000..25de77e0bca1c --- /dev/null +++ b/DQM/SiPixelHeterogeneous/plugins/SiPixelMonitorRecHitsSoAAlpaka.cc @@ -0,0 +1,198 @@ +#include "DQMServices/Core/interface/MonitorElement.h" +#include "DQMServices/Core/interface/DQMEDAnalyzer.h" +#include "DQMServices/Core/interface/DQMStore.h" +#include "DataFormats/Math/interface/approx_atan2.h" +#include "DataFormats/SiPixelDetId/interface/PixelSubdetector.h" +#include "DataFormats/TrackerCommon/interface/TrackerTopology.h" +#include "DataFormats/TrackingRecHitSoA/interface/TrackingRecHitSoAHost.h" +#include "FWCore/Framework/interface/Event.h" +#include "FWCore/Framework/interface/Frameworkfwd.h" +#include "FWCore/MessageLogger/interface/MessageLogger.h" +#include "FWCore/ParameterSet/interface/ParameterSet.h" +#include "FWCore/ParameterSet/interface/ParameterSetDescription.h" +#include "FWCore/ParameterSet/interface/ConfigurationDescriptions.h" +#include "Geometry/CommonDetUnit/interface/PixelGeomDetUnit.h" +#include "Geometry/CommonTopologies/interface/PixelTopology.h" +#include "Geometry/Records/interface/TrackerDigiGeometryRecord.h" +#include "Geometry/TrackerGeometryBuilder/interface/TrackerGeometry.h" + +template +class SiPixelMonitorRecHitsSoAAlpaka : public 
DQMEDAnalyzer { +public: + using HitsOnHost = TrackingRecHitHost; + + explicit SiPixelMonitorRecHitsSoAAlpaka(const edm::ParameterSet&); + ~SiPixelMonitorRecHitsSoAAlpaka() override = default; + void dqmBeginRun(const edm::Run&, const edm::EventSetup&) override; + void bookHistograms(DQMStore::IBooker& ibooker, edm::Run const& iRun, edm::EventSetup const& iSetup) override; + void analyze(const edm::Event& iEvent, const edm::EventSetup& iSetup) override; + static void fillDescriptions(edm::ConfigurationDescriptions& descriptions); + +private: + const edm::ESGetToken geomToken_; + const edm::ESGetToken topoToken_; + const edm::EDGetTokenT tokenSoAHitsCPU_; + const std::string topFolderName_; + const TrackerGeometry* tkGeom_ = nullptr; + const TrackerTopology* tTopo_ = nullptr; + MonitorElement* hnHits; + MonitorElement* hBFposZP; + MonitorElement* hBFposZR; + MonitorElement* hBposXY; + MonitorElement* hBposZP; + MonitorElement* hBcharge; + MonitorElement* hBsizex; + MonitorElement* hBsizey; + MonitorElement* hBposZPL[4]; // max 4 barrel hits + MonitorElement* hBchargeL[4]; + MonitorElement* hBsizexL[4]; + MonitorElement* hBsizeyL[4]; + MonitorElement* hFposXY; + MonitorElement* hFposZP; + MonitorElement* hFcharge; + MonitorElement* hFsizex; + MonitorElement* hFsizey; + MonitorElement* hFposXYD[2][12]; // max 12 endcap disks + MonitorElement* hFchargeD[2][12]; + MonitorElement* hFsizexD[2][12]; + MonitorElement* hFsizeyD[2][12]; +}; + +// +// constructors +// +template +SiPixelMonitorRecHitsSoAAlpaka::SiPixelMonitorRecHitsSoAAlpaka(const edm::ParameterSet& iConfig) + : geomToken_(esConsumes()), + topoToken_(esConsumes()), + tokenSoAHitsCPU_(consumes(iConfig.getParameter("pixelHitsSrc"))), + topFolderName_(iConfig.getParameter("TopFolderName")) {} + +// +// Begin Run +// +template +void SiPixelMonitorRecHitsSoAAlpaka::dqmBeginRun(const edm::Run& iRun, const edm::EventSetup& iSetup) { + tkGeom_ = &iSetup.getData(geomToken_); + tTopo_ = &iSetup.getData(topoToken_); +} + 
+// +// -- Analyze +// +template +void SiPixelMonitorRecHitsSoAAlpaka::analyze(const edm::Event& iEvent, const edm::EventSetup& iSetup) { + const auto& rhsoaHandle = iEvent.getHandle(tokenSoAHitsCPU_); + if (!rhsoaHandle.isValid()) { + edm::LogWarning("SiPixelMonitorRecHitsSoAAlpaka") << "No RecHits SoA found \n returning!"; + return; + } + auto const& rhsoa = *rhsoaHandle; + auto const& soa2d = rhsoa.const_view(); + + uint32_t nHits_ = soa2d.metadata().size(); + hnHits->Fill(nHits_); + auto detIds = tkGeom_->detUnitIds(); + for (uint32_t i = 0; i < nHits_; i++) { + DetId id = detIds[soa2d[i].detectorIndex()]; + float xG = soa2d[i].xGlobal(); + float yG = soa2d[i].yGlobal(); + float zG = soa2d[i].zGlobal(); + float rG = soa2d[i].rGlobal(); + float fphi = short2phi(soa2d[i].iphi()); + uint32_t charge = soa2d[i].chargeAndStatus().charge; + int16_t sizeX = std::ceil(float(std::abs(soa2d[i].clusterSizeX()) / 8.)); + int16_t sizeY = std::ceil(float(std::abs(soa2d[i].clusterSizeY()) / 8.)); + hBFposZP->Fill(zG, fphi); + int16_t ysign = yG >= 0 ? 
1 : -1; + hBFposZR->Fill(zG, rG * ysign); + switch (id.subdetId()) { + case PixelSubdetector::PixelBarrel: + hBposXY->Fill(xG, yG); + hBposZP->Fill(zG, fphi); + hBcharge->Fill(charge); + hBsizex->Fill(sizeX); + hBsizey->Fill(sizeY); + hBposZPL[tTopo_->pxbLayer(id) - 1]->Fill(zG, fphi); + hBchargeL[tTopo_->pxbLayer(id) - 1]->Fill(charge); + hBsizexL[tTopo_->pxbLayer(id) - 1]->Fill(sizeX); + hBsizeyL[tTopo_->pxbLayer(id) - 1]->Fill(sizeY); + break; + case PixelSubdetector::PixelEndcap: + hFposXY->Fill(xG, yG); + hFposZP->Fill(zG, fphi); + hFcharge->Fill(charge); + hFsizex->Fill(sizeX); + hFsizey->Fill(sizeY); + hFposXYD[tTopo_->pxfSide(id) - 1][tTopo_->pxfDisk(id) - 1]->Fill(xG, yG); + hFchargeD[tTopo_->pxfSide(id) - 1][tTopo_->pxfDisk(id) - 1]->Fill(charge); + hFsizexD[tTopo_->pxfSide(id) - 1][tTopo_->pxfDisk(id) - 1]->Fill(sizeX); + hFsizeyD[tTopo_->pxfSide(id) - 1][tTopo_->pxfDisk(id) - 1]->Fill(sizeY); + break; + } + } +} + +// +// -- Book Histograms +// +template +void SiPixelMonitorRecHitsSoAAlpaka::bookHistograms(DQMStore::IBooker& iBook, + edm::Run const& iRun, + edm::EventSetup const& iSetup) { + iBook.cd(); + iBook.setCurrentFolder(topFolderName_); + + // clang-format off + //Global + hnHits = iBook.book1D("nHits", "RecHits per event;RecHits;#events", 200, 0, 5000); + hBFposZP = iBook.book2D("recHitsGlobalPosZP", "RecHits position Global;Z;#phi", 1000, -60, 60, 200,-3.2,3.2); + hBFposZR = iBook.book2D("recHitsGlobalPosZR", "RecHits position Global;Z;R", 1000, -60, 60, 200,-20,20); + //Barrel + hBposXY = iBook.book2D("recHitsBarrelPosXY", "RecHits position Barrel;X;Y", 200, -20, 20, 200,-20,20); + hBposZP = iBook.book2D("recHitsBarrelPosZP", "RecHits position Barrel;Z;#phi", 300, -30, 30, 200,-3.2,3.2); + hBcharge = iBook.book1D("recHitsBarrelCharge", "RecHits Charge Barrel;Charge;#events", 250, 0, 100000); + hBsizex = iBook.book1D("recHitsBarrelSizex", "RecHits SizeX Barrel;SizeX;#events", 50, 0, 50); + hBsizey = iBook.book1D("recHitsBarrelSizey", "RecHits 
SizeY Barrel;SizeY;#events", 50, 0, 50); + //Barrel Layer + for(unsigned int il=0;ilnumberOfLayers(PixelSubdetector::PixelBarrel);il++){ + hBposZPL[il] = iBook.book2D(Form("recHitsBLay%dPosZP",il+1), Form("RecHits position Barrel Layer%d;Z;#phi",il+1), 300, -30, 30, 200,-3.2,3.2); + hBchargeL[il] = iBook.book1D(Form("recHitsBLay%dCharge",il+1), Form("RecHits Charge Barrel Layer%d;Charge;#events",il+1), 250, 0, 100000); + hBsizexL[il] = iBook.book1D(Form("recHitsBLay%dSizex",il+1), Form("RecHits SizeX Barrel Layer%d;SizeX;#events",il+1), 50, 0, 50); + hBsizeyL[il] = iBook.book1D(Form("recHitsBLay%dSizey",il+1), Form("RecHits SizeY Barrel Layer%d;SizeY;#events",il+1), 50, 0, 50); + } + //Endcaps + hFposXY = iBook.book2D("recHitsEndcapsPosXY", "RecHits position Endcaps;X;Y", 200, -20, 20, 200,-20, 20); + hFposZP = iBook.book2D("recHitsEndcapsPosZP", "RecHits position Endcaps;Z;#phi", 600, -60, 60, 200,-3.2,3.2); + hFcharge = iBook.book1D("recHitsEndcapsCharge", "RecHits Charge Endcaps;Charge;#events", 250, 0, 100000); + hFsizex = iBook.book1D("recHitsEndcapsSizex", "RecHits SizeX Endcaps;SizeX;#events", 50, 0, 50); + hFsizey = iBook.book1D("recHitsEndcapsSizey", "RecHits SizeY Endcaps;SizeY;#events", 50, 0, 50); + //Endcaps Disk + for(int is=0;is<2;is++){ + int sign=is==0? 
-1:1; + for(unsigned int id=0;idnumberOfLayers(PixelSubdetector::PixelEndcap);id++){ + hFposXYD[is][id] = iBook.book2D(Form("recHitsFDisk%+dPosXY",id*sign+sign), Form("RecHits position Endcaps Disk%+d;X;Y",id*sign+sign), 200, -20, 20, 200,-20,20); + hFchargeD[is][id] = iBook.book1D(Form("recHitsFDisk%+dCharge",id*sign+sign), Form("RecHits Charge Endcaps Disk%+d;Charge;#events",id*sign+sign), 250, 0, 100000); + hFsizexD[is][id] = iBook.book1D(Form("recHitsFDisk%+dSizex",id*sign+sign), Form("RecHits SizeX Endcaps Disk%+d;SizeX;#events",id*sign+sign), 50, 0, 50); + hFsizeyD[is][id] = iBook.book1D(Form("recHitsFDisk%+dSizey",id*sign+sign), Form("RecHits SizeY Endcaps Disk%+d;SizeY;#events",id*sign+sign), 50, 0, 50); + } + } +} + +template +void SiPixelMonitorRecHitsSoAAlpaka::fillDescriptions(edm::ConfigurationDescriptions& descriptions) { + // monitorpixelRecHitsSoA + edm::ParameterSetDescription desc; + desc.add("pixelHitsSrc", edm::InputTag("siPixelRecHitsPreSplittingAlpaka")); + desc.add("TopFolderName", "SiPixelHeterogeneous/PixelRecHitsAlpaka"); + descriptions.addWithDefaultLabel(desc); +} + +using SiPixelPhase1MonitorRecHitsSoAAlpaka = SiPixelMonitorRecHitsSoAAlpaka; +using SiPixelPhase2MonitorRecHitsSoAAlpaka = SiPixelMonitorRecHitsSoAAlpaka; +using SiPixelHIonPhase1MonitorRecHitsSoAAlpaka = SiPixelMonitorRecHitsSoAAlpaka; + +#include "FWCore/Framework/interface/MakerMacros.h" +DEFINE_FWK_MODULE(SiPixelPhase1MonitorRecHitsSoAAlpaka); +DEFINE_FWK_MODULE(SiPixelPhase2MonitorRecHitsSoAAlpaka); +DEFINE_FWK_MODULE(SiPixelHIonPhase1MonitorRecHitsSoAAlpaka); diff --git a/DQM/SiPixelHeterogeneous/plugins/SiPixelMonitorTrackSoAAlpaka.cc b/DQM/SiPixelHeterogeneous/plugins/SiPixelMonitorTrackSoAAlpaka.cc new file mode 100644 index 0000000000000..8c205aec6cb9c --- /dev/null +++ b/DQM/SiPixelHeterogeneous/plugins/SiPixelMonitorTrackSoAAlpaka.cc @@ -0,0 +1,197 @@ +// -*- C++ -*- +// Package: SiPixelMonitorTrackSoAAlpaka +// Class: SiPixelMonitorTrackSoAAlpaka +// +/**\class 
SiPixelMonitorTrackSoAAlpaka SiPixelMonitorTrackSoAAlpaka.cc +*/ +// +// Author: Suvankar Roy Chowdhury +// + +// for string manipulations +#include +#include "DataFormats/Common/interface/Handle.h" +#include "FWCore/Framework/interface/ESHandle.h" +#include "FWCore/Framework/interface/Event.h" +#include "FWCore/Framework/interface/Frameworkfwd.h" +#include "FWCore/Framework/interface/MakerMacros.h" +#include "FWCore/MessageLogger/interface/MessageLogger.h" +#include "FWCore/ParameterSet/interface/ParameterSet.h" +#include "FWCore/ServiceRegistry/interface/Service.h" +#include "FWCore/Utilities/interface/InputTag.h" +// DQM Histograming +#include "DQMServices/Core/interface/MonitorElement.h" +#include "DQMServices/Core/interface/DQMEDAnalyzer.h" +#include "DQMServices/Core/interface/DQMStore.h" +#include "DataFormats/TrackSoA/interface/TrackSoAHost.h" + +template +class SiPixelMonitorTrackSoAAlpaka : public DQMEDAnalyzer { +public: + using PixelTrackHeterogeneous = TrackSoAHost; + explicit SiPixelMonitorTrackSoAAlpaka(const edm::ParameterSet&); + ~SiPixelMonitorTrackSoAAlpaka() override = default; + void bookHistograms(DQMStore::IBooker& ibooker, edm::Run const& iRun, edm::EventSetup const& iSetup) override; + void analyze(const edm::Event& iEvent, const edm::EventSetup& iSetup) override; + static void fillDescriptions(edm::ConfigurationDescriptions& descriptions); + +private: + edm::EDGetTokenT tokenSoATrack_; + std::string topFolderName_; + bool useQualityCut_; + pixelTrack::Quality minQuality_; + MonitorElement* hnTracks; + MonitorElement* hnLooseAndAboveTracks; + MonitorElement* hnHits; + MonitorElement* hnHitsVsPhi; + MonitorElement* hnHitsVsEta; + MonitorElement* hnLayers; + MonitorElement* hnLayersVsPhi; + MonitorElement* hnLayersVsEta; + MonitorElement* hchi2; + MonitorElement* hChi2VsPhi; + MonitorElement* hChi2VsEta; + MonitorElement* hpt; + MonitorElement* heta; + MonitorElement* hphi; + MonitorElement* hz; + MonitorElement* htip; + MonitorElement* 
hquality; +}; + +// +// constructors +// + +template +SiPixelMonitorTrackSoAAlpaka::SiPixelMonitorTrackSoAAlpaka(const edm::ParameterSet& iConfig) { + tokenSoATrack_ = consumes(iConfig.getParameter("pixelTrackSrc")); + topFolderName_ = iConfig.getParameter("topFolderName"); //"SiPixelHeterogeneous/PixelTrackSoA"; + useQualityCut_ = iConfig.getParameter("useQualityCut"); + minQuality_ = pixelTrack::qualityByName(iConfig.getParameter("minQuality")); +} + +// +// -- Analyze +// +template +void SiPixelMonitorTrackSoAAlpaka::analyze(const edm::Event& iEvent, const edm::EventSetup& iSetup) { + const auto& tsoaHandle = iEvent.getHandle(tokenSoATrack_); + if (!tsoaHandle.isValid()) { + edm::LogWarning("SiPixelMonitorTrackSoAAlpaka") << "No Track SoA found \n returning!" << std::endl; + return; + } + + auto const& tsoa = *tsoaHandle.product(); + auto maxTracks = tsoa.view().metadata().size(); + auto const* quality = tsoa.view().quality(); + int32_t nTracks = 0; + int32_t nLooseAndAboveTracks = 0; + + for (int32_t it = 0; it < maxTracks; ++it) { + auto nHits = tsoa.view().detIndices().size(it); + auto nLayers = tsoa.view()[it].nLayers(); + if (nHits == 0) + break; // this is a guard + float pt = tsoa.view()[it].pt(); + if (!(pt > 0.)) + continue; + + // fill the quality for all tracks + pixelTrack::Quality qual = quality[it]; + hquality->Fill(int(qual)); + nTracks++; + + if (useQualityCut_ && quality[it] < minQuality_) + continue; + + // fill parameters only for quality >= loose + + float chi2 = tsoa.view()[it].chi2(); + float phi = tsoa.view()[it].state()(0); //TODO: put these numbers in enum + float zip = tsoa.view()[it].state()(4); + float eta = tsoa.view()[it].eta(); + float tip = tsoa.view()[it].state()(1); + + hchi2->Fill(chi2); + hChi2VsPhi->Fill(phi, chi2); + hChi2VsEta->Fill(eta, chi2); + hnHits->Fill(nHits); + hnLayers->Fill(nLayers); + hnHitsVsPhi->Fill(phi, nHits); + hnHitsVsEta->Fill(eta, nHits); + hnLayersVsPhi->Fill(phi, nLayers); + hnLayersVsEta->Fill(eta, 
nLayers); + hpt->Fill(pt); + heta->Fill(eta); + hphi->Fill(phi); + hz->Fill(zip); + htip->Fill(tip); + nLooseAndAboveTracks++; + } + hnTracks->Fill(nTracks); + hnLooseAndAboveTracks->Fill(nLooseAndAboveTracks); +} + +// +// -- Book Histograms +// +template +void SiPixelMonitorTrackSoAAlpaka::bookHistograms(DQMStore::IBooker& iBook, + edm::Run const& iRun, + edm::EventSetup const& iSetup) { + iBook.cd(); + iBook.setCurrentFolder(topFolderName_); + + // clang-format off +std::string toRep = "Number of tracks"; +hnTracks = iBook.book1D("nTracks", fmt::sprintf(";%s per event;#events",toRep), 1001, -0.5, 1000.5); +hnLooseAndAboveTracks = iBook.book1D("nLooseAndAboveTracks", fmt::sprintf(";%s (quality #geq loose) per event;#events",toRep), 1001, -0.5, 1000.5); + +toRep = "Number of all RecHits per track (quality #geq loose)"; +hnHits = iBook.book1D("nRecHits", fmt::sprintf(";%s;#tracks",toRep), 15, -0.5, 14.5); +hnHitsVsPhi = iBook.bookProfile("nHitsPerTrackVsPhi", fmt::sprintf("%s vs track #phi;Track #phi;%s",toRep,toRep), 30, -M_PI, M_PI,0., 15.); +hnHitsVsEta = iBook.bookProfile("nHitsPerTrackVsEta", fmt::sprintf("%s vs track #eta;Track #eta;%s",toRep,toRep), 30, -3., 3., 0., 15.); + +toRep = "Number of all layers per track (quality #geq loose)"; +hnLayers = iBook.book1D("nLayers", fmt::sprintf(";%s;#tracks",toRep), 15, -0.5, 14.5); +hnLayersVsPhi = iBook.bookProfile("nLayersPerTrackVsPhi", fmt::sprintf("%s vs track #phi;Track #phi;%s",toRep,toRep), 30, -M_PI, M_PI,0., 15.); +hnLayersVsEta = iBook.bookProfile("nLayersPerTrackVsEta", fmt::sprintf("%s vs track #eta;Track #eta;%s",toRep,toRep), 30, -3., 3., 0., 15.); + +toRep = "Track (quality #geq loose) #chi^{2}/ndof"; +hchi2 = iBook.book1D("nChi2ndof", fmt::sprintf(";%s;#tracks",toRep), 40, 0., 20.); +hChi2VsPhi = iBook.bookProfile("nChi2ndofVsPhi", fmt::sprintf("%s vs track #phi;Track #phi;%s",toRep,toRep), 30, -M_PI, M_PI, 0., 20.); +hChi2VsEta = iBook.bookProfile("nChi2ndofVsEta", fmt::sprintf("%s vs track 
#eta;Track #eta;%s",toRep,toRep), 30, -3., 3., 0., 20.); + // clang-format on + + hpt = iBook.book1D("pt", ";Track (quality #geq loose) p_{T} [GeV];#tracks", 200, 0., 200.); + heta = iBook.book1D("eta", ";Track (quality #geq loose) #eta;#tracks", 30, -3., 3.); + hphi = iBook.book1D("phi", ";Track (quality #geq loose) #phi;#tracks", 30, -M_PI, M_PI); + hz = iBook.book1D("z", ";Track (quality #geq loose) z [cm];#tracks", 30, -30., 30.); + htip = iBook.book1D("tip", ";Track (quality #geq loose) TIP [cm];#tracks", 100, -0.5, 0.5); + hquality = iBook.book1D("quality", ";Track Quality;#tracks", 7, -0.5, 6.5); + uint i = 1; + for (const auto& q : pixelTrack::qualityName) { + hquality->setBinLabel(i, q.data(), 1); + i++; + } +} + +template +void SiPixelMonitorTrackSoAAlpaka::fillDescriptions(edm::ConfigurationDescriptions& descriptions) { + // monitorpixelTrackSoA + edm::ParameterSetDescription desc; + desc.add("pixelTrackSrc", edm::InputTag("pixelTracksAlpaka")); + desc.add("topFolderName", "SiPixelHeterogeneous/PixelTrackAlpaka"); + desc.add("useQualityCut", true); + desc.add("minQuality", "loose"); + descriptions.addWithDefaultLabel(desc); +} + +using SiPixelPhase1MonitorTrackSoAAlpaka = SiPixelMonitorTrackSoAAlpaka; +using SiPixelPhase2MonitorTrackSoAAlpaka = SiPixelMonitorTrackSoAAlpaka; +using SiPixelHIonPhase1MonitorTrackSoAAlpaka = SiPixelMonitorTrackSoAAlpaka; + +DEFINE_FWK_MODULE(SiPixelPhase1MonitorTrackSoAAlpaka); +DEFINE_FWK_MODULE(SiPixelPhase2MonitorTrackSoAAlpaka); +DEFINE_FWK_MODULE(SiPixelHIonPhase1MonitorTrackSoAAlpaka); \ No newline at end of file diff --git a/DQM/SiPixelHeterogeneous/plugins/SiPixelMonitorVertexSoAAlpaka.cc b/DQM/SiPixelHeterogeneous/plugins/SiPixelMonitorVertexSoAAlpaka.cc new file mode 100644 index 0000000000000..eaaa499f0009a --- /dev/null +++ b/DQM/SiPixelHeterogeneous/plugins/SiPixelMonitorVertexSoAAlpaka.cc @@ -0,0 +1,131 @@ +// -*- C++ -*- +///bookLayer +// Package: SiPixelMonitorVertexSoAAlpaka +// Class: 
SiPixelMonitorVertexSoAAlpaka +// +/**\class SiPixelMonitorVertexSoAAlpaka SiPixelMonitorVertexSoAAlpaka.cc +*/ +// +// Author: Suvankar Roy Chowdhury +// +#include "FWCore/Framework/interface/Frameworkfwd.h" +#include "FWCore/Framework/interface/Event.h" +#include "FWCore/Framework/interface/ESHandle.h" +#include "FWCore/Framework/interface/MakerMacros.h" +#include "FWCore/MessageLogger/interface/MessageLogger.h" +#include "FWCore/ParameterSet/interface/ParameterSet.h" +#include "FWCore/ServiceRegistry/interface/Service.h" +#include "FWCore/Utilities/interface/InputTag.h" +#include "DataFormats/Common/interface/Handle.h" +// DQM Histograming +#include "DQMServices/Core/interface/MonitorElement.h" +#include "DQMServices/Core/interface/DQMEDAnalyzer.h" +#include "DQMServices/Core/interface/DQMStore.h" +#include "DataFormats/Vertex/interface/ZVertexSoAHost.h" +#include "DataFormats/BeamSpot/interface/BeamSpot.h" + +class SiPixelMonitorVertexSoAAlpaka : public DQMEDAnalyzer { +public: + using IndToEdm = std::vector; + explicit SiPixelMonitorVertexSoAAlpaka(const edm::ParameterSet&); + ~SiPixelMonitorVertexSoAAlpaka() override = default; + void bookHistograms(DQMStore::IBooker& ibooker, edm::Run const& iRun, edm::EventSetup const& iSetup) override; + void analyze(const edm::Event& iEvent, const edm::EventSetup& iSetup) override; + static void fillDescriptions(edm::ConfigurationDescriptions& descriptions); + +private: + const edm::EDGetTokenT tokenSoAVertex_; + const edm::EDGetTokenT tokenBeamSpot_; + std::string topFolderName_; + MonitorElement* hnVertex; + MonitorElement* hx; + MonitorElement* hy; + MonitorElement* hz; + MonitorElement* hchi2; + MonitorElement* hchi2oNdof; + MonitorElement* hptv2; + MonitorElement* hntrks; +}; + +// +// constructors +// + +SiPixelMonitorVertexSoAAlpaka::SiPixelMonitorVertexSoAAlpaka(const edm::ParameterSet& iConfig) + : tokenSoAVertex_(consumes(iConfig.getParameter("pixelVertexSrc"))), + 
tokenBeamSpot_(consumes(iConfig.getParameter("beamSpotSrc"))), + topFolderName_(iConfig.getParameter("topFolderName")) {} + +// +// -- Analyze +// +void SiPixelMonitorVertexSoAAlpaka::analyze(const edm::Event& iEvent, const edm::EventSetup& iSetup) { + const auto& vsoaHandle = iEvent.getHandle(tokenSoAVertex_); + if (!vsoaHandle.isValid()) { + edm::LogWarning("SiPixelMonitorVertexSoAAlpaka") << "No Vertex SoA found \n returning!" << std::endl; + return; + } + + auto const& vsoa = *vsoaHandle; + int nVertices = vsoa.view().nvFinal(); + auto bsHandle = iEvent.getHandle(tokenBeamSpot_); + float x0 = 0., y0 = 0., z0 = 0., dxdz = 0., dydz = 0.; + if (!bsHandle.isValid()) { + edm::LogWarning("SiPixelMonitorVertexSoAAlpaka") << "No beamspot found. returning vertexes with (0,0,Z) "; + } else { + const reco::BeamSpot& bs = *bsHandle; + x0 = bs.x0(); + y0 = bs.y0(); + z0 = bs.z0(); + dxdz = bs.dxdz(); + dydz = bs.dydz(); + } + + for (int iv = 0; iv < nVertices; iv++) { + auto si = vsoa.view()[iv].sortInd(); + auto z = vsoa.view()[si].zv(); + auto x = x0 + dxdz * z; + auto y = y0 + dydz * z; + + z += z0; + hx->Fill(x); + hy->Fill(y); + hz->Fill(z); + auto ndof = vsoa.view()[si].ndof(); + hchi2->Fill(vsoa.view()[si].chi2()); + hchi2oNdof->Fill(vsoa.view()[si].chi2() / ndof); + hptv2->Fill(vsoa.view()[si].ptv2()); + hntrks->Fill(ndof + 1); + } + hnVertex->Fill(nVertices); +} + +// +// -- Book Histograms +// +void SiPixelMonitorVertexSoAAlpaka::bookHistograms(DQMStore::IBooker& ibooker, + edm::Run const& iRun, + edm::EventSetup const& iSetup) { + //std::string top_folder = ""// + ibooker.cd(); + ibooker.setCurrentFolder(topFolderName_); + hnVertex = ibooker.book1D("nVertex", ";# of Vertices;#entries", 101, -0.5, 100.5); + hx = ibooker.book1D("vx", ";Vertex x;#entries", 10, -5., 5.); + hy = ibooker.book1D("vy", ";Vertex y;#entries", 10, -5., 5.); + hz = ibooker.book1D("vz", ";Vertex z;#entries", 30, -30., 30); + hchi2 = ibooker.book1D("chi2", ";Vertex chi-squared;#entries", 40, 
0., 20.); + hchi2oNdof = ibooker.book1D("chi2oNdof", ";Vertex chi-squared/Ndof;#entries", 40, 0., 20.); + hptv2 = ibooker.book1D("ptsq", ";Vertex #sum (p_{T})^{2};#entries", 200, 0., 200.); + hntrks = ibooker.book1D("ntrk", ";#tracks associated;#entries", 100, -0.5, 99.5); +} + +void SiPixelMonitorVertexSoAAlpaka::fillDescriptions(edm::ConfigurationDescriptions& descriptions) { + // monitorpixelVertexSoA + edm::ParameterSetDescription desc; + desc.add("pixelVertexSrc", edm::InputTag("pixelVerticesAlpaka")); + desc.add("beamSpotSrc", edm::InputTag("offlineBeamSpot")); + desc.add("topFolderName", "SiPixelHeterogeneous/PixelVertexAlpaka"); + descriptions.addWithDefaultLabel(desc); +} + +DEFINE_FWK_MODULE(SiPixelMonitorVertexSoAAlpaka); diff --git a/DQM/SiPixelHeterogeneous/python/SiPixelHeterogenousDQM_FirstStep_cff.py b/DQM/SiPixelHeterogeneous/python/SiPixelHeterogenousDQM_FirstStep_cff.py index dfb83708c95cf..967152e9ca6c2 100644 --- a/DQM/SiPixelHeterogeneous/python/SiPixelHeterogenousDQM_FirstStep_cff.py +++ b/DQM/SiPixelHeterogeneous/python/SiPixelHeterogenousDQM_FirstStep_cff.py @@ -7,20 +7,35 @@ from DQM.SiPixelHeterogeneous.siPixelPhase2MonitorTrackSoA_cfi import * from DQM.SiPixelHeterogeneous.siPixelHIonPhase1MonitorTrackSoA_cfi import * from DQM.SiPixelHeterogeneous.siPixelMonitorVertexSoA_cfi import * +# Alpaka Modules +from Configuration.ProcessModifiers.alpaka_cff import alpaka +from DQM.SiPixelHeterogeneous.siPixelPhase1MonitorRecHitsSoAAlpaka_cfi import * +from DQM.SiPixelHeterogeneous.siPixelPhase2MonitorRecHitsSoAAlpaka_cfi import * +from DQM.SiPixelHeterogeneous.siPixelHIonPhase1MonitorRecHitsSoAAlpaka_cfi import * +from DQM.SiPixelHeterogeneous.siPixelPhase1MonitorTrackSoAAlpaka_cfi import * +from DQM.SiPixelHeterogeneous.siPixelPhase2MonitorTrackSoAAlpaka_cfi import * +from DQM.SiPixelHeterogeneous.siPixelHIonPhase1MonitorTrackSoAAlpaka_cfi import * +from DQM.SiPixelHeterogeneous.siPixelMonitorVertexSoAAlpaka_cfi import * # Run-3 sequence 
monitorpixelSoASource = cms.Sequence(siPixelPhase1MonitorRecHitsSoA * siPixelPhase1MonitorTrackSoA * siPixelMonitorVertexSoA) - +# Run-3 Alpaka sequence +monitorpixelSoASourceAlpaka = cms.Sequence(siPixelPhase1MonitorRecHitsSoAAlpaka * siPixelPhase1MonitorTrackSoAAlpaka * siPixelMonitorVertexSoAAlpaka) +alpaka.toReplaceWith(monitorpixelSoASource, monitorpixelSoASourceAlpaka) # Phase-2 sequence from Configuration.Eras.Modifier_phase2_tracker_cff import phase2_tracker _monitorpixelSoARecHitsSource = cms.Sequence(siPixelPhase2MonitorRecHitsSoA * siPixelPhase2MonitorTrackSoA * siPixelMonitorVertexSoA) -phase2_tracker.toReplaceWith(monitorpixelSoASource, _monitorpixelSoARecHitsSource) +(phase2_tracker & ~alpaka).toReplaceWith(monitorpixelSoASource, _monitorpixelSoARecHitsSource) +_monitorpixelSoARecHitsSourceAlpaka = cms.Sequence(siPixelPhase2MonitorRecHitsSoAAlpaka * siPixelPhase2MonitorTrackSoAAlpaka * siPixelMonitorVertexSoAAlpaka) +(phase2_tracker & alpaka).toReplaceWith(monitorpixelSoASource, _monitorpixelSoARecHitsSourceAlpaka) # HIon Phase 1 sequence from Configuration.ProcessModifiers.pp_on_AA_cff import pp_on_AA _monitorpixelSoARecHitsSourceHIon = cms.Sequence(siPixelHIonPhase1MonitorRecHitsSoA * siPixelHIonPhase1MonitorTrackSoA * siPixelMonitorVertexSoA) (pp_on_AA & ~phase2_tracker).toReplaceWith(monitorpixelSoASource, _monitorpixelSoARecHitsSourceHIon) +_monitorpixelSoARecHitsSourceHIonAlpaka = cms.Sequence(siPixelHIonPhase1MonitorRecHitsSoAAlpaka * siPixelHIonPhase1MonitorTrackSoAAlpaka * siPixelMonitorVertexSoAAlpaka) +(pp_on_AA & ~phase2_tracker & alpaka).toReplaceWith(monitorpixelSoASource, _monitorpixelSoARecHitsSourceHIonAlpaka) #Define the sequence for GPU vs CPU validation #This should run:- individual monitor for the 2 collections + comparison module @@ -33,6 +48,16 @@ from DQM.SiPixelHeterogeneous.siPixelCompareVertexSoA_cfi import * from DQM.SiPixelHeterogeneous.siPixelPhase1RawDataErrorComparator_cfi import * from 
DQM.SiPixelPhase1Common.SiPixelPhase1RawData_cfi import * +#Alpaka +from DQM.SiPixelHeterogeneous.siPixelPhase1CompareRecHitsSoAAlpaka_cfi import * +from DQM.SiPixelHeterogeneous.siPixelPhase2CompareRecHitsSoAAlpaka_cfi import * +from DQM.SiPixelHeterogeneous.siPixelHIonPhase1CompareRecHitsSoAAlpaka_cfi import * +from DQM.SiPixelHeterogeneous.siPixelPhase1CompareTrackSoAAlpaka_cfi import * +from DQM.SiPixelHeterogeneous.siPixelPhase2CompareTrackSoAAlpaka_cfi import * +from DQM.SiPixelHeterogeneous.siPixelHIonPhase1CompareTrackSoAAlpaka_cfi import * +from DQM.SiPixelHeterogeneous.siPixelCompareVertexSoAAlpaka_cfi import * +# from DQM.SiPixelHeterogeneous.siPixelPhase1RawDataErrorComparator_cfi import * +# from DQM.SiPixelPhase1Common.SiPixelPhase1RawData_cfi import * # digi errors SiPixelPhase1RawDataConfForCPU = copy.deepcopy(SiPixelPhase1RawDataConf) @@ -126,6 +151,43 @@ topFolderName = 'SiPixelHeterogeneous/PixelVertexSoAGPU', ) +### Alpaka + +# PixelRecHits: monitor of CPUSerial product (Alpaka backend: 'serial_sync') +siPixelRecHitsSoAMonitorSerial = siPixelPhase1MonitorRecHitsSoAAlpaka.clone( + pixelHitsSrc = cms.InputTag( 'siPixelRecHitsPreSplittingAlpakaSerial' ), + TopFolderName = cms.string( 'SiPixelHeterogeneous/PixelRecHitsSerial' ) +) + +# PixelRecHits: monitor of Device product (Alpaka backend: '') +siPixelRecHitsSoAMonitorDevice = siPixelPhase1MonitorRecHitsSoAAlpaka.clone( + pixelHitsSrc = cms.InputTag( 'siPixelRecHitsPreSplittingAlpaka' ), + TopFolderName = cms.string( 'SiPixelHeterogeneous/PixelRecHitsDevice' ) +) + +# PixelTracks: monitor of CPUSerial product (Alpaka backend: 'serial_sync') +siPixelTrackSoAMonitorSerial = siPixelPhase1MonitorTrackSoAAlpaka.clone( + pixelTrackSrc = cms.InputTag('pixelTracksAlpakaSerial'), + topFolderName = cms.string('SiPixelHeterogeneous/PixelTrackSerial') +) + +# PixelTracks: monitor of CPUSerial product (Alpaka backend: 'serial_sync') +siPixelTrackSoAMonitorDevice = siPixelPhase1MonitorTrackSoAAlpaka.clone( + 
pixelTrackSrc = cms.InputTag('pixelTracksAlpaka'), + topFolderName = cms.string('SiPixelHeterogeneous/PixelTrackDevice') +) + +# PixelVertices: monitor of CPUSerial product (Alpaka backend: 'serial_sync') +siPixelVertexSoAMonitorSerial = siPixelMonitorVertexSoAAlpaka.clone( + pixelVertexSrc = cms.InputTag("pixelVerticesAlpakaSerial"), + topFolderName = cms.string('SiPixelHeterogeneous/PixelVertexSerial') +) + +siPixelVertexSoAMonitorDevice = siPixelMonitorVertexSoAAlpaka.clone( + pixelVertexSrc = cms.InputTag("pixelVerticesAlpaka"), + topFolderName = cms.string('SiPixelHeterogeneous/PixelVertexDevice') +) + # Run-3 sequence monitorpixelSoACompareSource = cms.Sequence(siPixelPhase1MonitorRawDataACPU * siPixelPhase1MonitorRawDataAGPU * @@ -139,6 +201,17 @@ siPixelMonitorVertexSoAGPU * siPixelCompareVertexSoA * siPixelPhase1RawDataErrorComparator) +# and the Alpaka version +monitorpixelSoACompareSourceAlpaka = cms.Sequence( + siPixelRecHitsSoAMonitorSerial * + siPixelRecHitsSoAMonitorDevice * + siPixelPhase1CompareRecHitsSoAAlpaka * + siPixelTrackSoAMonitorSerial * + siPixelTrackSoAMonitorDevice * + siPixelPhase1CompareTrackSoAAlpaka * + siPixelVertexSoAMonitorSerial * + siPixelVertexSoAMonitorDevice * + siPixelCompareVertexSoAAlpaka ) # Phase-2 sequence _monitorpixelSoACompareSource = cms.Sequence(siPixelPhase2MonitorRecHitsSoACPU * @@ -166,3 +239,6 @@ from Configuration.ProcessModifiers.gpuValidationPixel_cff import gpuValidationPixel gpuValidationPixel.toReplaceWith(monitorpixelSoASource, monitorpixelSoACompareSource) + +from Configuration.ProcessModifiers.alpakaValidationPixel_cff import alpakaValidationPixel +(alpakaValidationPixel & ~gpuValidationPixel).toReplaceWith(monitorpixelSoASource, monitorpixelSoACompareSourceAlpaka) diff --git a/DataFormats/BeamSpot/interface/BeamSpotHostProduct.h b/DataFormats/BeamSpot/interface/BeamSpotHostProduct.h new file mode 100644 index 0000000000000..596fdef0d45e6 --- /dev/null +++ 
b/DataFormats/BeamSpot/interface/BeamSpotHostProduct.h @@ -0,0 +1,10 @@ +#ifndef DataFormats_BeamSpot_interface_BeamSpotHostProduct_h +#define DataFormats_BeamSpot_interface_BeamSpotHostProduct_h + +#include "DataFormats/BeamSpot/interface/BeamSpotPOD.h" +#include "DataFormats/Portable/interface/PortableHostProduct.h" + +// simplified representation of the beamspot data, in host memory +using BeamSpotHostProduct = PortableHostProduct; + +#endif // DataFormats_BeamSpot_interface_BeamSpotHostProduct_h diff --git a/DataFormats/BeamSpot/interface/alpaka/BeamSpotDeviceProduct.h b/DataFormats/BeamSpot/interface/alpaka/BeamSpotDeviceProduct.h new file mode 100644 index 0000000000000..1199657f4fd3f --- /dev/null +++ b/DataFormats/BeamSpot/interface/alpaka/BeamSpotDeviceProduct.h @@ -0,0 +1,15 @@ +#ifndef DataFormats_BeamSpot_interface_alpaka_BeamSpotDeviceProduct_h +#define DataFormats_BeamSpot_interface_alpaka_BeamSpotDeviceProduct_h + +#include "DataFormats/BeamSpot/interface/BeamSpotPOD.h" +#include "DataFormats/Portable/interface/alpaka/PortableProduct.h" +#include "HeterogeneousCore/AlpakaInterface/interface/config.h" + +namespace ALPAKA_ACCELERATOR_NAMESPACE { + + // simplified representation of the beamspot data, in device global memory + using BeamSpotDeviceProduct = PortableProduct; + +} + +#endif // DataFormats_BeamSpot_interface_alpaka_BeamSpotDeviceProduct_h diff --git a/DataFormats/Portable/interface/PortableDeviceProduct.h b/DataFormats/Portable/interface/PortableDeviceProduct.h new file mode 100644 index 0000000000000..9b8ee185ffacb --- /dev/null +++ b/DataFormats/Portable/interface/PortableDeviceProduct.h @@ -0,0 +1,74 @@ +#ifndef DataFormats_Portable_interface_PortableDeviceProduct_h +#define DataFormats_Portable_interface_PortableDeviceProduct_h + +#include +#include +#include + +#include + +#include "HeterogeneousCore/AlpakaInterface/interface/config.h" +#include "HeterogeneousCore/AlpakaInterface/interface/memory.h" + +// generic SoA-based product in 
device memory +template >> +class PortableDeviceProduct { + static_assert(not std::is_same_v, + "Use PortableHostProduct instead of PortableDeviceProduct"); + +public: + using Product = T; + using Buffer = cms::alpakatools::device_buffer; + using ConstBuffer = cms::alpakatools::const_device_buffer; + + PortableDeviceProduct() = default; + + PortableDeviceProduct(TDev const& device) + // allocate global device memory + : buffer_{cms::alpakatools::make_device_buffer(device)} + { + assert(reinterpret_cast(buffer_->data()) % alignof(Product) == 0); + } + + template >> + PortableDeviceProduct(TQueue const& queue) + // allocate global device memory with queue-ordered semantic + : buffer_{cms::alpakatools::make_device_buffer(queue)} + { + assert(reinterpret_cast(buffer_->data()) % alignof(Product) == 0); + } + + // non-copyable + PortableDeviceProduct(PortableDeviceProduct const&) = delete; + PortableDeviceProduct& operator=(PortableDeviceProduct const&) = delete; + + // movable + PortableDeviceProduct(PortableDeviceProduct&&) = default; + PortableDeviceProduct& operator=(PortableDeviceProduct&&) = default; + + // default destructor + ~PortableDeviceProduct() = default; + + // access the product + Product& value() { return *buffer_->data(); } + Product const& value() const { return *buffer_->data(); } + + Product* data() { return buffer_->data(); } + Product const* data() const { return buffer_->data(); } + + Product& operator*() { return *buffer_->data(); } + Product const& operator*() const { return *buffer_->data(); } + + Product* operator->() { return buffer_->data(); } + Product const* operator->() const { return buffer_->data(); } + + // access the buffer + Buffer buffer() { return *buffer_; } + ConstBuffer buffer() const { return *buffer_; } + ConstBuffer const_buffer() const { return *buffer_; } + +private: + std::optional buffer_; +}; + +#endif // DataFormats_Portable_interface_PortableDeviceProduct_h diff --git 
a/DataFormats/Portable/interface/PortableHostProduct.h b/DataFormats/Portable/interface/PortableHostProduct.h new file mode 100644 index 0000000000000..e3e2d6e31d3cc --- /dev/null +++ b/DataFormats/Portable/interface/PortableHostProduct.h @@ -0,0 +1,103 @@ +#ifndef DataFormats_Portable_interface_PortableHostProduct_h +#define DataFormats_Portable_interface_PortableHostProduct_h + +#include +#include + +#include + +#include "HeterogeneousCore/AlpakaInterface/interface/config.h" +#include "HeterogeneousCore/AlpakaInterface/interface/host.h" +#include "HeterogeneousCore/AlpakaInterface/interface/memory.h" + +// generic SoA-based product in host memory +template +class PortableHostProduct { +public: + using Product = T; + using Buffer = cms::alpakatools::host_buffer; + using ConstBuffer = cms::alpakatools::const_host_buffer; + + PortableHostProduct() = default; + + PortableHostProduct(alpaka_common::DevHost const& host) + // allocate pageable host memory + : buffer_{cms::alpakatools::make_host_buffer()}, + product_{buffer_->data()} + { + assert(reinterpret_cast(product_) % alignof(Product) == 0); + } + + template >> + PortableHostProduct(TQueue const& queue) + // allocate pinned host memory associated to the given work queue, accessible by the queue's device + : buffer_{cms::alpakatools::make_host_buffer(queue)}, + product_{buffer_->data()} + { + assert(reinterpret_cast(product_) % alignof(Product) == 0); + } + + // non-copyable + PortableHostProduct(PortableHostProduct const&) = delete; + PortableHostProduct& operator=(PortableHostProduct const&) = delete; + + // movable + PortableHostProduct(PortableHostProduct&&) = default; + PortableHostProduct& operator=(PortableHostProduct&&) = default; + + // default destructor + ~PortableHostProduct() = default; + + // access the product + Product& value() { return *product_; } + Product const& value() const { return *product_; } + + Product* data() { return product_; } + Product const* data() const { return product_; } + + 
Product& operator*() { return *product_; } + Product const& operator*() const { return *product_; } + + Product* operator->() { return product_; } + Product const* operator->() const { return product_; } + + // access the buffer + Buffer buffer() { return *buffer_; } + ConstBuffer buffer() const { return *buffer_; } + ConstBuffer const_buffer() const { return *buffer_; } + + // part of the ROOT read streamer + static void ROOTReadStreamer(PortableHostProduct* newObj, Product& product) { + std::cerr << "ROOT object at " << &product << std::endl; + std::cerr << "id: " << product.id << std::endl; + + std::cerr << "CMSSW object at " << newObj << std::endl; + std::cerr << "buffer present ? " << newObj->buffer_.has_value() << std::endl; + if (newObj->buffer_.has_value()) { + std::cerr << "buffer content at " << newObj->buffer_->data() << std::endl; + } + std::cerr << "struct content at " << newObj->product_ << std::endl; + if (newObj->product_) { + std::cerr << "id: " << newObj->product_->id << std::endl; + } + newObj->~PortableHostProduct(); + // use the global "host" object returned by cms::alpakatools::host() + new (newObj) PortableHostProduct(cms::alpakatools::host()); + std::memcpy(newObj->product_, &product, sizeof(Product)); + std::cerr << "CMSSW object at " << newObj << std::endl; + std::cerr << "buffer present ? " << newObj->buffer_.has_value() << std::endl; + if (newObj->buffer_.has_value()) { + std::cerr << "buffer content at " << newObj->buffer_->data() << std::endl; + } + std::cerr << "struct content at " << newObj->product_ << std::endl; + if (newObj->product_) { + std::cerr << "id: " << newObj->product_->id << std::endl; + } + } + +private: + std::optional buffer_; //! 
+ Product* product_ = nullptr;  // stays null until a constructor allocates the buffer, so a default-constructed object can be inspected safely (e.g. by ROOTReadStreamer) +}; + +#endif // DataFormats_Portable_interface_PortableHostProduct_h diff --git a/DataFormats/Portable/interface/PortableProduct.h b/DataFormats/Portable/interface/PortableProduct.h new file mode 100644 index 0000000000000..32acd91bbe1c9 --- /dev/null +++ b/DataFormats/Portable/interface/PortableProduct.h @@ -0,0 +1,18 @@ +#ifndef DataFormats_Portable_interface_PortableProduct_h +#define DataFormats_Portable_interface_PortableProduct_h + +#include "HeterogeneousCore/AlpakaInterface/interface/traits.h" + +namespace traits { + + // trait for a generic SoA-based product + template >> + class PortableProductTrait; + +} // namespace traits + +// type alias for a generic SoA-based product +template >> +using PortableProduct = typename traits::PortableProductTrait::ProductType; + +#endif // DataFormats_Portable_interface_PortableProduct_h diff --git a/DataFormats/Portable/interface/alpaka/PortableProduct.h b/DataFormats/Portable/interface/alpaka/PortableProduct.h new file mode 100644 index 0000000000000..7982d394863af --- /dev/null +++ b/DataFormats/Portable/interface/alpaka/PortableProduct.h @@ -0,0 +1,65 @@ +#ifndef DataFormats_Portable_interface_alpaka_PortableProduct_h +#define DataFormats_Portable_interface_alpaka_PortableProduct_h + +#include + +#include "DataFormats/Portable/interface/PortableProduct.h" +#include "DataFormats/Portable/interface/PortableHostProduct.h" +#include "DataFormats/Portable/interface/PortableDeviceProduct.h" +#include "HeterogeneousCore/AlpakaInterface/interface/config.h" +#include "HeterogeneousCore/AlpakaInterface/interface/CopyToDevice.h" +#include "HeterogeneousCore/AlpakaInterface/interface/CopyToHost.h" + +namespace ALPAKA_ACCELERATOR_NAMESPACE { + +#if defined ALPAKA_ACC_CPU_B_SEQ_T_SEQ_ENABLED + // ...
or any other CPU-based accelerators + + // generic SoA-based product in host memory + template + using PortableProduct = ::PortableHostProduct; + +#else + + // generic SoA-based product in device memory + template + using PortableProduct = ::PortableDeviceProduct; + +#endif // ALPAKA_ACC_CPU_B_SEQ_T_SEQ_ENABLED + +} // namespace ALPAKA_ACCELERATOR_NAMESPACE + +namespace traits { + + // specialise the trait for the device provided by the ALPAKA_ACCELERATOR_NAMESPACE (a struct, so that ProductType is publicly accessible to the PortableProduct alias) + template + struct PortableProductTrait { + using ProductType = ALPAKA_ACCELERATOR_NAMESPACE::PortableProduct; + }; + +} // namespace traits + +namespace cms::alpakatools { + template + struct CopyToHost> { + template + static auto copyAsync(TQueue& queue, PortableDeviceProduct const& srcData) { + PortableHostProduct dstData(queue); + alpaka::memcpy(queue, dstData.buffer(), srcData.buffer()); + return dstData; + } + }; + + template + struct CopyToDevice> { + template + static auto copyAsync(TQueue& queue, PortableHostProduct const& srcData) { + using TDevice = typename alpaka::trait::DevType::type; + PortableDeviceProduct dstData(queue); + alpaka::memcpy(queue, dstData.buffer(), srcData.buffer()); + return dstData; + } + }; +} // namespace cms::alpakatools + +#endif // DataFormats_Portable_interface_alpaka_PortableProduct_h diff --git a/DataFormats/PortableTestObjects/interface/TestHostProduct.h b/DataFormats/PortableTestObjects/interface/TestHostProduct.h new file mode 100644 index 0000000000000..c6b93e431c220 --- /dev/null +++ b/DataFormats/PortableTestObjects/interface/TestHostProduct.h @@ -0,0 +1,14 @@ +#ifndef DataFormats_PortableTestObjects_interface_TestHostProduct_h +#define DataFormats_PortableTestObjects_interface_TestHostProduct_h + +#include "DataFormats/Portable/interface/PortableHostProduct.h" +#include "DataFormats/PortableTestObjects/interface/TestStruct.h" + +namespace portabletest { + + // struct with x, y, z, id fields in host memory + using TestHostProduct = PortableHostProduct; + +}
// namespace portabletest + +#endif // DataFormats_PortableTestObjects_interface_TestHostProduct_h diff --git a/DataFormats/PortableTestObjects/interface/alpaka/TestDeviceProduct.h b/DataFormats/PortableTestObjects/interface/alpaka/TestDeviceProduct.h new file mode 100644 index 0000000000000..3d1b0671085c9 --- /dev/null +++ b/DataFormats/PortableTestObjects/interface/alpaka/TestDeviceProduct.h @@ -0,0 +1,23 @@ +#ifndef DataFormats_PortableTestObjects_interface_alpaka_TestDeviceProduct_h +#define DataFormats_PortableTestObjects_interface_alpaka_TestDeviceProduct_h + +#include "DataFormats/Portable/interface/alpaka/PortableProduct.h" +#include "DataFormats/PortableTestObjects/interface/TestStruct.h" +#include "HeterogeneousCore/AlpakaInterface/interface/config.h" + +namespace ALPAKA_ACCELERATOR_NAMESPACE { + + namespace portabletest { + + // make the names from the top-level portabletest namespace visible for unqualified lookup + // inside the ALPAKA_ACCELERATOR_NAMESPACE::portabletest namespace + using namespace ::portabletest; + + // struct with x, y, z, id fields in device global memory + using TestDeviceProduct = PortableProduct; + + } // namespace portabletest + +} // namespace ALPAKA_ACCELERATOR_NAMESPACE + +#endif // DataFormats_PortableTestObjects_interface_alpaka_TestDeviceProduct_h diff --git a/DataFormats/SiPixelClusterSoA/BuildFile.xml b/DataFormats/SiPixelClusterSoA/BuildFile.xml new file mode 100644 index 0000000000000..cdcfd12ba7fa4 --- /dev/null +++ b/DataFormats/SiPixelClusterSoA/BuildFile.xml @@ -0,0 +1,9 @@ + + + + + + + + + diff --git a/DataFormats/SiPixelClusterSoA/interface/ClusteringConstants.h b/DataFormats/SiPixelClusterSoA/interface/ClusteringConstants.h new file mode 100644 index 0000000000000..efbce2a7f6647 --- /dev/null +++ b/DataFormats/SiPixelClusterSoA/interface/ClusteringConstants.h @@ -0,0 +1,34 @@ +#ifndef DataFormats_SiPixelClusterSoA_ClusteringConstants_h +#define DataFormats_SiPixelClusterSoA_ClusteringConstants_h + +#include 
+#include +//TODO: move this to TrackerTraits! +namespace pixelClustering { +#ifdef GPU_SMALL_EVENTS + // kept for testing and debugging + constexpr uint32_t maxHitsInIter() { return 64; } +#else + // optimized for real data PU 50 + // tested on MC events with 55-75 pileup events + constexpr uint32_t maxHitsInIter() { return 160; } //TODO better tuning for PU 140-200 +#endif + constexpr uint32_t maxHitsInModule() { return 1024; } + + constexpr uint16_t clusterThresholdLayerOne = 2000; + constexpr uint16_t clusterThresholdOtherLayers = 4000; + + constexpr uint16_t clusterThresholdPhase2LayerOne = 4000; + constexpr uint16_t clusterThresholdPhase2OtherLayers = 4000; + + constexpr uint32_t maxNumDigis = 3 * 256 * 1024; // @PU=200 µ=530k σ=50k this is >4σ away + constexpr uint16_t maxNumModules = 4000; + + constexpr int32_t maxNumClustersPerModules = maxHitsInModule(); + constexpr uint16_t invalidModuleId = std::numeric_limits::max() - 1; + constexpr int invalidClusterId = -9999; + static_assert(invalidModuleId > maxNumModules); // invalidModuleId must be > maxNumModules + +} // namespace pixelClustering + +#endif // DataFormats_SiPixelClusterSoA_ClusteringConstants_h diff --git a/DataFormats/SiPixelClusterSoA/interface/SiPixelClustersDevice.h b/DataFormats/SiPixelClusterSoA/interface/SiPixelClustersDevice.h new file mode 100644 index 0000000000000..2593475bf5c3a --- /dev/null +++ b/DataFormats/SiPixelClusterSoA/interface/SiPixelClustersDevice.h @@ -0,0 +1,38 @@ +#ifndef DataFormats_SiPixelClusterSoA_interface_SiPixelClustersDevice_h +#define DataFormats_SiPixelClusterSoA_interface_SiPixelClustersDevice_h + +#include +#include +#include "HeterogeneousCore/AlpakaInterface/interface/config.h" +#include "DataFormats/SiPixelClusterSoA/interface/SiPixelClustersHost.h" +#include "DataFormats/Portable/interface/PortableDeviceCollection.h" +#include "DataFormats/SiPixelClusterSoA/interface/SiPixelClustersSoA.h" +#include 
"HeterogeneousCore/AlpakaInterface/interface/CopyToHost.h" + +template +class SiPixelClustersDevice : public PortableDeviceCollection { +public: + SiPixelClustersDevice() = default; + + template + explicit SiPixelClustersDevice(size_t maxModules, TQueue queue) + : PortableDeviceCollection(maxModules + 1, queue) {} + + // Constructor which specifies the SoA size + explicit SiPixelClustersDevice(size_t maxModules, TDev const &device) + : PortableDeviceCollection(maxModules + 1, device) {} + + void setNClusters(uint32_t nClusters, int32_t offsetBPIX2) { + nClusters_h = nClusters; + offsetBPIX2_h = offsetBPIX2; + } + + uint32_t nClusters() const { return nClusters_h; } + int32_t offsetBPIX2() const { return offsetBPIX2_h; } + +private: + uint32_t nClusters_h = 0; + int32_t offsetBPIX2_h = 0; +}; + +#endif // DataFormats_SiPixelClusterSoA_interface_SiPixelClustersDevice_h diff --git a/DataFormats/SiPixelClusterSoA/interface/SiPixelClustersHost.h b/DataFormats/SiPixelClusterSoA/interface/SiPixelClustersHost.h new file mode 100644 index 0000000000000..eb086160a6188 --- /dev/null +++ b/DataFormats/SiPixelClusterSoA/interface/SiPixelClustersHost.h @@ -0,0 +1,33 @@ +#ifndef DataFormats_SiPixelClusterSoA_interface_SiPixelClustersHost_h +#define DataFormats_SiPixelClusterSoA_interface_SiPixelClustersHost_h + +#include +#include "HeterogeneousCore/AlpakaInterface/interface/config.h" +#include "DataFormats/Portable/interface/PortableHostCollection.h" +#include "DataFormats/SiPixelClusterSoA/interface/SiPixelClustersSoA.h" + +// TODO: The class is created via inheritance of the PortableCollection. +// This is generally discouraged, and should be done via composition. 
+// See: https://github.com/cms-sw/cmssw/pull/40465#discussion_r1067364306 +class SiPixelClustersHost : public PortableHostCollection { +public: + SiPixelClustersHost() = default; + + template + explicit SiPixelClustersHost(size_t maxModules, TQueue queue) + : PortableHostCollection(maxModules + 1, queue) {} + + void setNClusters(uint32_t nClusters, int32_t offsetBPIX2) { + nClusters_h = nClusters; + offsetBPIX2_h = offsetBPIX2; + } + + uint32_t nClusters() const { return nClusters_h; } + int32_t offsetBPIX2() const { return offsetBPIX2_h; } + +private: + uint32_t nClusters_h = 0; + int32_t offsetBPIX2_h = 0; +}; + +#endif // DataFormats_SiPixelClusterSoA_interface_SiPixelClustersHost_h diff --git a/DataFormats/SiPixelClusterSoA/interface/SiPixelClustersSoA.h b/DataFormats/SiPixelClusterSoA/interface/SiPixelClustersSoA.h new file mode 100644 index 0000000000000..c44c0148662ff --- /dev/null +++ b/DataFormats/SiPixelClusterSoA/interface/SiPixelClustersSoA.h @@ -0,0 +1,16 @@ +#ifndef DataFormats_SiPixelClusterSoA_SiPixelClustersLayout_h +#define DataFormats_SiPixelClusterSoA_SiPixelClustersLayout_h + +#include "DataFormats/SoATemplate/interface/SoALayout.h" + +GENERATE_SOA_LAYOUT(SiPixelClustersLayout, + SOA_COLUMN(uint32_t, moduleStart), + SOA_COLUMN(uint32_t, clusInModule), + SOA_COLUMN(uint32_t, moduleId), + SOA_COLUMN(uint32_t, clusModuleStart)) + +using SiPixelClustersSoA = SiPixelClustersLayout<>; +using SiPixelClustersSoAView = SiPixelClustersSoA::View; +using SiPixelClustersSoAConstView = SiPixelClustersSoA::ConstView; + +#endif // DataFormats_SiPixelClusterSoA_SiPixelClustersLayout_h diff --git a/DataFormats/SiPixelClusterSoA/interface/alpaka/SiPixelClustersCollection.h b/DataFormats/SiPixelClusterSoA/interface/alpaka/SiPixelClustersCollection.h new file mode 100644 index 0000000000000..71115192cb9df --- /dev/null +++ b/DataFormats/SiPixelClusterSoA/interface/alpaka/SiPixelClustersCollection.h @@ -0,0 +1,36 @@ +#ifndef 
DataFormats_SiPixelClusterSoA_interface_alpaka_SiPixelClustersCollection_h +#define DataFormats_SiPixelClusterSoA_interface_alpaka_SiPixelClustersCollection_h + +#include + +#include + +#include "DataFormats/Portable/interface/alpaka/PortableCollection.h" +#include "DataFormats/SiPixelClusterSoA/interface/SiPixelClustersDevice.h" +#include "DataFormats/SiPixelClusterSoA/interface/SiPixelClustersHost.h" +#include "DataFormats/SiPixelClusterSoA/interface/SiPixelClustersSoA.h" +#include "HeterogeneousCore/AlpakaInterface/interface/CopyToHost.h" +#include "HeterogeneousCore/AlpakaInterface/interface/config.h" + +namespace ALPAKA_ACCELERATOR_NAMESPACE { +#ifdef ALPAKA_ACC_CPU_B_SEQ_T_SEQ_ENABLED + using SiPixelClustersCollection = SiPixelClustersHost; +#else + using SiPixelClustersCollection = SiPixelClustersDevice; +#endif +} // namespace ALPAKA_ACCELERATOR_NAMESPACE + +namespace cms::alpakatools { + template <> + struct CopyToHost { + template + static auto copyAsync(TQueue &queue, ALPAKA_ACCELERATOR_NAMESPACE::SiPixelClustersCollection const &srcData) { + SiPixelClustersHost dstData(srcData->metadata().size() - 1, queue);  // the cluster constructors allocate maxModules + 1 elements: subtract one so that source and destination have the same size and layout + alpaka::memcpy(queue, dstData.buffer(), srcData.buffer()); + dstData.setNClusters(srcData.nClusters(), srcData.offsetBPIX2()); + return dstData; + } + }; +} // namespace cms::alpakatools + +#endif // DataFormats_SiPixelClusterSoA_interface_alpaka_SiPixelClustersCollection_h diff --git a/DataFormats/SiPixelClusterSoA/src/alpaka/classes_cuda.h b/DataFormats/SiPixelClusterSoA/src/alpaka/classes_cuda.h new file mode 100644 index 0000000000000..ee177cb60b93c --- /dev/null +++ b/DataFormats/SiPixelClusterSoA/src/alpaka/classes_cuda.h @@ -0,0 +1,8 @@ +#ifndef DataFormats_SiPixelClusterSoA_src_alpaka_classes_cuda_h +#define DataFormats_SiPixelClusterSoA_src_alpaka_classes_cuda_h + +#include "DataFormats/Common/interface/Wrapper.h" +#include "DataFormats/Common/interface/DeviceProduct.h" +#include
"DataFormats/SiPixelClusterSoA/interface/alpaka/SiPixelClustersCollection.h" + +#endif // DataFormats_SiPixelClusterSoA_src_alpaka_classes_cuda_h diff --git a/DataFormats/SiPixelClusterSoA/src/alpaka/classes_cuda_def.xml b/DataFormats/SiPixelClusterSoA/src/alpaka/classes_cuda_def.xml new file mode 100644 index 0000000000000..529b72d3fe830 --- /dev/null +++ b/DataFormats/SiPixelClusterSoA/src/alpaka/classes_cuda_def.xml @@ -0,0 +1,6 @@ + + + + + + diff --git a/DataFormats/SiPixelClusterSoA/src/alpaka/classes_rocm.h b/DataFormats/SiPixelClusterSoA/src/alpaka/classes_rocm.h new file mode 100644 index 0000000000000..187fc15f7012f --- /dev/null +++ b/DataFormats/SiPixelClusterSoA/src/alpaka/classes_rocm.h @@ -0,0 +1,8 @@ +#ifndef DataFormats_SiPixelClusterSoA_src_alpaka_classes_rocm_h +#define DataFormats_SiPixelClusterSoA_src_alpaka_classes_rocm_h + +#include "DataFormats/Common/interface/Wrapper.h" +#include "DataFormats/Common/interface/DeviceProduct.h" +#include "DataFormats/SiPixelClusterSoA/interface/alpaka/SiPixelClustersCollection.h" + +#endif // DataFormats_SiPixelClusterSoA_src_alpaka_classes_rocm_h diff --git a/DataFormats/SiPixelClusterSoA/src/alpaka/classes_rocm_def.xml b/DataFormats/SiPixelClusterSoA/src/alpaka/classes_rocm_def.xml new file mode 100644 index 0000000000000..a8d914519f51d --- /dev/null +++ b/DataFormats/SiPixelClusterSoA/src/alpaka/classes_rocm_def.xml @@ -0,0 +1,6 @@ + + + + + + diff --git a/DataFormats/SiPixelClusterSoA/src/classes.h b/DataFormats/SiPixelClusterSoA/src/classes.h new file mode 100644 index 0000000000000..8514c7732375b --- /dev/null +++ b/DataFormats/SiPixelClusterSoA/src/classes.h @@ -0,0 +1,7 @@ +#ifndef DataFormats_SiPixelClusterSoA_src_classes_h +#define DataFormats_SiPixelClusterSoA_src_classes_h + +#include "DataFormats/Common/interface/Wrapper.h" +#include "DataFormats/SiPixelClusterSoA/interface/SiPixelClustersHost.h" + +#endif // DataFormats_SiPixelClusterSoA_src_classes_h diff --git 
a/DataFormats/SiPixelClusterSoA/src/classes_def.xml b/DataFormats/SiPixelClusterSoA/src/classes_def.xml new file mode 100644 index 0000000000000..57c7e7df2d83a --- /dev/null +++ b/DataFormats/SiPixelClusterSoA/src/classes_def.xml @@ -0,0 +1,19 @@ + + + + + + ::ROOTReadStreamer(newObj, onfile.layout_); + ]]> + + + + + diff --git a/DataFormats/SiPixelClusterSoA/test/BuildFile.xml b/DataFormats/SiPixelClusterSoA/test/BuildFile.xml new file mode 100644 index 0000000000000..f99a54c6b0263 --- /dev/null +++ b/DataFormats/SiPixelClusterSoA/test/BuildFile.xml @@ -0,0 +1,6 @@ + + + + + + diff --git a/DataFormats/SiPixelClusterSoA/test/alpaka/Clusters_test.cc b/DataFormats/SiPixelClusterSoA/test/alpaka/Clusters_test.cc new file mode 100644 index 0000000000000..0c8634b835108 --- /dev/null +++ b/DataFormats/SiPixelClusterSoA/test/alpaka/Clusters_test.cc @@ -0,0 +1,46 @@ +#include +#include + +#include "DataFormats/SiPixelClusterSoA/interface/alpaka/SiPixelClustersCollection.h" +#include "DataFormats/SiPixelClusterSoA/interface/SiPixelClustersDevice.h" +#include "DataFormats/SiPixelClusterSoA/interface/SiPixelClustersHost.h" +#include "DataFormats/SiPixelClusterSoA/interface/SiPixelClustersSoA.h" + +#include "HeterogeneousCore/AlpakaInterface/interface/devices.h" +#include "HeterogeneousCore/AlpakaInterface/interface/host.h" +#include "HeterogeneousCore/AlpakaInterface/interface/memory.h" +#include "HeterogeneousCore/AlpakaInterface/interface/config.h" +#include "HeterogeneousCore/AlpakaInterface/interface/workdivision.h" + +using namespace ALPAKA_ACCELERATOR_NAMESPACE; + +namespace ALPAKA_ACCELERATOR_NAMESPACE { + namespace testClusterSoA { + + void runKernels(SiPixelClustersSoAView clust_view, Queue& queue); + } +} // namespace ALPAKA_ACCELERATOR_NAMESPACE + +int main() { + const auto host = cms::alpakatools::host(); + const auto device = cms::alpakatools::devices()[0]; + Queue queue(device); + + // Inner scope to deallocate memory before destroying the stream + { + // 
Instantiate clusters on device. PortableDeviceCollection allocates + // SoA on device automatically. + SiPixelClustersCollection clusters_d(100, queue); + testClusterSoA::runKernels(clusters_d.view(), queue); + + // Instantiate clusters on host. This is where the data will be + // copied to from device. + SiPixelClustersHost clusters_h(clusters_d.view().metadata().size() - 1, queue);  // the constructor adds one element: subtract one to match the device collection size + + std::cout << clusters_h.view().metadata().size() << std::endl; + alpaka::memcpy(queue, clusters_h.buffer(), clusters_d.const_buffer()); + alpaka::wait(queue); + } + + return 0; +} diff --git a/DataFormats/SiPixelClusterSoA/test/alpaka/Clusters_test.dev.cc b/DataFormats/SiPixelClusterSoA/test/alpaka/Clusters_test.dev.cc new file mode 100644 index 0000000000000..dd903f3eec759 --- /dev/null +++ b/DataFormats/SiPixelClusterSoA/test/alpaka/Clusters_test.dev.cc @@ -0,0 +1,49 @@ +#include "DataFormats/SiPixelClusterSoA/interface/alpaka/SiPixelClustersCollection.h" +#include "DataFormats/SiPixelClusterSoA/interface/SiPixelClustersDevice.h" +#include "DataFormats/SiPixelClusterSoA/interface/SiPixelClustersHost.h" +#include "HeterogeneousCore/AlpakaInterface/interface/workdivision.h" +#include "HeterogeneousCore/AlpakaInterface/interface/traits.h" + +using namespace alpaka; + +namespace ALPAKA_ACCELERATOR_NAMESPACE { + + using namespace cms::alpakatools; + namespace testClusterSoA { + + class TestFillKernel { + public: + template >> + ALPAKA_FN_ACC void operator()(TAcc const& acc, SiPixelClustersSoAView clust_view) const { + for (int32_t j : elements_with_stride(acc, clust_view.metadata().size())) { + clust_view[j].moduleStart() = j; + clust_view[j].clusInModule() = j * 2; + clust_view[j].moduleId() = j * 3; + clust_view[j].clusModuleStart() = j * 4; + } + } + }; + + class TestVerifyKernel { + public: + template >> + ALPAKA_FN_ACC void operator()(TAcc const& acc, SiPixelClustersSoAConstView clust_view) const { + for (uint32_t j : elements_with_stride(acc, clust_view.metadata().size())) { +
assert(clust_view[j].moduleStart() == j); + assert(clust_view[j].clusInModule() == j * 2); + assert(clust_view[j].moduleId() == j * 3); + assert(clust_view[j].clusModuleStart() == j * 4); + } + } + }; + + void runKernels(SiPixelClustersSoAView clust_view, Queue& queue) { + uint32_t items = 64; + uint32_t groups = divide_up_by(clust_view.metadata().size(), items); + auto workDiv = make_workdiv(groups, items); + alpaka::exec(queue, workDiv, TestFillKernel{}, clust_view); + alpaka::exec(queue, workDiv, TestVerifyKernel{}, clust_view); + } + + } // namespace testClusterSoA +} // namespace ALPAKA_ACCELERATOR_NAMESPACE diff --git a/DataFormats/SiPixelDigiSoA/BuildFile.xml b/DataFormats/SiPixelDigiSoA/BuildFile.xml new file mode 100644 index 0000000000000..02343c9974e4a --- /dev/null +++ b/DataFormats/SiPixelDigiSoA/BuildFile.xml @@ -0,0 +1,11 @@ + + + + + + + + + + + diff --git a/DataFormats/SiPixelDigiSoA/interface/SiPixelDigiErrorsDevice.h b/DataFormats/SiPixelDigiSoA/interface/SiPixelDigiErrorsDevice.h new file mode 100644 index 0000000000000..36c7d0be7e88a --- /dev/null +++ b/DataFormats/SiPixelDigiSoA/interface/SiPixelDigiErrorsDevice.h @@ -0,0 +1,33 @@ +#ifndef DataFormats_SiPixelDigiSoA_interface_SiPixelDigiErrorsDevice_h +#define DataFormats_SiPixelDigiSoA_interface_SiPixelDigiErrorsDevice_h + +#include + +#include + +#include "DataFormats/Portable/interface/PortableDeviceCollection.h" +#include "DataFormats/SiPixelDigiSoA/interface/SiPixelDigiErrorsSoA.h" +#include "DataFormats/SiPixelRawData/interface/SiPixelErrorCompact.h" +#include "HeterogeneousCore/AlpakaInterface/interface/SimpleVector.h" +#include "HeterogeneousCore/AlpakaInterface/interface/config.h" + +template +class SiPixelDigiErrorsDevice : public PortableDeviceCollection { +public: + SiPixelDigiErrorsDevice() = default; + template + explicit SiPixelDigiErrorsDevice(size_t maxFedWords, TQueue queue) + : PortableDeviceCollection(maxFedWords, queue), maxFedWords_(maxFedWords) {} + + // Constructor 
which specifies the SoA size + explicit SiPixelDigiErrorsDevice(size_t maxFedWords, TDev const& device) + : PortableDeviceCollection(maxFedWords, device), maxFedWords_(maxFedWords) {} + + auto& error_data() const { return (*this->view().pixelErrors()); } + auto maxFedWords() const { return maxFedWords_; } + +private: + int maxFedWords_ = 0;  // zero for a default-constructed collection, as in SiPixelDigiErrorsHost +}; + +#endif // DataFormats_SiPixelDigiSoA_interface_SiPixelDigiErrorsDevice_h diff --git a/DataFormats/SiPixelDigiSoA/interface/SiPixelDigiErrorsHost.h b/DataFormats/SiPixelDigiSoA/interface/SiPixelDigiErrorsHost.h new file mode 100644 index 0000000000000..ac706dea4b544 --- /dev/null +++ b/DataFormats/SiPixelDigiSoA/interface/SiPixelDigiErrorsHost.h @@ -0,0 +1,30 @@ +#ifndef DataFormats_SiPixelDigiSoA_interface_SiPixelDigiErrorsHost_h +#define DataFormats_SiPixelDigiSoA_interface_SiPixelDigiErrorsHost_h + +#include + +#include + +#include "DataFormats/Portable/interface/PortableHostCollection.h" +#include "DataFormats/SiPixelDigiSoA/interface/SiPixelDigiErrorsSoA.h" +#include "DataFormats/SiPixelRawData/interface/SiPixelErrorCompact.h" +#include "HeterogeneousCore/AlpakaInterface/interface/SimpleVector.h" +#include "HeterogeneousCore/AlpakaInterface/interface/memory.h" + +class SiPixelDigiErrorsHost : public PortableHostCollection { +public: + SiPixelDigiErrorsHost() = default; + template + explicit SiPixelDigiErrorsHost(int maxFedWords, TQueue queue) + : PortableHostCollection(maxFedWords, queue), maxFedWords_(maxFedWords) {} + + int maxFedWords() const { return maxFedWords_; } + + auto& error_data() { return (*view().pixelErrors()); } + auto const& error_data() const { return (*view().pixelErrors()); } + +private: + int maxFedWords_ = 0; +}; + +#endif // DataFormats_SiPixelDigiSoA_interface_SiPixelDigiErrorsHost_h diff --git a/DataFormats/SiPixelDigiSoA/interface/SiPixelDigiErrorsSoA.h b/DataFormats/SiPixelDigiSoA/interface/SiPixelDigiErrorsSoA.h new file mode 100644 index 0000000000000..b6398bc840c5b --- /dev/null +++
b/DataFormats/SiPixelDigiSoA/interface/SiPixelDigiErrorsSoA.h @@ -0,0 +1,14 @@ +#ifndef DataFormats_SiPixelDigiSoA_interface_SiPixelDigiErrorsSoA_h +#define DataFormats_SiPixelDigiSoA_interface_SiPixelDigiErrorsSoA_h + +#include "DataFormats/SoATemplate/interface/SoALayout.h" +#include "DataFormats/SiPixelRawData/interface/SiPixelErrorCompact.h" +#include "HeterogeneousCore/AlpakaInterface/interface/SimpleVector.h" + +GENERATE_SOA_LAYOUT(SiPixelDigiErrorsLayout, SOA_COLUMN(SiPixelErrorCompact, pixelErrors), SOA_SCALAR(uint32_t, size)) + +using SiPixelDigiErrorsSoA = SiPixelDigiErrorsLayout<>; +using SiPixelDigiErrorsSoAView = SiPixelDigiErrorsSoA::View; +using SiPixelDigiErrorsSoAConstView = SiPixelDigiErrorsSoA::ConstView; + +#endif // DataFormats_SiPixelDigiSoA_interface_SiPixelDigiErrorsSoA_h diff --git a/DataFormats/SiPixelDigiSoA/interface/SiPixelDigisDevice.h b/DataFormats/SiPixelDigiSoA/interface/SiPixelDigisDevice.h new file mode 100644 index 0000000000000..a949964fa445a --- /dev/null +++ b/DataFormats/SiPixelDigiSoA/interface/SiPixelDigisDevice.h @@ -0,0 +1,37 @@ +#ifndef DataFormats_SiPixelDigiSoA_interface_SiPixelDigisDevice_h +#define DataFormats_SiPixelDigiSoA_interface_SiPixelDigisDevice_h + +#include + +#include + +#include "DataFormats/Portable/interface/PortableDeviceCollection.h" +#include "DataFormats/SiPixelDigiSoA/interface/SiPixelDigisSoAv2.h" +#include "HeterogeneousCore/AlpakaInterface/interface/config.h" + +template +class SiPixelDigisDevice : public PortableDeviceCollection { +public: + SiPixelDigisDevice() = default; + template + explicit SiPixelDigisDevice(size_t maxFedWords, TQueue queue) + : PortableDeviceCollection(maxFedWords + 1, queue) {} + + // Constructor which specifies the SoA size + explicit SiPixelDigisDevice(size_t maxFedWords, TDev const &device) + : PortableDeviceCollection(maxFedWords + 1, device) {} + + void setNModulesDigis(uint32_t nModules, uint32_t nDigis) { + nModules_h = nModules; + nDigis_h = nDigis; + } + + 
uint32_t nModules() const { return nModules_h; } + uint32_t nDigis() const { return nDigis_h; } + +private: + uint32_t nModules_h = 0; + uint32_t nDigis_h = 0; +}; + +#endif // DataFormats_SiPixelDigiSoA_interface_SiPixelDigisDevice_h diff --git a/DataFormats/SiPixelDigiSoA/interface/SiPixelDigisHost.h b/DataFormats/SiPixelDigiSoA/interface/SiPixelDigisHost.h new file mode 100644 index 0000000000000..c2cb7e683b00b --- /dev/null +++ b/DataFormats/SiPixelDigiSoA/interface/SiPixelDigisHost.h @@ -0,0 +1,30 @@ +#ifndef DataFormats_SiPixelDigiSoA_interface_SiPixelDigisHost_h +#define DataFormats_SiPixelDigiSoA_interface_SiPixelDigisHost_h + +#include "DataFormats/Portable/interface/PortableHostCollection.h" +#include "DataFormats/SiPixelDigiSoA/interface/SiPixelDigisSoAv2.h" + +// TODO: The class is created via inheritance of the PortableDeviceCollection. +// This is generally discouraged, and should be done via composition. +// See: https://github.com/cms-sw/cmssw/pull/40465#discussion_r1067364306 +class SiPixelDigisHost : public PortableHostCollection { +public: + SiPixelDigisHost() = default; + template + explicit SiPixelDigisHost(size_t maxFedWords, TQueue queue) + : PortableHostCollection(maxFedWords + 1, queue) {} + + void setNModulesDigis(uint32_t nModules, uint32_t nDigis) { + nModules_h = nModules; + nDigis_h = nDigis; + } + + uint32_t nModules() const { return nModules_h; } + uint32_t nDigis() const { return nDigis_h; } + +private: + uint32_t nModules_h = 0; + uint32_t nDigis_h = 0; +}; + +#endif // DataFormats_SiPixelDigiSoA_interface_SiPixelDigisHost_h diff --git a/DataFormats/SiPixelDigiSoA/interface/SiPixelDigisSoAv2.h b/DataFormats/SiPixelDigiSoA/interface/SiPixelDigisSoAv2.h new file mode 100644 index 0000000000000..c40bb38fb04b3 --- /dev/null +++ b/DataFormats/SiPixelDigiSoA/interface/SiPixelDigisSoAv2.h @@ -0,0 +1,19 @@ +#ifndef DataFormats_SiPixelDigiSoA_interface_SiPixelDigisSoAv2_h +#define DataFormats_SiPixelDigiSoA_interface_SiPixelDigisSoAv2_h + 
+#include "DataFormats/SoATemplate/interface/SoALayout.h" + +GENERATE_SOA_LAYOUT(SiPixelDigisLayout, + SOA_COLUMN(int32_t, clus), + SOA_COLUMN(uint32_t, pdigi), + SOA_COLUMN(uint32_t, rawIdArr), + SOA_COLUMN(uint16_t, adc), + SOA_COLUMN(uint16_t, xx), + SOA_COLUMN(uint16_t, yy), + SOA_COLUMN(uint16_t, moduleId)) + +using SiPixelDigisSoAv2 = SiPixelDigisLayout<>; +using SiPixelDigisSoAv2View = SiPixelDigisSoAv2::View; +using SiPixelDigisSoAv2ConstView = SiPixelDigisSoAv2::ConstView; + +#endif // DataFormats_SiPixelDigiSoA_interface_SiPixelDigisSoAv2_h diff --git a/DataFormats/SiPixelDigiSoA/interface/alpaka/SiPixelDigiErrorsCollection.h b/DataFormats/SiPixelDigiSoA/interface/alpaka/SiPixelDigiErrorsCollection.h new file mode 100644 index 0000000000000..a7b0dee3e666d --- /dev/null +++ b/DataFormats/SiPixelDigiSoA/interface/alpaka/SiPixelDigiErrorsCollection.h @@ -0,0 +1,35 @@ +#ifndef DataFormats_SiPixelDigiSoA_interface_alpaka_SiPixelDigiErrorsCollection_h +#define DataFormats_SiPixelDigiSoA_interface_alpaka_SiPixelDigiErrorsCollection_h + +#include + +#include + +#include "DataFormats/SiPixelDigiSoA/interface/SiPixelDigiErrorsHost.h" +#include "DataFormats/SiPixelDigiSoA/interface/SiPixelDigiErrorsDevice.h" +#include "HeterogeneousCore/AlpakaInterface/interface/config.h" +#include "HeterogeneousCore/AlpakaInterface/interface/memory.h" +#include "HeterogeneousCore/AlpakaInterface/interface/CopyToHost.h" + +namespace ALPAKA_ACCELERATOR_NAMESPACE { +#ifdef ALPAKA_ACC_CPU_B_SEQ_T_SEQ_ENABLED + using SiPixelDigiErrorsCollection = SiPixelDigiErrorsHost; +#else + using SiPixelDigiErrorsCollection = SiPixelDigiErrorsDevice; +#endif +} // namespace ALPAKA_ACCELERATOR_NAMESPACE + +namespace cms::alpakatools { + template <> + struct CopyToHost { + template + static auto copyAsync(TQueue& queue, ALPAKA_ACCELERATOR_NAMESPACE::SiPixelDigiErrorsCollection const& srcData) { + SiPixelDigiErrorsHost dstData(srcData.maxFedWords(), queue); + alpaka::memcpy(queue, dstData.buffer(), 
srcData.buffer()); + + return dstData; + } + }; +} // namespace cms::alpakatools + +#endif // DataFormats_SiPixelDigiSoA_interface_alpaka_SiPixelDigiErrorsCollection_h diff --git a/DataFormats/SiPixelDigiSoA/interface/alpaka/SiPixelDigiErrorsUtilities.h b/DataFormats/SiPixelDigiSoA/interface/alpaka/SiPixelDigiErrorsUtilities.h new file mode 100644 index 0000000000000..aad1a671622c2 --- /dev/null +++ b/DataFormats/SiPixelDigiSoA/interface/alpaka/SiPixelDigiErrorsUtilities.h @@ -0,0 +1,29 @@ +#ifndef DataFormats_SiPixelDigiSoA_interface_alpaka_SiPixelDigiErrorsUtilities_h +#define DataFormats_SiPixelDigiSoA_interface_alpaka_SiPixelDigiErrorsUtilities_h + +#include "DataFormats/SiPixelDigiSoA/interface/SiPixelDigiErrorsSoA.h" +#include "DataFormats/SiPixelRawData/interface/SiPixelErrorCompact.h" +#include "HeterogeneousCore/AlpakaInterface/interface/SimpleVector.h" + +// ALPAKA_FN_HOST_ACC ALPAKA_FN_INLINE static constexpr SiPixelErrorCompactVec* error( +// SiPixelDigiErrorsSoAView& errors) { +// return (&errors.pixelErrorsVec()); +// } +// ALPAKA_FN_HOST_ACC ALPAKA_FN_INLINE static constexpr SiPixelErrorCompactVec const* error( +// const SiPixelDigiErrorsSoAConstView& errors) { +// return (&errors.pixelErrorsVec()); +// } +// ALPAKA_FN_HOST_ACC ALPAKA_FN_INLINE static constexpr SiPixelErrorCompact& error_data( +// SiPixelDigiErrorsSoAView& errors) { +// return (*errors.pixelErrors()); +// } +// ALPAKA_FN_HOST_ACC ALPAKA_FN_INLINE static constexpr SiPixelErrorCompact const& error_data( +// const SiPixelDigiErrorsSoAConstView& errors) { +// return (*errors.pixelErrors()); +// } +// ALPAKA_FN_HOST_ACC ALPAKA_FN_INLINE static constexpr SiPixelErrorCompactVec& error_vector( +// SiPixelDigiErrorsSoAView& errors) { +// return (errors.pixelErrorsVec()); +// } + +#endif // DataFormats_SiPixelDigiSoA_interface_alpaka_SiPixelDigiErrorsUtilities_h diff --git a/DataFormats/SiPixelDigiSoA/interface/alpaka/SiPixelDigisCollection.h 
b/DataFormats/SiPixelDigiSoA/interface/alpaka/SiPixelDigisCollection.h new file mode 100644 index 0000000000000..13d7217471d4e --- /dev/null +++ b/DataFormats/SiPixelDigiSoA/interface/alpaka/SiPixelDigisCollection.h @@ -0,0 +1,36 @@ +#ifndef DataFormats_SiPixelDigiSoA_interface_alpaka_SiPixelDigisCollection_h +#define DataFormats_SiPixelDigiSoA_interface_alpaka_SiPixelDigisCollection_h + +#include + +#include + +#include "DataFormats/SiPixelDigiSoA/interface/SiPixelDigisDevice.h" +#include "DataFormats/SiPixelDigiSoA/interface/SiPixelDigisHost.h" +#include "HeterogeneousCore/AlpakaInterface/interface/CopyToHost.h" +#include "HeterogeneousCore/AlpakaInterface/interface/config.h" + +namespace ALPAKA_ACCELERATOR_NAMESPACE { + +#ifdef ALPAKA_ACC_CPU_B_SEQ_T_SEQ_ENABLED + using SiPixelDigisCollection = SiPixelDigisHost; +#else + using SiPixelDigisCollection = SiPixelDigisDevice; +#endif + +} // namespace ALPAKA_ACCELERATOR_NAMESPACE + +namespace cms::alpakatools { + template <> + struct CopyToHost { + template + static auto copyAsync(TQueue &queue, ALPAKA_ACCELERATOR_NAMESPACE::SiPixelDigisCollection const &srcData) { + SiPixelDigisHost dstData(srcData.view().metadata().size(), queue); + alpaka::memcpy(queue, dstData.buffer(), srcData.buffer()); + dstData.setNModulesDigis(srcData.nModules(), srcData.nDigis()); + return dstData; + } + }; +} // namespace cms::alpakatools + +#endif // DataFormats_SiPixelDigiSoA_interface_alpaka_SiPixelDigisCollection_h diff --git a/DataFormats/SiPixelDigiSoA/src/alpaka/classes_cuda.h b/DataFormats/SiPixelDigiSoA/src/alpaka/classes_cuda.h new file mode 100644 index 0000000000000..b35504bcf8fe4 --- /dev/null +++ b/DataFormats/SiPixelDigiSoA/src/alpaka/classes_cuda.h @@ -0,0 +1,12 @@ +#ifndef DataFormats_SiPixelDigiSoA_Alpaka_Classes_cuda_h +#define DataFormats_SiPixelDigiSoA_Alpaka_Classes_cuda_h + +#include "DataFormats/Common/interface/DeviceProduct.h" +#include "DataFormats/Common/interface/Wrapper.h" +#include 
"DataFormats/Portable/interface/alpaka/PortableCollection.h" +#include "DataFormats/SiPixelDigiSoA/interface/SiPixelDigiErrorsSoA.h" +#include "DataFormats/SiPixelDigiSoA/interface/SiPixelDigisSoAv2.h" +#include "DataFormats/SiPixelDigiSoA/interface/alpaka/SiPixelDigiErrorsCollection.h" +#include "DataFormats/SiPixelDigiSoA/interface/alpaka/SiPixelDigisCollection.h" + +#endif // DataFormats_SiPixelDigiSoA_src_alpaka_classes_cuda_h diff --git a/DataFormats/SiPixelDigiSoA/src/alpaka/classes_cuda_def.xml b/DataFormats/SiPixelDigiSoA/src/alpaka/classes_cuda_def.xml new file mode 100644 index 0000000000000..b4e424acf4a57 --- /dev/null +++ b/DataFormats/SiPixelDigiSoA/src/alpaka/classes_cuda_def.xml @@ -0,0 +1,15 @@ + + + + + + + + + + + + + + + diff --git a/DataFormats/SiPixelDigiSoA/src/alpaka/classes_rocm.h b/DataFormats/SiPixelDigiSoA/src/alpaka/classes_rocm.h new file mode 100644 index 0000000000000..5bad5c38e4255 --- /dev/null +++ b/DataFormats/SiPixelDigiSoA/src/alpaka/classes_rocm.h @@ -0,0 +1,13 @@ +#ifndef DataFormats_SiPixelDigiSoA_Alpaka_Classes_cuda_h +#define DataFormats_SiPixelDigiSoA_Alpaka_Classes_cuda_h + +#include "DataFormats/Common/interface/DeviceProduct.h" +#include "DataFormats/Common/interface/Wrapper.h" +#include "DataFormats/Portable/interface/alpaka/PortableCollection.h" +#include "DataFormats/SiPixelDigiSoA/interface/SiPixelDigiErrorsSoA.h" +#include "DataFormats/SiPixelDigiSoA/interface/SiPixelDigisDevice.h" +#include "DataFormats/SiPixelDigiSoA/interface/SiPixelDigisSoAv2.h" +#include "DataFormats/SiPixelDigiSoA/interface/alpaka/SiPixelDigiErrorsCollection.h" +#include "DataFormats/SiPixelDigiSoA/interface/alpaka/SiPixelDigisCollection.h" + +#endif // DataFormats_SiPixelDigiSoA_src_alpaka_classes_cuda_h diff --git a/DataFormats/SiPixelDigiSoA/src/alpaka/classes_rocm_def.xml b/DataFormats/SiPixelDigiSoA/src/alpaka/classes_rocm_def.xml new file mode 100644 index 0000000000000..a63bf5df92b26 --- /dev/null +++ 
b/DataFormats/SiPixelDigiSoA/src/alpaka/classes_rocm_def.xml @@ -0,0 +1,15 @@ + + + + + + + + + + + + + + + diff --git a/DataFormats/SiPixelDigiSoA/src/classes.h b/DataFormats/SiPixelDigiSoA/src/classes.h new file mode 100644 index 0000000000000..d6a5bd2a915e8 --- /dev/null +++ b/DataFormats/SiPixelDigiSoA/src/classes.h @@ -0,0 +1,10 @@ +#ifndef DataFormats_SiPixelDigisSoA_src_classes_h +#define DataFormats_SiPixelDigisSoA_src_classes_h + +#include "DataFormats/Common/interface/Wrapper.h" +#include "DataFormats/SiPixelDigiSoA/interface/SiPixelDigiErrorsHost.h" +#include "DataFormats/SiPixelDigiSoA/interface/SiPixelDigiErrorsSoA.h" +#include "DataFormats/SiPixelDigiSoA/interface/SiPixelDigisHost.h" +#include "DataFormats/SiPixelDigiSoA/interface/SiPixelDigisSoAv2.h" + +#endif // DataFormats_SiPixelClusterSoA_src_classes_h diff --git a/DataFormats/SiPixelDigiSoA/src/classes_def.xml b/DataFormats/SiPixelDigiSoA/src/classes_def.xml new file mode 100644 index 0000000000000..30f613e625cb5 --- /dev/null +++ b/DataFormats/SiPixelDigiSoA/src/classes_def.xml @@ -0,0 +1,38 @@ + + + + + + ::ROOTReadStreamer(newObj, onfile.layout_); + ]]> + + + + + + + + + + + ::ROOTReadStreamer(newObj, onfile.layout_); + ]]> + + + + + diff --git a/DataFormats/SiPixelDigiSoA/test/BuildFile.xml b/DataFormats/SiPixelDigiSoA/test/BuildFile.xml new file mode 100644 index 0000000000000..48a107578a641 --- /dev/null +++ b/DataFormats/SiPixelDigiSoA/test/BuildFile.xml @@ -0,0 +1,11 @@ + + + + + + + + + + + diff --git a/DataFormats/SiPixelDigiSoA/test/alpaka/DigiErrors_test.cc b/DataFormats/SiPixelDigiSoA/test/alpaka/DigiErrors_test.cc new file mode 100644 index 0000000000000..2f61b993884bd --- /dev/null +++ b/DataFormats/SiPixelDigiSoA/test/alpaka/DigiErrors_test.cc @@ -0,0 +1,54 @@ +#include +#include + +#include "DataFormats/SiPixelDigiSoA/interface/SiPixelDigiErrorsDevice.h" +#include "DataFormats/SiPixelDigiSoA/interface/SiPixelDigiErrorsHost.h" +#include 
"DataFormats/SiPixelDigiSoA/interface/SiPixelDigiErrorsSoA.h" +#include "DataFormats/SiPixelDigiSoA/interface/alpaka/SiPixelDigiErrorsCollection.h" + +#include "HeterogeneousCore/AlpakaInterface/interface/devices.h" +#include "HeterogeneousCore/AlpakaInterface/interface/host.h" +#include "HeterogeneousCore/AlpakaInterface/interface/memory.h" +#include "HeterogeneousCore/AlpakaInterface/interface/config.h" +#include "HeterogeneousCore/AlpakaInterface/interface/workdivision.h" +#include "HeterogeneousCore/AlpakaInterface/interface/traits.h" + +using namespace ALPAKA_ACCELERATOR_NAMESPACE; + +namespace ALPAKA_ACCELERATOR_NAMESPACE { + namespace testDigisSoA { + + void runKernels(SiPixelDigiErrorsSoAView digiErrors_view, Queue& queue); + } +} // namespace ALPAKA_ACCELERATOR_NAMESPACE + +int main() { + const auto host = cms::alpakatools::host(); + const auto device = cms::alpakatools::devices()[0]; + Queue queue(device); + + // Inner scope to deallocate memory before destroying the stream + { + // Instantiate tracks on device. PortableDeviceCollection allocates + // SoA on device automatically. + SiPixelDigiErrorsCollection digiErrors_d(1000, queue); + testDigisSoA::runKernels(digiErrors_d.view(), queue); + + // Instantate tracks on host. This is where the data will be + // copied to from device. 
+ SiPixelDigiErrorsHost digiErrors_h(digiErrors_d.view().metadata().size(), queue); + alpaka::memcpy(queue, digiErrors_h.buffer(), digiErrors_d.const_buffer()); + std::cout << "digiErrors_h.view().metadata().size(): " << digiErrors_h.view().metadata().size() << std::endl; + std::cout << "digiErrors_h.view()[100].pixelErrors().rawId: " << digiErrors_h.view()[100].pixelErrors().rawId + << std::endl; + std::cout << "digiErrors_h.view()[100].pixelErrors().word: " << digiErrors_h.view()[100].pixelErrors().word + << std::endl; + std::cout << "digiErrors_h.view()[100].pixelErrors().errorType: " + << digiErrors_h.view()[100].pixelErrors().errorType << std::endl; + std::cout << "digiErrors_h.view()[100].pixelErrors().fedId: " << digiErrors_h.view()[100].pixelErrors().fedId + << std::endl; + alpaka::wait(queue); + } + + return 0; +} diff --git a/DataFormats/SiPixelDigiSoA/test/alpaka/DigiErrors_test.dev.cc b/DataFormats/SiPixelDigiSoA/test/alpaka/DigiErrors_test.dev.cc new file mode 100644 index 0000000000000..96825afe2a001 --- /dev/null +++ b/DataFormats/SiPixelDigiSoA/test/alpaka/DigiErrors_test.dev.cc @@ -0,0 +1,50 @@ +#include "DataFormats/SiPixelDigiSoA/interface/SiPixelDigiErrorsDevice.h" +#include "DataFormats/SiPixelDigiSoA/interface/alpaka/SiPixelDigiErrorsCollection.h" +#include "DataFormats/SiPixelDigiSoA/interface/SiPixelDigiErrorsHost.h" +#include "DataFormats/SiPixelRawData/interface/SiPixelErrorCompact.h" +#include "HeterogeneousCore/AlpakaInterface/interface/workdivision.h" +#include "HeterogeneousCore/AlpakaInterface/interface/traits.h" + +using namespace alpaka; + +namespace ALPAKA_ACCELERATOR_NAMESPACE { + + using namespace cms::alpakatools; + namespace testDigisSoA { + + class TestFillKernel { + public: + template >> + ALPAKA_FN_ACC void operator()(TAcc const& acc, SiPixelDigiErrorsSoAView digiErrors_view) const { + for (uint32_t j : elements_with_stride(acc, digiErrors_view.metadata().size())) { + digiErrors_view[j].pixelErrors().rawId = j; + 
digiErrors_view[j].pixelErrors().word = j; + digiErrors_view[j].pixelErrors().errorType = j; + digiErrors_view[j].pixelErrors().fedId = j; + } + } + }; + + class TestVerifyKernel { + public: + template >> + ALPAKA_FN_ACC void operator()(TAcc const& acc, SiPixelDigiErrorsSoAConstView digiErrors_view) const { + for (uint32_t j : elements_with_stride(acc, digiErrors_view.metadata().size())) { + assert(digiErrors_view[j].pixelErrors().rawId == j); + assert(digiErrors_view[j].pixelErrors().word == j); + assert(digiErrors_view[j].pixelErrors().errorType == j % 256); + assert(digiErrors_view[j].pixelErrors().fedId == j % 256); + } + } + }; + + void runKernels(SiPixelDigiErrorsSoAView digiErrors_view, Queue& queue) { + uint32_t items = 64; + uint32_t groups = divide_up_by(digiErrors_view.metadata().size(), items); + auto workDiv = make_workdiv(groups, items); + alpaka::exec(queue, workDiv, TestFillKernel{}, digiErrors_view); + alpaka::exec(queue, workDiv, TestVerifyKernel{}, digiErrors_view); + } + + } // namespace testDigisSoA +} // namespace ALPAKA_ACCELERATOR_NAMESPACE diff --git a/DataFormats/SiPixelDigiSoA/test/alpaka/Digis_test.cc b/DataFormats/SiPixelDigiSoA/test/alpaka/Digis_test.cc new file mode 100644 index 0000000000000..61f5756adf213 --- /dev/null +++ b/DataFormats/SiPixelDigiSoA/test/alpaka/Digis_test.cc @@ -0,0 +1,48 @@ +#include + +#include + +#include "DataFormats/SiPixelDigiSoA/interface/SiPixelDigisDevice.h" +#include "DataFormats/SiPixelDigiSoA/interface/SiPixelDigisHost.h" +#include "DataFormats/SiPixelDigiSoA/interface/SiPixelDigisSoAv2.h" +#include "DataFormats/SiPixelDigiSoA/interface/alpaka/SiPixelDigisCollection.h" +#include "HeterogeneousCore/AlpakaInterface/interface/config.h" +#include "HeterogeneousCore/AlpakaInterface/interface/devices.h" +#include "HeterogeneousCore/AlpakaInterface/interface/host.h" +#include "HeterogeneousCore/AlpakaInterface/interface/memory.h" +#include "HeterogeneousCore/AlpakaInterface/interface/traits.h" +#include 
"HeterogeneousCore/AlpakaInterface/interface/workdivision.h" + +using namespace ALPAKA_ACCELERATOR_NAMESPACE; + +namespace ALPAKA_ACCELERATOR_NAMESPACE { + namespace testDigisSoA { + + void runKernels(SiPixelDigisSoAv2View digis_view, Queue& queue); + + } +} // namespace ALPAKA_ACCELERATOR_NAMESPACE + +int main() { + const auto host = cms::alpakatools::host(); + const auto device = cms::alpakatools::devices()[0]; + Queue queue(device); + + // Inner scope to deallocate memory before destroying the stream + { + // Instantiate tracks on device. PortableDeviceCollection allocates + // SoA on device automatically. + SiPixelDigisCollection digis_d(1000, queue); + testDigisSoA::runKernels(digis_d.view(), queue); + + // Instantate tracks on host. This is where the data will be + // copied to from device. + SiPixelDigisHost digis_h(digis_d.view().metadata().size(), queue); + + std::cout << digis_h.view().metadata().size() << std::endl; + alpaka::memcpy(queue, digis_h.buffer(), digis_d.const_buffer()); + alpaka::wait(queue); + } + + return 0; +} diff --git a/DataFormats/SiPixelDigiSoA/test/alpaka/Digis_test.dev.cc b/DataFormats/SiPixelDigiSoA/test/alpaka/Digis_test.dev.cc new file mode 100644 index 0000000000000..97026b76f938f --- /dev/null +++ b/DataFormats/SiPixelDigiSoA/test/alpaka/Digis_test.dev.cc @@ -0,0 +1,49 @@ +#include "DataFormats/SiPixelDigiSoA/interface/SiPixelDigisDevice.h" +#include "DataFormats/SiPixelDigiSoA/interface/alpaka/SiPixelDigisCollection.h" +#include "DataFormats/SiPixelDigiSoA/interface/SiPixelDigisHost.h" +#include "HeterogeneousCore/AlpakaInterface/interface/workdivision.h" +#include "HeterogeneousCore/AlpakaInterface/interface/traits.h" + +using namespace alpaka; + +namespace ALPAKA_ACCELERATOR_NAMESPACE { + + using namespace cms::alpakatools; + namespace testDigisSoA { + + class TestFillKernel { + public: + template >> + ALPAKA_FN_ACC void operator()(TAcc const& acc, SiPixelDigisSoAv2View digi_view) const { + for (int32_t j : 
elements_with_stride(acc, digi_view.metadata().size())) { + digi_view[j].clus() = j; + digi_view[j].rawIdArr() = j * 2; + digi_view[j].xx() = j * 3; + digi_view[j].moduleId() = j * 4; + } + } + }; + + class TestVerifyKernel { + public: + template >> + ALPAKA_FN_ACC void operator()(TAcc const& acc, SiPixelDigisSoAv2ConstView digi_view) const { + for (uint32_t j : elements_with_stride(acc, digi_view.metadata().size())) { + assert(digi_view[j].clus() == int(j)); + assert(digi_view[j].rawIdArr() == j * 2); + assert(digi_view[j].xx() == j * 3); + assert(digi_view[j].moduleId() == j * 4); + } + } + }; + + void runKernels(SiPixelDigisSoAv2View digi_view, Queue& queue) { + uint32_t items = 64; + uint32_t groups = divide_up_by(digi_view.metadata().size(), items); + auto workDiv = make_workdiv(groups, items); + alpaka::exec(queue, workDiv, TestFillKernel{}, digi_view); + alpaka::exec(queue, workDiv, TestVerifyKernel{}, digi_view); + } + + } // namespace testDigisSoA +} // namespace ALPAKA_ACCELERATOR_NAMESPACE diff --git a/DataFormats/SiPixelRawData/src/classes.h b/DataFormats/SiPixelRawData/src/classes.h index 7a07e9f35f388..9adc3a440e27b 100644 --- a/DataFormats/SiPixelRawData/src/classes.h +++ b/DataFormats/SiPixelRawData/src/classes.h @@ -1,10 +1,13 @@ #ifndef SIPIXELRAWDATA_CLASSES_H #define SIPIXELRAWDATA_CLASSES_H -#include "DataFormats/SiPixelRawData/interface/SiPixelRawDataError.h" -#include "DataFormats/SiPixelRawData/interface/SiPixelErrorsSoA.h" -#include "DataFormats/Common/interface/Wrapper.h" -#include "DataFormats/Common/interface/DetSetVector.h" #include +#include "DataFormats/Common/interface/DetSetVector.h" +#include "DataFormats/Common/interface/Wrapper.h" +#include "DataFormats/SiPixelRawData/interface/SiPixelErrorCompact.h" +#include "DataFormats/SiPixelRawData/interface/SiPixelErrorsSoA.h" +#include "DataFormats/SiPixelRawData/interface/SiPixelFormatterErrors.h" +#include "DataFormats/SiPixelRawData/interface/SiPixelRawDataError.h" + #endif // 
SIPIXELRAWDATA_CLASSES_H diff --git a/DataFormats/SiPixelRawData/src/classes_def.xml b/DataFormats/SiPixelRawData/src/classes_def.xml index fd2b5dcf27965..d895f0af1dce3 100644 --- a/DataFormats/SiPixelRawData/src/classes_def.xml +++ b/DataFormats/SiPixelRawData/src/classes_def.xml @@ -14,7 +14,13 @@ + + + + + + diff --git a/DataFormats/TrackSoA/BuildFile.xml b/DataFormats/TrackSoA/BuildFile.xml new file mode 100644 index 0000000000000..5ee03bbcbbf1b --- /dev/null +++ b/DataFormats/TrackSoA/BuildFile.xml @@ -0,0 +1,12 @@ + + + + + + + + + + + + diff --git a/DataFormats/TrackSoA/README.md b/DataFormats/TrackSoA/README.md new file mode 100644 index 0000000000000..fe31bd0acfa92 --- /dev/null +++ b/DataFormats/TrackSoA/README.md @@ -0,0 +1,66 @@ +# TrackSoA Data Formats + +`DataFormat`s meant to be used on Host (CPU) or Device (GPU) for +storing information about `TrackSoA`s created during the Pixel-local Reconstruction +chain. It stores data in an SoA manner. It combines the data contained in the +deprecated `TrackSoAHeterogeneousT` and `TrajectoryStateSoAT` classes. + +The host format is inheriting from `DataFormats/Portable/interface/PortableHostCollection.h`, +while the device format is inheriting from `DataFormats/Portable/interface/PortableDeviceCollection.h` + +Both formats use the same SoA Layout (`TrackSoAHeterogeneousLayout`) which is generated +via the `GENERATE_SOA_LAYOUT` macro in the `TrackLayout.h` file. + +## Notes + +- `hitIndices` and `detIndices`, instances of `HitContainer`, have been added into the +layout as `SOA_SCALAR`s, meaning that they manage their own data independently from the SoA +`Layout`. This could be improved in the future, if `HitContainer` (aka a `OneToManyAssoc` of fixed size) +is replaced, but there don't seem to be any conflicts in including it in the `Layout` like this. +- Host and Device classes should **not** be created via inheritance, as they're done here, +but via composition.
See [this discussion](https://github.com/cms-sw/cmssw/pull/40465#discussion_r1066039309). + +## TrackSoAHost + +The version of the data format to be used for storing `TrackSoA` data on the CPU. +Instances of this class are to be used for: + +- Having a place to copy data to host from device, via `Memcpy`, or +- Running host-side algorithms using data stored in an SoA manner. + +## TrackSoADevice + +The version of the data format to be used for storing `TrackSoA` data on the GPU. + +Instances of `TrackSoADevice` are to be created on host and be +used on device only. To do so, the instance's `view()` method is to be called +to pass a `View` to any kernel launched. Accessing data from the `view()` is not +possible on the host side. + +## TrackSoACollection + +Depending on the Alpaka accelerator back-end enabled, `TrackSoACollection` wraps either the Host or Device SoA: + +```cpp + +#ifdef ALPAKA_ACC_CPU_B_SEQ_T_SEQ_ENABLED + template + using TrackSoACollection = TrackSoAHost; +#else + template + using TrackSoACollection = TrackSoADevice; +#endif + +``` + +## Utilities + +`alpaka/TrackUtilities.h` contains a collection of methods which were originally +defined as class methods inside either `TrackSoAHeterogeneousT` or `TrajectoryStateSoAT` +which have been adapted to operate on `View` instances, so that they are callable +from within `__global__` kernels, on both CPU and GPU. + +## Use case + +See `test/TrackSoAHeterogeneous_test.cpp` for a simple example of instantiation, +processing and copying from device to host.
diff --git a/DataFormats/TrackSoA/interface/TrackDefinitions.h b/DataFormats/TrackSoA/interface/TrackDefinitions.h new file mode 100644 index 0000000000000..b9e36b7fd33c4 --- /dev/null +++ b/DataFormats/TrackSoA/interface/TrackDefinitions.h @@ -0,0 +1,31 @@ +#ifndef DataFormats_Track_interface_TrackDefinitions_h +#define DataFormats_Track_interface_TrackDefinitions_h +#include +#include +#include + +namespace pixelTrack { + + enum class Quality : uint8_t { bad = 0, edup, dup, loose, strict, tight, highPurity, notQuality }; + constexpr uint32_t qualitySize{uint8_t(Quality::notQuality)}; + constexpr std::string_view qualityName[qualitySize]{"bad", "edup", "dup", "loose", "strict", "tight", "highPurity"}; + inline Quality qualityByName(std::string const &name) { + auto qp = std::find(qualityName, qualityName + qualitySize, name) - qualityName; + auto ret = static_cast(qp); + if (ret == pixelTrack::Quality::notQuality) + throw std::invalid_argument(name + " is not a pixelTrack::Quality!"); + + return ret; + } + +#ifdef GPU_SMALL_EVENTS + // kept for testing and debugging + constexpr uint32_t maxNumber() { return 2 * 1024; } +#else + // tested on MC events with 55-75 pileup events + constexpr uint32_t maxNumber() { return 32 * 1024; } +#endif + +} // namespace pixelTrack + +#endif diff --git a/DataFormats/TrackSoA/interface/TrackLayout.h b/DataFormats/TrackSoA/interface/TrackLayout.h new file mode 100644 index 0000000000000..14a303d62918a --- /dev/null +++ b/DataFormats/TrackSoA/interface/TrackLayout.h @@ -0,0 +1,52 @@ +#ifndef DataFormats_Track_interface_TrackLayout_h +#define DataFormats_Track_interface_TrackLayout_h + +#include +#include "HeterogeneousCore/AlpakaInterface/interface/HistoContainer.h" +#include "Geometry/CommonTopologies/interface/SimplePixelTopology.h" +#include "DataFormats/SoATemplate/interface/SoALayout.h" +#include "DataFormats/TrackSoA/interface/TrackDefinitions.h" + +template +struct TrackSoA { + static constexpr int32_t S =
TrackerTraits::maxNumberOfTuples; + static constexpr int32_t H = TrackerTraits::avgHitsPerTrack; + // Aliases in order to not confuse the GENERATE_SOA_LAYOUT + // macro with weird colons and angled brackets. + using Vector5f = Eigen::Matrix; + using Vector15f = Eigen::Matrix; + using Quality = pixelTrack::Quality; + + using hindex_type = uint32_t; + + using HitContainer = cms::alpakatools::OneToManyAssoc; + + GENERATE_SOA_LAYOUT(TrackSoAHeterogeneousLayout, + SOA_COLUMN(Quality, quality), + SOA_COLUMN(float, chi2), + SOA_COLUMN(int8_t, nLayers), + SOA_COLUMN(float, eta), + SOA_COLUMN(float, pt), + SOA_EIGEN_COLUMN(Vector5f, state), + SOA_EIGEN_COLUMN(Vector15f, covariance), + SOA_SCALAR(int, nTracks), + SOA_SCALAR(HitContainer, hitIndices), + SOA_SCALAR(HitContainer, detIndices)) +}; + +template +using TrackLayout = typename TrackSoA::template TrackSoAHeterogeneousLayout<>; +template +using TrackSoAView = typename TrackSoA::template TrackSoAHeterogeneousLayout<>::View; +template +using TrackSoAConstView = typename TrackSoA::template TrackSoAHeterogeneousLayout<>::ConstView; + + // State at the Beam spot + // phi,tip,1/pt,cotan(theta),zip +// template +// ALPAKA_FN_HOST_ACC ALPAKA_FN_INLINE constexpr float charge(const TrackSoAConstView& tracks, int32_t i) { +// float v = tracks[i].state()(2); +// return float((0.0f < v) - (v < 0.0f)); +// } + +#endif diff --git a/DataFormats/TrackSoA/interface/TrackSoADevice.h b/DataFormats/TrackSoA/interface/TrackSoADevice.h new file mode 100644 index 0000000000000..f2e66a4c95e1d --- /dev/null +++ b/DataFormats/TrackSoA/interface/TrackSoADevice.h @@ -0,0 +1,40 @@ +#ifndef DataFormats_Track_interface_TrackSoADevice_h +#define DataFormats_Track_interface_TrackSoADevice_h + +#include +#include +#include "HeterogeneousCore/AlpakaInterface/interface/config.h" +#include "DataFormats/TrackSoA/interface/TrackLayout.h" +#include "DataFormats/TrackSoA/interface/TrackDefinitions.h" +#include 
"DataFormats/Portable/interface/PortableDeviceCollection.h" +#include "HeterogeneousCore/AlpakaInterface/interface/CopyToHost.h" + +// TODO: The class is created via inheritance of the PortableCollection. +// This is generally discouraged, and should be done via composition. +// See: https://github.com/cms-sw/cmssw/pull/40465#discussion_r1067364306 +template +class TrackSoADevice : public PortableDeviceCollection, TDev> { +public: + static constexpr int32_t S = TrackerTraits::maxNumberOfTuples; //TODO: this could be made configurable at runtime + TrackSoADevice() = default; // necessary for ROOT dictionaries + + using PortableDeviceCollection, TDev>::view; + using PortableDeviceCollection, TDev>::const_view; + using PortableDeviceCollection, TDev>::buffer; + + // Constructor which specifies the SoA size + template + explicit TrackSoADevice(TQueue queue) + : PortableDeviceCollection, TDev>(S, queue) {} +}; + +namespace pixelTrack { + + template + using TrackSoADevicePhase1 = TrackSoADevice; + template + using TrackSoADevicePhase2 = TrackSoADevice; + +} // namespace pixelTrack + +#endif // DataFormats_Track_TrackSoADevice_H diff --git a/DataFormats/TrackSoA/interface/TrackSoAHost.h b/DataFormats/TrackSoA/interface/TrackSoAHost.h new file mode 100644 index 0000000000000..660334997caaa --- /dev/null +++ b/DataFormats/TrackSoA/interface/TrackSoAHost.h @@ -0,0 +1,42 @@ +#ifndef DataFormats_Track_TrackSoAHost_H +#define DataFormats_Track_TrackSoAHost_H + +#include +#include +#include "Geometry/CommonTopologies/interface/SimplePixelTopology.h" +#include "HeterogeneousCore/AlpakaInterface/interface/config.h" +#include "DataFormats/TrackSoA/interface/TrackLayout.h" +#include "DataFormats/TrackSoA/interface/TrackDefinitions.h" +#include "DataFormats/Portable/interface/PortableHostCollection.h" + +// TODO: The class is created via inheritance of the PortableHostCollection. +// This is generally discouraged, and should be done via composition. 
+// See: https://github.com/cms-sw/cmssw/pull/40465#discussion_r1067364306 +template +class TrackSoAHost : public PortableHostCollection> { +public: + static constexpr int32_t S = TrackerTraits::maxNumberOfTuples; //TODO: this could be made configurable at runtime + TrackSoAHost() = default; // Needed for the dictionary; not sure if line above is needed anymore + + using PortableHostCollection>::view; + using PortableHostCollection>::const_view; + using PortableHostCollection>::buffer; + + // Constructor which specifies the SoA size + template + explicit TrackSoAHost(TQueue queue) : PortableHostCollection>(S, queue) {} + + // Constructor which specifies the DevHost + explicit TrackSoAHost(alpaka_common::DevHost const& host) + : PortableHostCollection>(S, host) {} +}; + +namespace pixelTrack { + + using TrackSoAHostPhase1 = TrackSoAHost; + using TrackSoAHostPhase2 = TrackSoAHost; + using TrackSoAHostHIonPhase1 = TrackSoAHost; + +} // namespace pixelTrack + +#endif // DataFormats_Track_TrackSoAHost_H diff --git a/DataFormats/TrackSoA/interface/alpaka/TrackSoACollection.h b/DataFormats/TrackSoA/interface/alpaka/TrackSoACollection.h new file mode 100644 index 0000000000000..19d1fd1af2188 --- /dev/null +++ b/DataFormats/TrackSoA/interface/alpaka/TrackSoACollection.h @@ -0,0 +1,46 @@ +#ifndef DataFormats_Track_interface_alpaka_TrackSoACollection_h +#define DataFormats_Track_interface_alpaka_TrackSoACollection_h + +#include +#include +#include "HeterogeneousCore/AlpakaInterface/interface/config.h" +#include "DataFormats/TrackSoA/interface/TrackLayout.h" +#include "DataFormats/TrackSoA/interface/TrackDefinitions.h" +#include "DataFormats/TrackSoA/interface/TrackSoAHost.h" +#include "DataFormats/TrackSoA/interface/TrackSoADevice.h" +#include "DataFormats/Portable/interface/alpaka/PortableCollection.h" +#include "HeterogeneousCore/AlpakaInterface/interface/CopyToHost.h" + +// TODO: The class is created via inheritance of the PortableCollection. 
+// This is generally discouraged, and should be done via composition. +// See: https://github.com/cms-sw/cmssw/pull/40465#discussion_r1067364306 +namespace ALPAKA_ACCELERATOR_NAMESPACE { +#ifdef ALPAKA_ACC_CPU_B_SEQ_T_SEQ_ENABLED + template + using TrackSoACollection = TrackSoAHost; +#else + template + using TrackSoACollection = TrackSoADevice; +#endif + //Classes definition for Phase1/Phase2/HIonPhase1, to make the classes_def lighter. Not actually used in the code. + namespace pixelTrack { + using TrackSoACollectionPhase1 = TrackSoACollection; + using TrackSoACollectionPhase2 = TrackSoACollection; + using TrackSoACollectionHIonPhase1 = TrackSoACollection; + } // namespace pixelTrack +} // namespace ALPAKA_ACCELERATOR_NAMESPACE + +namespace cms::alpakatools { + template + struct CopyToHost> { + template + static auto copyAsync(TQueue& queue, + ALPAKA_ACCELERATOR_NAMESPACE::TrackSoACollection const& deviceData) { + ::TrackSoAHost hostData(queue); + alpaka::memcpy(queue, hostData.buffer(), deviceData.buffer()); + return hostData; + } + }; +} // namespace cms::alpakatools + +#endif // DataFormats_Track_interface_alpaka_TrackSoACollection_h diff --git a/DataFormats/TrackSoA/interface/alpaka/TrackUtilities.h b/DataFormats/TrackSoA/interface/alpaka/TrackUtilities.h new file mode 100644 index 0000000000000..5751f7857dbe5 --- /dev/null +++ b/DataFormats/TrackSoA/interface/alpaka/TrackUtilities.h @@ -0,0 +1,197 @@ +#ifndef DataFormats_Track_interface_alpaka_TrackUtilities_h +#define DataFormats_Track_interface_alpaka_TrackUtilities_h + +#include "Geometry/CommonTopologies/interface/SimplePixelTopology.h" +#include "DataFormats/TrackSoA/interface/TrackDefinitions.h" +#include "DataFormats/TrackSoA/interface/TrackLayout.h" + +// Methods that operate on View and ConstView of the TrackSoA, and cannot be class methods. 
+template +struct TracksUtilities { + using TrackSoAView = typename TrackSoA::template TrackSoAHeterogeneousLayout<>::View; + using TrackSoAConstView = typename TrackSoA::template TrackSoAHeterogeneousLayout<>::ConstView; + using hindex_type = typename TrackSoA::hindex_type; + + // State at the Beam spot + // phi,tip,1/pt,cotan(theta),zip + ALPAKA_FN_HOST_ACC ALPAKA_FN_INLINE static constexpr float charge(const TrackSoAConstView &tracks, int32_t i) { + //was: std::copysign(1.f, tracks[i].state()(2)). Will be constexpr with C++23 + float v = tracks[i].state()(2); + return float((0.0f < v) - (v < 0.0f)); + } + + ALPAKA_FN_HOST_ACC ALPAKA_FN_INLINE static constexpr float phi(const TrackSoAConstView &tracks, int32_t i) { + return tracks[i].state()(0); + } + + ALPAKA_FN_HOST_ACC ALPAKA_FN_INLINE static constexpr float tip(const TrackSoAConstView &tracks, int32_t i) { + return tracks[i].state()(1); + } + + ALPAKA_FN_HOST_ACC ALPAKA_FN_INLINE static constexpr float zip(const TrackSoAConstView &tracks, int32_t i) { + return tracks[i].state()(4); + } + + ALPAKA_FN_HOST_ACC ALPAKA_FN_INLINE static constexpr bool isTriplet(const TrackSoAConstView &tracks, int i) { + return tracks[i].nLayers() == 3; + } + + template + ALPAKA_FN_HOST_ACC ALPAKA_FN_INLINE static constexpr void copyFromCircle( + TrackSoAView &tracks, V3 const &cp, M3 const &ccov, V2 const &lp, M2 const &lcov, float b, int32_t i) { + tracks[i].state() << cp.template cast(), lp.template cast(); + + tracks[i].state()(2) = tracks[i].state()(2) * b; + auto cov = tracks[i].covariance(); + cov(0) = ccov(0, 0); + cov(1) = ccov(0, 1); + cov(2) = b * float(ccov(0, 2)); + cov(4) = cov(3) = 0; + cov(5) = ccov(1, 1); + cov(6) = b * float(ccov(1, 2)); + cov(8) = cov(7) = 0; + cov(9) = b * b * float(ccov(2, 2)); + cov(11) = cov(10) = 0; + cov(12) = lcov(0, 0); + cov(13) = lcov(0, 1); + cov(14) = lcov(1, 1); + } + + template + ALPAKA_FN_HOST_ACC ALPAKA_FN_INLINE static constexpr void copyFromDense(TrackSoAView &tracks, + V5 
const &v, + M5 const &cov, + int32_t i) { + tracks[i].state() = v.template cast(); + for (int j = 0, ind = 0; j < 5; ++j) + for (auto k = j; k < 5; ++k) + tracks[i].covariance()(ind++) = cov(j, k); + } + + template + ALPAKA_FN_HOST_ACC ALPAKA_FN_INLINE static constexpr void copyToDense(const TrackSoAConstView &tracks, + V5 &v, + M5 &cov, + int32_t i) { + v = tracks[i].state().template cast(); + for (int j = 0, ind = 0; j < 5; ++j) { + cov(j, j) = tracks[i].covariance()(ind++); + for (auto k = j + 1; k < 5; ++k) + cov(k, j) = cov(j, k) = tracks[i].covariance()(ind++); + } + } + + ALPAKA_FN_HOST_ACC ALPAKA_FN_INLINE static constexpr int computeNumberOfLayers(const TrackSoAConstView &tracks, + int32_t i) { + auto pdet = tracks.detIndices().begin(i); + int nl = 1; + auto ol = pixelTopology::getLayer(*pdet); + for (; pdet < tracks.detIndices().end(i); ++pdet) { + auto il = pixelTopology::getLayer(*pdet); + if (il != ol) + ++nl; + ol = il; + } + return nl; + } + + ALPAKA_FN_HOST_ACC ALPAKA_FN_INLINE static constexpr int nHits(const TrackSoAConstView &tracks, int i) { + return tracks.detIndices().size(i); + } +}; + +namespace pixelTrack { + + template + struct QualityCutsT {}; + + template + struct QualityCutsT> { + using TrackSoAView = typename TrackSoA::template TrackSoAHeterogeneousLayout<>::View; + using TrackSoAConstView = typename TrackSoA::template TrackSoAHeterogeneousLayout<>::ConstView; + using tracksHelper = TracksUtilities; + float chi2Coeff[4]; + float chi2MaxPt; // GeV + float chi2Scale; + + struct Region { + float maxTip; // cm + float minPt; // GeV + float maxZip; // cm + }; + + Region triplet; + Region quadruplet; + + ALPAKA_FN_ACC ALPAKA_FN_INLINE bool isHP(const TrackSoAConstView &tracks, int nHits, int it) const { + // impose "region cuts" based on the fit results (phi, Tip, pt, cotan(theta)), Zip) + // default cuts: + // - for triplets: |Tip| < 0.3 cm, pT > 0.5 GeV, |Zip| < 12.0 cm + // - for quadruplets: |Tip| < 0.5 cm, pT > 0.3 GeV, |Zip| < 12.0 cm 
+ // (see CAHitNtupletGeneratorGPU.cc) + auto const &region = (nHits > 3) ? quadruplet : triplet; + return (std::abs(tracksHelper::tip(tracks, it)) < region.maxTip) and (tracks.pt(it) > region.minPt) and + (std::abs(tracksHelper::zip(tracks, it)) < region.maxZip); + } + + ALPAKA_FN_ACC ALPAKA_FN_INLINE bool strictCut(const TrackSoAConstView &tracks, int it) const { + auto roughLog = [](float x) { + // max diff [0.5,12] at 1.25 0.16143 + // average diff 0.0662998 + union IF { + uint32_t i; + float f; + }; + IF z; + z.f = x; + uint32_t lsb = 1 < 21; + z.i += lsb; + z.i >>= 21; + auto f = z.i & 3; + int ex = int(z.i >> 2) - 127; + + // log2(1+0.25*f) + // averaged over bins + const float frac[4] = {0.160497f, 0.452172f, 0.694562f, 0.901964f}; + return float(ex) + frac[f]; + }; + + float pt = std::min(tracks.pt(it), chi2MaxPt); + float chi2Cut = chi2Scale * (chi2Coeff[0] + roughLog(pt) * chi2Coeff[1]); + if (tracks.chi2(it) >= chi2Cut) { +#ifdef NTUPLE_FIT_DEBUG + printf("Bad chi2 %d pt %f eta %f chi2 %f\n", it, tracks.pt(it), tracks.eta(it), tracks.chi2(it)); +#endif + return true; + } + return false; + } + }; + + template + struct QualityCutsT> { + using TrackSoAView = typename TrackSoA::template TrackSoAHeterogeneousLayout<>::View; + using TrackSoAConstView = typename TrackSoA::template TrackSoAHeterogeneousLayout<>::ConstView; + using tracksHelper = TracksUtilities; + + float maxChi2; + float minPt; + float maxTip; + float maxZip; + + ALPAKA_FN_ACC ALPAKA_FN_INLINE bool isHP(const TrackSoAConstView &tracks, int nHits, int it) const { + return (std::abs(tracksHelper::tip(tracks, it)) < maxTip) and (tracks.pt(it) > minPt) and + (std::abs(tracksHelper::zip(tracks, it)) < maxZip); + } + ALPAKA_FN_ACC ALPAKA_FN_INLINE bool strictCut(const TrackSoAConstView &tracks, int it) const { + return tracks.chi2(it) >= maxChi2; + } + }; + +} // namespace pixelTrack + +// TODO: Should those be placed in the ALPAKA_ACCELERATOR_NAMESPACE +template struct TracksUtilities; +template
struct TracksUtilities; + +#endif diff --git a/DataFormats/TrackSoA/src/alpaka/classes_cuda.h b/DataFormats/TrackSoA/src/alpaka/classes_cuda.h new file mode 100644 index 0000000000000..0212d576ec101 --- /dev/null +++ b/DataFormats/TrackSoA/src/alpaka/classes_cuda.h @@ -0,0 +1,14 @@ + +#ifndef DataFormats_Track_src_alpaka_classes_cuda_h +#define DataFormats_Track_src_alpaka_classes_cuda_h + +#include "DataFormats/Common/interface/DeviceProduct.h" +#include "DataFormats/Common/interface/Wrapper.h" +#include "DataFormats/TrackSoA/interface/TrackLayout.h" +#include "DataFormats/TrackSoA/interface/alpaka/TrackSoACollection.h" +#include "DataFormats/TrackSoA/interface/TrackSoADevice.h" +#include "HeterogeneousCore/AlpakaInterface/interface/config.h" + +using namespace pixelTopology; + +#endif // DataFormats_Track_src_alpaka_classes_cuda_h diff --git a/DataFormats/TrackSoA/src/alpaka/classes_cuda_def.xml b/DataFormats/TrackSoA/src/alpaka/classes_cuda_def.xml new file mode 100644 index 0000000000000..1406d55e4bd6e --- /dev/null +++ b/DataFormats/TrackSoA/src/alpaka/classes_cuda_def.xml @@ -0,0 +1,10 @@ + + + + + + + + + + diff --git a/DataFormats/TrackSoA/src/alpaka/classes_rocm.h b/DataFormats/TrackSoA/src/alpaka/classes_rocm.h new file mode 100644 index 0000000000000..0f59bb8fff16a --- /dev/null +++ b/DataFormats/TrackSoA/src/alpaka/classes_rocm.h @@ -0,0 +1,14 @@ + +#ifndef DataFormats_Track_src_alpaka_classes_rocm_h +#define DataFormats_Track_src_alpaka_classes_rocm_h + +#include "DataFormats/Common/interface/DeviceProduct.h" +#include "DataFormats/Common/interface/Wrapper.h" +#include "DataFormats/TrackSoA/interface/TrackLayout.h" +#include "DataFormats/TrackSoA/interface/alpaka/TrackSoACollection.h" +#include "DataFormats/TrackSoA/interface/TrackSoADevice.h" +#include "HeterogeneousCore/AlpakaInterface/interface/config.h" + +using namespace pixelTopology; + +#endif // DataFormats_Track_src_alpaka_classes_rocm_h diff --git 
a/DataFormats/TrackSoA/src/alpaka/classes_rocm_def.xml b/DataFormats/TrackSoA/src/alpaka/classes_rocm_def.xml new file mode 100644 index 0000000000000..8ccf1f761a745 --- /dev/null +++ b/DataFormats/TrackSoA/src/alpaka/classes_rocm_def.xml @@ -0,0 +1,10 @@ + + + + + + + + + + diff --git a/DataFormats/TrackSoA/src/classes.h b/DataFormats/TrackSoA/src/classes.h new file mode 100644 index 0000000000000..c12ec31890edb --- /dev/null +++ b/DataFormats/TrackSoA/src/classes.h @@ -0,0 +1,11 @@ +#ifndef DataFormats_Track_src_classes_h +#define DataFormats_Track_src_classes_h + +#include "DataFormats/Common/interface/Wrapper.h" +#include "DataFormats/TrackSoA/interface/TrackLayout.h" +#include "DataFormats/TrackSoA/interface/TrackSoAHost.h" + +using namespace pixelTopology; +using namespace pixelTrack; + +#endif // DataFormats_Track_src_classes_h diff --git a/DataFormats/TrackSoA/src/classes_def.xml b/DataFormats/TrackSoA/src/classes_def.xml new file mode 100644 index 0000000000000..592458cb82c3d --- /dev/null +++ b/DataFormats/TrackSoA/src/classes_def.xml @@ -0,0 +1,63 @@ + + + + + + + + + + + + + + + + + + + + >::ROOTReadStreamer(newObj, onfile.layout_); + ]]> + + + + + + + >::ROOTReadStreamer(newObj, onfile.layout_); + ]]> + + + + + + + >::ROOTReadStreamer(newObj, onfile.layout_); + ]]> + + + + + diff --git a/DataFormats/TrackSoA/test/BuildFile.xml b/DataFormats/TrackSoA/test/BuildFile.xml new file mode 100644 index 0000000000000..ce2b273d90577 --- /dev/null +++ b/DataFormats/TrackSoA/test/BuildFile.xml @@ -0,0 +1,6 @@ + + + + + + diff --git a/DataFormats/TrackSoA/test/alpaka/TrackSoAHeterogeneous_test.cc b/DataFormats/TrackSoA/test/alpaka/TrackSoAHeterogeneous_test.cc new file mode 100644 index 0000000000000..8b7565200c52d --- /dev/null +++ b/DataFormats/TrackSoA/test/alpaka/TrackSoAHeterogeneous_test.cc @@ -0,0 +1,81 @@ +/** + Simple test for the pixelTrack::TrackSoA data structure + which inherits from PortableDeviceCollection. 
+ + Creates an instance of the class (automatically allocates + memory on device), passes the view of the SoA data to + the alpaka kernels which: + - Fill the SoA with data. + - Verify that the data written is correct. + + Then, the SoA data are copied back to Host, where + a temporary host-side view (tmp_view) is created using + the same Layout to access the data on host and print it. + */ + +#include +#include +#include "DataFormats/TrackSoA/interface/alpaka/TrackSoACollection.h" +#include "DataFormats/TrackSoA/interface/TrackSoADevice.h" +#include "DataFormats/TrackSoA/interface/TrackSoAHost.h" +#include "HeterogeneousCore/AlpakaInterface/interface/devices.h" +#include "HeterogeneousCore/AlpakaInterface/interface/host.h" +#include "HeterogeneousCore/AlpakaInterface/interface/memory.h" +#include "HeterogeneousCore/AlpakaInterface/interface/config.h" +#include "HeterogeneousCore/AlpakaInterface/interface/workdivision.h" +#include "Geometry/CommonTopologies/interface/SimplePixelTopology.h" + +using namespace std; +using namespace ALPAKA_ACCELERATOR_NAMESPACE; +using namespace ALPAKA_ACCELERATOR_NAMESPACE::pixelTrack; + +namespace ALPAKA_ACCELERATOR_NAMESPACE { + namespace testTrackSoA { + + template + void runKernels(TrackSoAView tracks_view, Queue& queue); + } +} // namespace ALPAKA_ACCELERATOR_NAMESPACE + +int main() { + const auto host = cms::alpakatools::host(); + const auto device = cms::alpakatools::devices()[0]; + Queue queue(device); + + // Inner scope to deallocate memory before destroying the stream + { + // Instantiate tracks on device. PortableDeviceCollection allocates + // SoA on device automatically. + TrackSoACollection tracks_d(queue); + testTrackSoA::runKernels(tracks_d.view(), queue); + + // Instantiate tracks on host. This is where the data will be + // copied to from device.
+ TrackSoAHost tracks_h(queue); + + std::cout << tracks_h.view().metadata().size() << std::endl; + alpaka::memcpy(queue, tracks_h.buffer(), tracks_d.const_buffer()); + alpaka::wait(queue); + + // Print results + std::cout << "pt" + << "\t" + << "eta" + << "\t" + << "chi2" + << "\t" + << "quality" + << "\t" + << "nLayers" + << "\t" + << "hitIndices off" << std::endl; + + for (int i = 0; i < 10; ++i) { + std::cout << tracks_h.view()[i].pt() << "\t" << tracks_h.view()[i].eta() << "\t" << tracks_h.view()[i].chi2() + << "\t" << (int)tracks_h.view()[i].quality() << "\t" << (int)tracks_h.view()[i].nLayers() << "\t" + << tracks_h.view().hitIndices().off[i] << std::endl; + } + } + + return 0; +} diff --git a/DataFormats/TrackSoA/test/alpaka/TrackSoAHeterogeneous_test.dev.cc b/DataFormats/TrackSoA/test/alpaka/TrackSoAHeterogeneous_test.dev.cc new file mode 100644 index 0000000000000..dcecbcabde26e --- /dev/null +++ b/DataFormats/TrackSoA/test/alpaka/TrackSoAHeterogeneous_test.dev.cc @@ -0,0 +1,73 @@ +#include "Geometry/CommonTopologies/interface/SimplePixelTopology.h" +#include "DataFormats/TrackSoA/interface/TrackDefinitions.h" +#include "DataFormats/TrackSoA/interface/alpaka/TrackSoACollection.h" +#include "DataFormats/TrackSoA/interface/TrackSoADevice.h" +#include "DataFormats/TrackSoA/interface/TrackSoAHost.h" + +using Quality = pixelTrack::Quality; +namespace ALPAKA_ACCELERATOR_NAMESPACE { + using namespace cms::alpakatools; + namespace testTrackSoA { + + // Kernel which fills the TrackSoAView with data + // to test writing to it + template + class TestFillKernel { + public: + template >> + ALPAKA_FN_ACC void operator()(TAcc const& acc, TrackSoAView tracks_view) const { + const int32_t i = alpaka::getIdx(acc)[0u]; + + if (i == 0) { + tracks_view.nTracks() = 420; + } + + for (int32_t j : elements_with_stride(acc, tracks_view.metadata().size())) { + tracks_view[j].pt() = (float)j; + tracks_view[j].eta() = (float)j; + tracks_view[j].chi2() = (float)j; + 
tracks_view[j].quality() = (Quality)(j % 256); + tracks_view[j].nLayers() = j % 128; + tracks_view.hitIndices().off[j] = j; + } + } + }; + + // Kernel which reads from the TrackSoAView to verify + // that it was written correctly from the fill kernel + template + class TestVerifyKernel { + public: + template >> + ALPAKA_FN_ACC void operator()(TAcc const& acc, TrackSoAConstView tracks_view) const { + const int32_t i = alpaka::getIdx(acc)[0u]; + + if (i == 0) { + ALPAKA_ASSERT_OFFLOAD(tracks_view.nTracks() == 420); + } + for (int32_t j : elements_with_stride(acc, tracks_view.metadata().size())) { + assert(abs(tracks_view[j].pt() - (float)j) < .0001); + assert(abs(tracks_view[j].eta() - (float)j) < .0001); + assert(abs(tracks_view[j].chi2() - (float)j) < .0001); + assert(tracks_view[j].quality() == (Quality)(j % 256)); + assert(tracks_view[j].nLayers() == j % 128); + assert(tracks_view.hitIndices().off[j] == j); + } + } + }; + + // Host function which invokes the two kernels above + template + void runKernels(TrackSoAView tracks_view, Queue& queue) { + uint32_t items = 64; + uint32_t groups = divide_up_by(tracks_view.metadata().size(), items); + auto workDiv = make_workdiv(groups, items); + alpaka::exec(queue, workDiv, TestFillKernel{}, tracks_view); + //alpaka::exec(queue, workDiv, TestVerifyKernel{}, tracks_view); //TODO: wait for some PR that solves this and then check it!!! 
+ } + + template void runKernels(TrackSoAView tracks_view, Queue& queue); + template void runKernels(TrackSoAView tracks_view, Queue& queue); + + } // namespace testTrackSoA +} // namespace ALPAKA_ACCELERATOR_NAMESPACE diff --git a/DataFormats/TrackingRecHitSoA/BuildFile.xml b/DataFormats/TrackingRecHitSoA/BuildFile.xml new file mode 100644 index 0000000000000..d7c9dcbbfb86f --- /dev/null +++ b/DataFormats/TrackingRecHitSoA/BuildFile.xml @@ -0,0 +1,12 @@ + + + + + + + + + + + + diff --git a/DataFormats/TrackingRecHitSoA/interface/SiPixelHitStatus.h b/DataFormats/TrackingRecHitSoA/interface/SiPixelHitStatus.h new file mode 100644 index 0000000000000..06205906d8d2f --- /dev/null +++ b/DataFormats/TrackingRecHitSoA/interface/SiPixelHitStatus.h @@ -0,0 +1,20 @@ +#ifndef DataFormats_TrackingRecHitSoA_SiPixelHitStatus_H +#define DataFormats_TrackingRecHitSoA_SiPixelHitStatus_H + +#include + +// more information on bit fields : https://en.cppreference.com/w/cpp/language/bit_field +struct SiPixelHitStatus { + bool isBigX : 1; // ∈[0,1] + bool isOneX : 1; // ∈[0,1] + bool isBigY : 1; // ∈[0,1] + bool isOneY : 1; // ∈[0,1] + uint8_t qBin : 3; // ∈[0,1,...,7] +}; + +struct SiPixelHitStatusAndCharge { + SiPixelHitStatus status; + uint32_t charge : 24; +}; + +#endif diff --git a/DataFormats/TrackingRecHitSoA/interface/TrackingRecHitSoADevice.h b/DataFormats/TrackingRecHitSoA/interface/TrackingRecHitSoADevice.h new file mode 100644 index 0000000000000..451d0a4a54054 --- /dev/null +++ b/DataFormats/TrackingRecHitSoA/interface/TrackingRecHitSoADevice.h @@ -0,0 +1,43 @@ +#ifndef DataFormats_RecHits_interface_TrackingRecHitSoADevice_h +#define DataFormats_RecHits_interface_TrackingRecHitSoADevice_h + +#include + +#include + +#include "DataFormats/Portable/interface/PortableDeviceCollection.h" +#include "DataFormats/TrackingRecHitSoA/interface/TrackingRecHitSoAHost.h" +#include "DataFormats/TrackingRecHitSoA/interface/TrackingRecHitsLayout.h" +#include 
"HeterogeneousCore/AlpakaInterface/interface/config.h" + +template +class TrackingRecHitDevice : public PortableDeviceCollection, TDev> { +public: + using hitSoA = TrackingRecHitAlpakaSoA; + //Need to decorate the class with the inherited portable accessors being now a template + using PortableDeviceCollection, TDev>::view; + using PortableDeviceCollection, TDev>::const_view; + using PortableDeviceCollection, TDev>::buffer; + + TrackingRecHitDevice() = default; + + // Constructor which specifies the SoA size + template + explicit TrackingRecHitDevice(uint32_t nHits, int32_t offsetBPIX2, uint32_t const* hitsModuleStart, TQueue queue) + : PortableDeviceCollection, TDev>(nHits, queue) { + const auto device = alpaka::getDev(queue); + + auto start_h = cms::alpakatools::make_host_view(hitsModuleStart, TrackerTraits::numberOfModules + 1); + auto start_d = + cms::alpakatools::make_device_view(device, view().hitsModuleStart().data(), TrackerTraits::numberOfModules + 1); + alpaka::memcpy(queue, start_d, start_h); + + auto off_h = cms::alpakatools::make_host_view(offsetBPIX2); + auto off_d = cms::alpakatools::make_device_view(device, view().offsetBPIX2()); + alpaka::memcpy(queue, off_d, off_h); + } + + uint32_t nHits() const { return view().metadata().size(); } + uint32_t const* hitsModuleStart() const { return view().hitsModuleStart().data(); } +}; +#endif // DataFormats_RecHits_interface_TrackingRecHitSoADevice_h diff --git a/DataFormats/TrackingRecHitSoA/interface/TrackingRecHitSoAHost.h b/DataFormats/TrackingRecHitSoA/interface/TrackingRecHitSoAHost.h new file mode 100644 index 0000000000000..f5acf376d1fdd --- /dev/null +++ b/DataFormats/TrackingRecHitSoA/interface/TrackingRecHitSoAHost.h @@ -0,0 +1,43 @@ +#ifndef DataFormats_TrackingRecHitSoA_interface_TrackingRecHitsHost_h +#define DataFormats_TrackingRecHitSoA_interface_TrackingRecHitsHost_h + +#include + +#include + +#include "DataFormats/Portable/interface/PortableHostCollection.h" +#include 
"DataFormats/TrackingRecHitSoA/interface/TrackingRecHitsLayout.h" +#include "HeterogeneousCore/AlpakaInterface/interface/config.h" + +template +class TrackingRecHitHost : public PortableHostCollection> { +public: + using hitSoA = TrackingRecHitAlpakaSoA; + //Need to decorate the class with the inherited portable accessors being now a template + using PortableHostCollection>::view; + using PortableHostCollection>::const_view; + using PortableHostCollection>::buffer; + + TrackingRecHitHost() = default; + + template + explicit TrackingRecHitHost(uint32_t nHits, TQueue queue) + : PortableHostCollection>(nHits, queue) {} + + // Constructor which specifies the SoA size + template + explicit TrackingRecHitHost(uint32_t nHits, int32_t offsetBPIX2, uint32_t const* hitsModuleStart, TQueue queue) + : PortableHostCollection>(nHits, queue) { + std::copy(hitsModuleStart, hitsModuleStart + TrackerTraits::numberOfModules + 1, view().hitsModuleStart().data()); + view().offsetBPIX2() = offsetBPIX2; + } + + uint32_t nHits() const { return view().metadata().size(); } + uint32_t const* hitsModuleStart() const { return view().hitsModuleStart().data(); } +}; + +using TrackingRecHitHostPhase1 = TrackingRecHitHost; +using TrackingRecHitHostPhase2 = TrackingRecHitHost; +using TrackingRecHitHostHIonPhase1 = TrackingRecHitHost; + +#endif // DataFormats_TrackingRecHitSoA_interface_TrackingRecHitsHost_h diff --git a/DataFormats/TrackingRecHitSoA/interface/TrackingRecHitsLayout.h b/DataFormats/TrackingRecHitSoA/interface/TrackingRecHitsLayout.h new file mode 100644 index 0000000000000..2458ef10677a4 --- /dev/null +++ b/DataFormats/TrackingRecHitSoA/interface/TrackingRecHitsLayout.h @@ -0,0 +1,62 @@ +#ifndef DataFormats_RecHits_TrackingRecHitsLayout_h +#define DataFormats_RecHits_TrackingRecHitsLayout_h + +#include + +#include "DataFormats/SoATemplate/interface/SoALayout.h" +#include "DataFormats/TrackingRecHitSoA/interface/SiPixelHitStatus.h" +#include 
"Geometry/CommonTopologies/interface/SimplePixelTopology.h" +#include "HeterogeneousCore/AlpakaInterface/interface/HistoContainer.h" +#include "RecoLocalTracker/SiPixelRecHits/interface/pixelCPEforDevice.h" + +template +struct TrackingRecHitAlpakaSoA { + using hindex_type = typename TrackerTraits::hindex_type; + using PhiBinner = cms::alpakatools::HistoContainer; //28 for phase2 geometry + using PhiBinnerStorageType = typename PhiBinner::index_type; + using AverageGeometry = pixelTopology::AverageGeometryT; + using HitLayerStartArray = std::array; + using HitModuleStartArray = std::array; + + //Is it better to have two split? + GENERATE_SOA_LAYOUT(Layout, + SOA_COLUMN(float, xLocal), + SOA_COLUMN(float, yLocal), + SOA_COLUMN(float, xerrLocal), + SOA_COLUMN(float, yerrLocal), + SOA_COLUMN(float, xGlobal), + SOA_COLUMN(float, yGlobal), + SOA_COLUMN(float, zGlobal), + SOA_COLUMN(float, rGlobal), + SOA_COLUMN(int16_t, iphi), + SOA_COLUMN(SiPixelHitStatusAndCharge, chargeAndStatus), + SOA_COLUMN(int16_t, clusterSizeX), + SOA_COLUMN(int16_t, clusterSizeY), + SOA_COLUMN(uint16_t, detectorIndex), + SOA_SCALAR(int32_t, offsetBPIX2), + //These above could be separated in a specific + //layout since they don't depends on the template + //for the moment I'm keeping them here + SOA_COLUMN(PhiBinnerStorageType, phiBinnerStorage), + SOA_SCALAR(HitModuleStartArray, hitsModuleStart), + SOA_SCALAR(HitLayerStartArray, hitsLayerStart), + SOA_SCALAR(AverageGeometry, averageGeometry), + SOA_SCALAR(PhiBinner, phiBinner)); +}; + +template +using TrackingRecHitAlpakaLayout = + typename TrackingRecHitAlpakaSoA::template Layout<>; +template +using TrackingRecHitAlpakaSoAView = + typename TrackingRecHitAlpakaSoA::template Layout<>::View; +template +using TrackingRecHitAlpakaSoAConstView = + typename TrackingRecHitAlpakaSoA::template Layout<>::ConstView; + +#endif diff --git a/DataFormats/TrackingRecHitSoA/interface/alpaka/TrackingRecHitSoACollection.h 
b/DataFormats/TrackingRecHitSoA/interface/alpaka/TrackingRecHitSoACollection.h new file mode 100644 index 0000000000000..0a63841ad5c13 --- /dev/null +++ b/DataFormats/TrackingRecHitSoA/interface/alpaka/TrackingRecHitSoACollection.h @@ -0,0 +1,40 @@ +#ifndef DataFormats_RecHits_interface_alpaka_TrackingRecHitSoACollection_h +#define DataFormats_RecHits_interface_alpaka_TrackingRecHitSoACollection_h + +#include +#include +#include "DataFormats/Portable/interface/alpaka/PortableCollection.h" +#include "DataFormats/TrackingRecHitSoA/interface/TrackingRecHitsLayout.h" +#include "DataFormats/TrackingRecHitSoA/interface/TrackingRecHitSoAHost.h" +#include "DataFormats/TrackingRecHitSoA/interface/TrackingRecHitSoADevice.h" +#include "HeterogeneousCore/AlpakaInterface/interface/CopyToHost.h" + +namespace ALPAKA_ACCELERATOR_NAMESPACE { +#ifdef ALPAKA_ACC_CPU_B_SEQ_T_SEQ_ENABLED + template + using TrackingRecHitAlpakaCollection = TrackingRecHitHost; +#else + template + using TrackingRecHitAlpakaCollection = TrackingRecHitDevice; +#endif + //Classes definition for Phase1/Phase2, to make the classes_def lighter. Not actually used in the code. 
+ using TrackingRecHitAlpakaSoAPhase1 = TrackingRecHitAlpakaCollection; + using TrackingRecHitAlpakaSoAPhase2 = TrackingRecHitAlpakaCollection; + using TrackingRecHitAlpakaSoAHIonPhase1 = TrackingRecHitAlpakaCollection; + +} // namespace ALPAKA_ACCELERATOR_NAMESPACE + +namespace cms::alpakatools { + template + struct CopyToHost> { + template + static auto copyAsync(TQueue& queue, + ALPAKA_ACCELERATOR_NAMESPACE::TrackingRecHitAlpakaCollection const& deviceData) { + TrackingRecHitHost hostData(deviceData.view().metadata().size(), queue); + alpaka::memcpy(queue, hostData.buffer(), deviceData.buffer()); + return hostData; + } + }; +} // namespace cms::alpakatools + +#endif // DataFormats_RecHits_interface_alpaka_TrackingRecHitSoACollection_h \ No newline at end of file diff --git a/DataFormats/TrackingRecHitSoA/src/alpaka/classes_cuda.h b/DataFormats/TrackingRecHitSoA/src/alpaka/classes_cuda.h new file mode 100644 index 0000000000000..5626fb582225e --- /dev/null +++ b/DataFormats/TrackingRecHitSoA/src/alpaka/classes_cuda.h @@ -0,0 +1,13 @@ +#ifndef DataFormats_TrackingRecHitSoA_alpaka_alasses_cuda_h +#define DataFormats_TrackingRecHitSoA_alpaka_alasses_cuda_h + +#include "DataFormats/Common/interface/DeviceProduct.h" +#include "DataFormats/Common/interface/Wrapper.h" +#include "DataFormats/TrackingRecHitSoA/interface/TrackingRecHitsLayout.h" +#include "DataFormats/TrackingRecHitSoA/interface/alpaka/TrackingRecHitSoACollection.h" +#include "DataFormats/TrackingRecHitSoA/interface/TrackingRecHitSoADevice.h" +#include "HeterogeneousCore/AlpakaInterface/interface/config.h" + +using namespace pixelTopology; + +#endif // DataFormats_TrackingRecHitSoA_alpaka_alasses_cuda_h \ No newline at end of file diff --git a/DataFormats/TrackingRecHitSoA/src/alpaka/classes_cuda_def.xml b/DataFormats/TrackingRecHitSoA/src/alpaka/classes_cuda_def.xml new file mode 100644 index 0000000000000..ae76e081f4cb0 --- /dev/null +++ b/DataFormats/TrackingRecHitSoA/src/alpaka/classes_cuda_def.xml @@ 
-0,0 +1,10 @@ + + + + + + + + + + diff --git a/DataFormats/TrackingRecHitSoA/src/alpaka/classes_rocm.h b/DataFormats/TrackingRecHitSoA/src/alpaka/classes_rocm.h new file mode 100644 index 0000000000000..3c772b2f1eba0 --- /dev/null +++ b/DataFormats/TrackingRecHitSoA/src/alpaka/classes_rocm.h @@ -0,0 +1,10 @@ +#ifndef DataFormats_TrackingRecHitSoA_Alpaka_Classes_rocm_h +#define DataFormats_TrackingRecHitSoA_Alpaka_Classes_rocm_h + +#include "DataFormats/Common/interface/Wrapper.h" +#include "DataFormats/Common/interface/DeviceProduct.h" +#include "DataFormats/TrackingRecHitSoA/interface/TrackingRecHitsLayout.h" +#include "DataFormats/TrackingRecHitSoA/interface/alpaka/TrackingRecHitSoACollection.h" +#include "DataFormats/TrackingRecHitSoA/interface/TrackingRecHitSoADevice.h" + +#endif // DataFormats_TrackingRecHitSoA_Alpaka_Classes_rocm_h \ No newline at end of file diff --git a/DataFormats/TrackingRecHitSoA/src/alpaka/classes_rocm_def.xml b/DataFormats/TrackingRecHitSoA/src/alpaka/classes_rocm_def.xml new file mode 100644 index 0000000000000..afa188cca6c2c --- /dev/null +++ b/DataFormats/TrackingRecHitSoA/src/alpaka/classes_rocm_def.xml @@ -0,0 +1,11 @@ + + + + + + + + + + + diff --git
b/DataFormats/TrackingRecHitSoA/src/classes_def.xml new file mode 100644 index 0000000000000..78d14230a1062 --- /dev/null +++ b/DataFormats/TrackingRecHitSoA/src/classes_def.xml @@ -0,0 +1,62 @@ + + + + + + + + + + + + + + + + + + + >::ROOTReadStreamer(newObj, onfile.layout_); + ]]> + + + + + + + >::ROOTReadStreamer(newObj, onfile.layout_); + ]]> + + + + + + + >::ROOTReadStreamer(newObj, onfile.layout_); + ]]> + + + + + diff --git a/DataFormats/TrackingRecHitSoA/test/BuildFile.xml b/DataFormats/TrackingRecHitSoA/test/BuildFile.xml new file mode 100644 index 0000000000000..5b61a3460fb7d --- /dev/null +++ b/DataFormats/TrackingRecHitSoA/test/BuildFile.xml @@ -0,0 +1,6 @@ + + + + + + diff --git a/DataFormats/TrackingRecHitSoA/test/alpaka/Hits_test.cc b/DataFormats/TrackingRecHitSoA/test/alpaka/Hits_test.cc new file mode 100644 index 0000000000000..88942a516f7ce --- /dev/null +++ b/DataFormats/TrackingRecHitSoA/test/alpaka/Hits_test.cc @@ -0,0 +1,47 @@ +#include "DataFormats/TrackingRecHitSoA/interface/TrackingRecHitSoAHost.h" +#include "DataFormats/TrackingRecHitSoA/interface/TrackingRecHitSoADevice.h" +#include "DataFormats/TrackingRecHitSoA/interface/alpaka/TrackingRecHitSoACollection.h" + +#include "HeterogeneousCore/AlpakaInterface/interface/devices.h" +#include "HeterogeneousCore/AlpakaInterface/interface/host.h" +#include "HeterogeneousCore/AlpakaInterface/interface/memory.h" +#include "HeterogeneousCore/AlpakaInterface/interface/config.h" +#include "HeterogeneousCore/AlpakaInterface/interface/workdivision.h" + +#include "Geometry/CommonTopologies/interface/SimplePixelTopology.h" + +#include +#include + +using namespace ALPAKA_ACCELERATOR_NAMESPACE; + +namespace ALPAKA_ACCELERATOR_NAMESPACE { + namespace testTrackingRecHitSoA { + + template + void runKernels(TrackingRecHitAlpakaSoAView& hits, Queue& queue); + + } +} // namespace ALPAKA_ACCELERATOR_NAMESPACE + +int main() { + const auto host = cms::alpakatools::host(); + const auto device = 
cms::alpakatools::devices()[0]; + Queue queue(device); + + // inner scope to deallocate memory before destroying the queue + { + uint32_t nHits = 2000; + int32_t offset = 100; + uint32_t moduleStart[pixelTopology::Phase1::numberOfModules + 1]; + + for (size_t i = 0; i < pixelTopology::Phase1::numberOfModules + 1; i++) { + moduleStart[i] = i * 2; + } + TrackingRecHitAlpakaCollection tkhit(nHits, offset, &moduleStart[0], queue); + + testTrackingRecHitSoA::runKernels(tkhit.view(), queue); + alpaka::wait(queue); + } + return 0; +} diff --git a/DataFormats/TrackingRecHitSoA/test/alpaka/Hits_test.dev.cc b/DataFormats/TrackingRecHitSoA/test/alpaka/Hits_test.dev.cc new file mode 100644 index 0000000000000..2ef69aa101a09 --- /dev/null +++ b/DataFormats/TrackingRecHitSoA/test/alpaka/Hits_test.dev.cc @@ -0,0 +1,67 @@ +#include "DataFormats/TrackingRecHitSoA/interface/TrackingRecHitsLayout.h" +#include "DataFormats/TrackingRecHitSoA/interface/TrackingRecHitSoADevice.h" +#include "DataFormats/TrackingRecHitSoA/interface/alpaka/TrackingRecHitSoACollection.h" +#include "HeterogeneousCore/AlpakaInterface/interface/workdivision.h" +#include "HeterogeneousCore/AlpakaInterface/interface/traits.h" + +using namespace alpaka; + +namespace ALPAKA_ACCELERATOR_NAMESPACE { + + using namespace cms::alpakatools; + namespace testTrackingRecHitSoA { + + template + class TestFillKernel { + public: + template >> + ALPAKA_FN_ACC void operator()(TAcc const& acc, TrackingRecHitAlpakaSoAView soa) const { + const uint32_t i(alpaka::getIdx(acc)[0u]); + const uint32_t j(alpaka::getIdx(acc)[0u]); + + if (i == 0 and j == 0) { + soa.offsetBPIX2() = 22; + soa[10].xLocal() = 1.11; + } + + soa[i].iphi() = i % 10; + soa.hitsLayerStart()[j] = j; + } + }; + + template + class ShowKernel { + public: + template >> + ALPAKA_FN_ACC void operator()(TAcc const& acc, TrackingRecHitAlpakaSoAConstView soa) const { + const uint32_t i(alpaka::getIdx(acc)[0u]); + const uint32_t j(alpaka::getIdx(acc)[0u]); + + if (i == 0 and 
j == 0) { + printf("nbins = %d \n", soa.phiBinner().nbins()); + printf("offsetBPIX %d ->%d \n", i, soa.offsetBPIX2()); + printf("nHits %d ->%d \n", i, soa.metadata().size()); + //printf("hitsModuleStart %d ->%d \n", i, soa.hitsModuleStart().at(28)); + } + + if (i < 10) // can be increased to soa.nHits() for debugging + printf("iPhi %d ->%d \n", i, soa[i].iphi()); + } + }; + + template + void runKernels(TrackingRecHitAlpakaSoAView& view, Queue& queue) { + uint32_t items = 64; + uint32_t groups = divide_up_by(view.metadata().size(), items); + auto workDiv = make_workdiv(groups, items); + alpaka::exec(queue, workDiv, TestFillKernel{}, view); + alpaka::exec(queue, workDiv, ShowKernel{}, view); + } + + template void runKernels(TrackingRecHitAlpakaSoAView& view, + Queue& queue); + template void runKernels(TrackingRecHitAlpakaSoAView& view, + Queue& queue); + + } // namespace testTrackingRecHitSoA +} // namespace ALPAKA_ACCELERATOR_NAMESPACE diff --git a/DataFormats/Vertex/BuildFile.xml b/DataFormats/Vertex/BuildFile.xml new file mode 100644 index 0000000000000..b0a2f14d16326 --- /dev/null +++ b/DataFormats/Vertex/BuildFile.xml @@ -0,0 +1,11 @@ + + + + + + + + + + + diff --git a/DataFormats/Vertex/README.md b/DataFormats/Vertex/README.md new file mode 100644 index 0000000000000..3e495d15f776e --- /dev/null +++ b/DataFormats/Vertex/README.md @@ -0,0 +1,45 @@ +# Vertex CUDA Data Formats + +`CUDADataFormat`s meant to be used on Host (CPU) or Device (CUDA GPU) for +storing information about vertices created during the Pixel-local Reconstruction +chain. It stores data in an SoA manner. It contains the data that was previously +contained in the deprecated `ZVertexSoA` class. 
+ +The host format inherits from `CUDADataFormats/Common/interface/PortableHostCollection.h`, +while the device format inherits from `CUDADataFormats/Common/interface/PortableDeviceCollection.h` + +Both formats use the same SoA Layout (`ZVertexSoAHeterogeneousLayout`) which is generated +via the `GENERATE_SOA_LAYOUT` macro in the `ZVertexUtilities.h` file. + +## Notes + +- Initially, `ZVertexSoA` had distinct array sizes for each attribute (e.g. `zv` was `MAXVTX` elements +long, `ndof` was `MAXTRACKS` elements long). All columns are now of uniform `MAXTRACKS` size, +meaning that there will be some wasted space (approx. 190kB). +- Host and Device classes should **not** be created via inheritance, as they're done here, +but via composition. See [this discussion](https://github.com/cms-sw/cmssw/pull/40465#discussion_r1066039309). + +## ZVertexHeterogeneousHost + +The version of the data format to be used for storing vertex data on the CPU. +Instances of this class are to be used for: + +- Having a place to copy data to host from device, via `cudaMemcpy`, or +- Running host-side algorithms using data stored in an SoA manner. + +## ZVertexHeterogeneousDevice + +The version of the data format to be used for storing vertex data on the GPU. + +Instances of `ZVertexHeterogeneousDevice` are to be created on host and be +used on device only. To do so, the instance's `view()` method is to be called +to pass a `View` to any kernel launched. Accessing data from the `view()` is not +possible on the host side. + +## Utilities + +Apart from `ZVertexSoAHeterogeneousLayout`, `ZVertexUtilities.h` also contains +a collection of methods which were originally +defined as class methods inside the `ZVertexSoA` class +which have been adapted to operate on `View` instances, so that they are callable +from within `__global__` kernels, on both CPU and GPU. 
diff --git a/DataFormats/Vertex/interface/ZVertexDefinitions.h b/DataFormats/Vertex/interface/ZVertexDefinitions.h new file mode 100644 index 0000000000000..12842109bedb2 --- /dev/null +++ b/DataFormats/Vertex/interface/ZVertexDefinitions.h @@ -0,0 +1,11 @@ +#ifndef DataFormats_Vertex_ZVertexDefinitions_h +#define DataFormats_Vertex_ZVertexDefinitions_h + +namespace zVertex { + + constexpr uint32_t MAXTRACKS = 32 * 1024; + constexpr uint32_t MAXVTX = 1024; + +} // namespace zVertex + +#endif diff --git a/DataFormats/Vertex/interface/ZVertexLayout.h b/DataFormats/Vertex/interface/ZVertexLayout.h new file mode 100644 index 0000000000000..5c8fc6265b0eb --- /dev/null +++ b/DataFormats/Vertex/interface/ZVertexLayout.h @@ -0,0 +1,27 @@ +#ifndef DataFormats_Vertex_ZVertexLayout_h +#define DataFormats_Vertex_ZVertexLayout_h + +#include +#include "DataFormats/SoATemplate/interface/SoALayout.h" + +GENERATE_SOA_LAYOUT(ZVertexSoAHeterogeneousLayout, + SOA_COLUMN(int16_t, idv), + SOA_COLUMN(float, zv), + SOA_COLUMN(float, wv), + SOA_COLUMN(float, chi2), + SOA_COLUMN(float, ptv2), + SOA_COLUMN(int32_t, ndof), + SOA_COLUMN(uint16_t, sortInd), + SOA_SCALAR(uint32_t, nvFinal)) + +// Previous ZVertexSoA class methods. +// They operate on View and ConstView of the ZVertexSoA. 
+namespace zVertex { + // Common types for both Host and Device code + using ZVertexSoALayout = ZVertexSoAHeterogeneousLayout<>; + using ZVertexSoAView = ZVertexSoAHeterogeneousLayout<>::View; + using ZVertexSoAConstView = ZVertexSoAHeterogeneousLayout<>::ConstView; + +} // namespace zVertex + +#endif diff --git a/DataFormats/Vertex/interface/ZVertexSoADevice.h b/DataFormats/Vertex/interface/ZVertexSoADevice.h new file mode 100644 index 0000000000000..600f0edc7ca42 --- /dev/null +++ b/DataFormats/Vertex/interface/ZVertexSoADevice.h @@ -0,0 +1,29 @@ +#ifndef DataFormats_Vertex_interface_ZVertexSoADevice_h +#define DataFormats_Vertex_interface_ZVertexSoADevice_h + +#include + +#include +#include "DataFormats/Vertex/interface/ZVertexLayout.h" +#include "DataFormats/Vertex/interface/ZVertexDefinitions.h" +#include "DataFormats/Vertex/interface/alpaka/ZVertexUtilities.h" +#include "DataFormats/Vertex/interface/ZVertexSoAHost.h" +#include "DataFormats/Portable/interface/PortableDeviceCollection.h" +#include "HeterogeneousCore/AlpakaInterface/interface/config.h" +#include "HeterogeneousCore/AlpakaInterface/interface/CopyToHost.h" + +template +class ZVertexSoADevice : public PortableDeviceCollection, TDev> { +public: + ZVertexSoADevice() = default; // necessary for ROOT dictionaries + + // Constructor which specifies the SoA size + template + explicit ZVertexSoADevice(TQueue queue) : PortableDeviceCollection, TDev>(S, queue) {} +}; + +using namespace ::zVertex; +template +using ZVertexDevice = ZVertexSoADevice; + +#endif // DataFormats_Vertex_interface_ZVertexSoADevice_h diff --git a/DataFormats/Vertex/interface/ZVertexSoAHost.h b/DataFormats/Vertex/interface/ZVertexSoAHost.h new file mode 100644 index 0000000000000..49c863a0da4fb --- /dev/null +++ b/DataFormats/Vertex/interface/ZVertexSoAHost.h @@ -0,0 +1,29 @@ +#ifndef DataFormats_Vertex_ZVertexSoAHost_H +#define DataFormats_Vertex_ZVertexSoAHost_H + +#include + +#include +#include 
"HeterogeneousCore/AlpakaInterface/interface/config.h" +#include "DataFormats/Vertex/interface/ZVertexLayout.h" +#include "DataFormats/Vertex/interface/ZVertexDefinitions.h" +#include "DataFormats/Portable/interface/PortableHostCollection.h" + +template +class ZVertexSoAHost : public PortableHostCollection> { +public: + ZVertexSoAHost() = default; + + // Constructor which specifies the queue + template + explicit ZVertexSoAHost(TQueue queue) : PortableHostCollection>(S, queue) {} + + // Constructor which specifies the DevHost + explicit ZVertexSoAHost(alpaka_common::DevHost const& host) + : PortableHostCollection>(S, host) {} +}; + +using namespace ::zVertex; +using ZVertexHost = ZVertexSoAHost; + +#endif // DataFormats_Vertex_ZVertexSoAHost_H diff --git a/DataFormats/Vertex/interface/alpaka/ZVertexSoACollection.h b/DataFormats/Vertex/interface/alpaka/ZVertexSoACollection.h new file mode 100644 index 0000000000000..76e09e5e75e8f --- /dev/null +++ b/DataFormats/Vertex/interface/alpaka/ZVertexSoACollection.h @@ -0,0 +1,36 @@ +#ifndef DataFormats_Vertex_interface_ZVertexSoACollection_h +#define DataFormats_Vertex_interface_ZVertexSoACollection_h + +#include + +#include +#include "DataFormats/Vertex/interface/ZVertexLayout.h" +#include "DataFormats/Vertex/interface/ZVertexDefinitions.h" +#include "DataFormats/Vertex/interface/alpaka/ZVertexUtilities.h" +#include "DataFormats/Vertex/interface/ZVertexSoAHost.h" +#include "DataFormats/Vertex/interface/ZVertexSoADevice.h" +#include "DataFormats/Portable/interface/alpaka/PortableCollection.h" +#include "HeterogeneousCore/AlpakaInterface/interface/config.h" +#include "HeterogeneousCore/AlpakaInterface/interface/CopyToHost.h" + +namespace ALPAKA_ACCELERATOR_NAMESPACE { +#ifdef ALPAKA_ACC_CPU_B_SEQ_T_SEQ_ENABLED + using ZVertexCollection = ZVertexHost; +#else + using ZVertexCollection = ZVertexDevice; +#endif +} // namespace ALPAKA_ACCELERATOR_NAMESPACE + +namespace cms::alpakatools { + template <> + struct CopyToHost { + 
template + static auto copyAsync(TQueue& queue, ALPAKA_ACCELERATOR_NAMESPACE::ZVertexCollection const& deviceData) { + ZVertexHost hostData(queue); + alpaka::memcpy(queue, hostData.buffer(), deviceData.buffer()); + return hostData; + } + }; +} // namespace cms::alpakatools + +#endif // DataFormats_Vertex_interface_ZVertexSoACollection_h diff --git a/DataFormats/Vertex/interface/alpaka/ZVertexUtilities.h b/DataFormats/Vertex/interface/alpaka/ZVertexUtilities.h new file mode 100644 index 0000000000000..0b8e11e1d929e --- /dev/null +++ b/DataFormats/Vertex/interface/alpaka/ZVertexUtilities.h @@ -0,0 +1,20 @@ +#ifndef DataFormats_Vertex_ZVertexUtilities_h +#define DataFormats_Vertex_ZVertexUtilities_h + +#include "DataFormats/Vertex/interface/ZVertexLayout.h" +#include "DataFormats/Vertex/interface/ZVertexDefinitions.h" + +// Previous ZVertexSoA class methods. +// They operate on View and ConstView of the ZVertexSoA. +namespace zVertex { + namespace utilities { + using ZVertexSoALayout = ZVertexSoAHeterogeneousLayout<>; + using ZVertexSoAView = ZVertexSoAHeterogeneousLayout<>::View; + using ZVertexSoAConstView = ZVertexSoAHeterogeneousLayout<>::ConstView; + + ALPAKA_FN_HOST_ACC ALPAKA_FN_INLINE void init(ZVertexSoAView &vertices) { vertices.nvFinal() = 0; } + + } // namespace utilities +} // namespace zVertex + +#endif diff --git a/DataFormats/Vertex/src/alpaka/classes_cuda.h b/DataFormats/Vertex/src/alpaka/classes_cuda.h new file mode 100644 index 0000000000000..33ae44d696b8f --- /dev/null +++ b/DataFormats/Vertex/src/alpaka/classes_cuda.h @@ -0,0 +1,10 @@ +#ifndef DataFormats_Vertex_src_alpaka_classes_cuda_h +#define DataFormats_Vertex_src_alpaka_classes_cuda_h + +#include "DataFormats/Common/interface/DeviceProduct.h" +#include "DataFormats/Common/interface/Wrapper.h" +#include "DataFormats/Vertex/interface/ZVertexLayout.h" +#include "DataFormats/Vertex/interface//ZVertexSoADevice.h" +#include "DataFormats/Vertex/interface/alpaka/ZVertexSoACollection.h" + +#endif // 
DataFormats_Vertex_src_alpaka_classes_cuda_h diff --git a/DataFormats/Vertex/src/alpaka/classes_cuda_def.xml b/DataFormats/Vertex/src/alpaka/classes_cuda_def.xml new file mode 100644 index 0000000000000..e38a9c40df99a --- /dev/null +++ b/DataFormats/Vertex/src/alpaka/classes_cuda_def.xml @@ -0,0 +1,6 @@ + + + + + + diff --git a/DataFormats/Vertex/src/alpaka/classes_rocm.h b/DataFormats/Vertex/src/alpaka/classes_rocm.h new file mode 100644 index 0000000000000..cd0cf1ca0a1b4 --- /dev/null +++ b/DataFormats/Vertex/src/alpaka/classes_rocm.h @@ -0,0 +1,9 @@ +#ifndef DataFormats_Vertex_src_alpaka_classes_rocm_h +#define DataFormats_Vertex_src_alpaka_classes_rocm_h + +#include "DataFormats/Common/interface/DeviceProduct.h" +#include "DataFormats/Common/interface/Wrapper.h" +#include "DataFormats/Vertex/interface/ZVertexLayout.h" +#include "DataFormats/Vertex/interface//ZVertexSoADevice.h" +#include "DataFormats/Vertex/interface/alpaka/ZVertexSoACollection.h" +#endif // DataFormats_Vertex_src_alpaka_classes_rocm_h diff --git a/DataFormats/Vertex/src/alpaka/classes_rocm_def.xml b/DataFormats/Vertex/src/alpaka/classes_rocm_def.xml new file mode 100644 index 0000000000000..435d4ebad8283 --- /dev/null +++ b/DataFormats/Vertex/src/alpaka/classes_rocm_def.xml @@ -0,0 +1,6 @@ + + + + + + diff --git a/DataFormats/Vertex/src/classes.h b/DataFormats/Vertex/src/classes.h new file mode 100644 index 0000000000000..8f295cac21d4e --- /dev/null +++ b/DataFormats/Vertex/src/classes.h @@ -0,0 +1,8 @@ +#ifndef DataFormats_Vertex_src_classes_h +#define DataFormats_Vertex_src_classes_h + +#include "DataFormats/Common/interface/Wrapper.h" +#include "DataFormats/Vertex/interface/ZVertexLayout.h" +#include "DataFormats/Vertex/interface/ZVertexSoAHost.h" + +#endif // DataFormats_Vertex_src_classes_h diff --git a/DataFormats/Vertex/src/classes_def.xml b/DataFormats/Vertex/src/classes_def.xml new file mode 100644 index 0000000000000..3e97f973b2e3e --- /dev/null +++ 
b/DataFormats/Vertex/src/classes_def.xml @@ -0,0 +1,18 @@ + + + + + >::ROOTReadStreamer(newObj, onfile.layout_); + ]]> + + + + + diff --git a/DataFormats/Vertex/test/BuildFile.xml b/DataFormats/Vertex/test/BuildFile.xml new file mode 100644 index 0000000000000..49dee4babd8a1 --- /dev/null +++ b/DataFormats/Vertex/test/BuildFile.xml @@ -0,0 +1,6 @@ + + + + + + diff --git a/DataFormats/Vertex/test/alpaka/ZVertexSoA_test.cc b/DataFormats/Vertex/test/alpaka/ZVertexSoA_test.cc new file mode 100644 index 0000000000000..1099fc3fd7ed8 --- /dev/null +++ b/DataFormats/Vertex/test/alpaka/ZVertexSoA_test.cc @@ -0,0 +1,81 @@ +/** + Simple test for the zVertex::ZVertexSoA data structure + which inherits from Portable{Host}Collection. + + Creates an instance of the class (automatically allocates + memory on device), passes the view of the SoA data to + the kernels which: + - Fill the SoA with data. + - Verify that the data written is correct. + + Then, the SoA data are copied back to Host, where + a temporary host-side view (tmp_view) is created using + the same Layout to access the data on host and print it. 
+ */ + +#include +#include +#include "DataFormats/Vertex/interface/alpaka/ZVertexSoACollection.h" +#include "DataFormats/Vertex/interface/ZVertexSoADevice.h" +#include "DataFormats/Vertex/interface/ZVertexSoAHost.h" +#include "HeterogeneousCore/AlpakaInterface/interface/devices.h" +#include "HeterogeneousCore/AlpakaInterface/interface/host.h" +#include "HeterogeneousCore/AlpakaInterface/interface/memory.h" +#include "HeterogeneousCore/AlpakaInterface/interface/config.h" +#include "HeterogeneousCore/AlpakaInterface/interface/workdivision.h" + +using namespace std; +using namespace ALPAKA_ACCELERATOR_NAMESPACE; + +namespace ALPAKA_ACCELERATOR_NAMESPACE { + namespace testZVertexSoAT { + void runKernels(ZVertexSoAView zvertex_view, Queue& queue); + } +} // namespace ALPAKA_ACCELERATOR_NAMESPACE + +int main() { + const auto host = cms::alpakatools::host(); + const auto device = cms::alpakatools::devices()[0]; + Queue queue(device); + + // Inner scope to deallocate memory before destroying the stream + { + // Instantiate vertices on device. PortableCollection allocates + // SoA on device automatically. + ZVertexCollection zvertex_d(queue); + testZVertexSoAT::runKernels(zvertex_d.view(), queue); + + // Instantate vertices on host. This is where the data will be + // copied to from device. 
+ ZVertexHost zvertex_h(queue); + std::cout << zvertex_h.view().metadata().size() << std::endl; + alpaka::memcpy(queue, zvertex_h.buffer(), zvertex_d.const_buffer()); + alpaka::wait(queue); + + // Print results + std::cout << "idv" + << "\t" + << "zv" + << "\t" + << "wv" + << "\t" + << "chi2" + << "\t" + << "ptv2" + << "\t" + << "ndof" + << "\t" + << "sortInd" + << "\t" + << "nvFinal" << std::endl; + + for (int i = 0; i < 10; ++i) { + std::cout << (int)zvertex_h.view()[i].idv() << "\t" << zvertex_h.view()[i].zv() << "\t" + << zvertex_h.view()[i].wv() << "\t" << zvertex_h.view()[i].chi2() << "\t" << zvertex_h.view()[i].ptv2() + << "\t" << (int)zvertex_h.view()[i].ndof() << "\t" << (int)zvertex_h.view()[i].sortInd() << "\t" + << (int)zvertex_h.view().nvFinal() << std::endl; + } + } + + return 0; +} diff --git a/DataFormats/Vertex/test/alpaka/ZVertexSoA_test.dev.cc b/DataFormats/Vertex/test/alpaka/ZVertexSoA_test.dev.cc new file mode 100644 index 0000000000000..cbd6e7f2d5146 --- /dev/null +++ b/DataFormats/Vertex/test/alpaka/ZVertexSoA_test.dev.cc @@ -0,0 +1,64 @@ +#include "DataFormats/Vertex/interface/alpaka/ZVertexSoACollection.h" +#include "DataFormats/Vertex/interface/ZVertexSoADevice.h" +#include "DataFormats/Vertex/interface/ZVertexSoAHost.h" +#include "HeterogeneousCore/AlpakaInterface/interface/workdivision.h" // Check if this is really needed; code doesn't compile without it + +namespace ALPAKA_ACCELERATOR_NAMESPACE { + using namespace alpaka; + using namespace cms::alpakatools; + + namespace testZVertexSoAT { + + class TestFillKernel { + public: + template >> + ALPAKA_FN_ACC void operator()(TAcc const& acc, zVertex::ZVertexSoAView zvertex_view) const { + const int32_t i = alpaka::getIdx(acc)[0u]; + if (i == 0) { + zvertex_view.nvFinal() = 420; + } + + for (int32_t j : elements_with_stride(acc, zvertex_view.metadata().size())) { + zvertex_view[j].idv() = (int16_t)j; + zvertex_view[j].zv() = (float)j; + zvertex_view[j].wv() = (float)j; + 
zvertex_view[j].chi2() = (float)j; + zvertex_view[j].ptv2() = (float)j; + zvertex_view[j].ndof() = (int32_t)j; + zvertex_view[j].sortInd() = (uint16_t)j; + } + } + }; + + class TestVerifyKernel { + public: + template >> + ALPAKA_FN_ACC void operator()(TAcc const& acc, zVertex::ZVertexSoAView zvertex_view) const { + const int32_t i = alpaka::getIdx(acc)[0u]; + if (i == 0) { + ALPAKA_ASSERT_OFFLOAD(zvertex_view.nvFinal() == 420); + } + + for (int32_t j : elements_with_stride(acc, zvertex_view.metadata().size())) { + assert(zvertex_view[j].idv() == j); + assert(zvertex_view[j].zv() - (float)j < 0.0001); + assert(zvertex_view[j].wv() - (float)j < 0.0001); + assert(zvertex_view[j].chi2() - (float)j < 0.0001); + assert(zvertex_view[j].ptv2() - (float)j < 0.0001); + assert(zvertex_view[j].ndof() == j); + assert(zvertex_view[j].sortInd() == j); + } + } + }; + + void runKernels(zVertex::ZVertexSoAView zvertex_view, Queue& queue) { + uint32_t items = 64; + uint32_t groups = divide_up_by(zvertex_view.metadata().size(), items); + auto workDiv = make_workdiv(groups, items); + alpaka::exec(queue, workDiv, TestFillKernel{}, zvertex_view); + //alpaka::exec(queue, workDiv, TestVerifyKernel{}, zvertex_view); + } + + } // namespace testZVertexSoAT + +} // namespace ALPAKA_ACCELERATOR_NAMESPACE diff --git a/EventFilter/SiPixelRawToDigi/plugins/SiPixelDigiErrorsFromSoAAlpaka.cc b/EventFilter/SiPixelRawToDigi/plugins/SiPixelDigiErrorsFromSoAAlpaka.cc new file mode 100644 index 0000000000000..ace90593e55ba --- /dev/null +++ b/EventFilter/SiPixelRawToDigi/plugins/SiPixelDigiErrorsFromSoAAlpaka.cc @@ -0,0 +1,132 @@ +#include + +#include "CondFormats/DataRecord/interface/SiPixelFedCablingMapRcd.h" +#include "CondFormats/SiPixelObjects/interface/SiPixelFedCablingMap.h" +#include "CondFormats/SiPixelObjects/interface/SiPixelFedCablingTree.h" +#include "DataFormats/Common/interface/DetSetVector.h" +#include "DataFormats/Common/interface/Handle.h" +#include 
"DataFormats/DetId/interface/DetIdCollection.h" +#include "DataFormats/FEDRawData/interface/FEDNumbering.h" +#include "DataFormats/SiPixelDetId/interface/PixelFEDChannel.h" +#include "DataFormats/SiPixelDigi/interface/PixelDigi.h" +#include "DataFormats/SiPixelRawData/interface/SiPixelFormatterErrors.h" +#include "EventFilter/SiPixelRawToDigi/interface/PixelDataFormatter.h" +#include "FWCore/Framework/interface/ESTransientHandle.h" +#include "FWCore/Framework/interface/ESWatcher.h" +#include "FWCore/Framework/interface/Event.h" +#include "FWCore/Framework/interface/EventSetup.h" +#include "FWCore/Framework/interface/MakerMacros.h" +#include "FWCore/Framework/interface/stream/EDProducer.h" +#include "FWCore/ParameterSet/interface/ConfigurationDescriptions.h" +#include "FWCore/ParameterSet/interface/ParameterSet.h" +#include "FWCore/ParameterSet/interface/ParameterSetDescription.h" +#include "DataFormats/SiPixelDigiSoA/interface/SiPixelDigiErrorsHost.h" + +class SiPixelDigiErrorsFromSoAAlpaka : public edm::stream::EDProducer<> { +public: + explicit SiPixelDigiErrorsFromSoAAlpaka(const edm::ParameterSet& iConfig); + ~SiPixelDigiErrorsFromSoAAlpaka() override = default; + + static void fillDescriptions(edm::ConfigurationDescriptions& descriptions); + +private: + void produce(edm::Event& iEvent, const edm::EventSetup& iSetup) override; + + const edm::ESGetToken cablingToken_; + const edm::EDGetTokenT digiErrorsSoAGetToken_; + const edm::EDGetTokenT fmtErrorsGetToken_; + const edm::EDPutTokenT> errorPutToken_; + const edm::EDPutTokenT tkErrorPutToken_; + const edm::EDPutTokenT userErrorPutToken_; + const edm::EDPutTokenT> disabledChannelPutToken_; + + edm::ESWatcher cablingWatcher_; + std::unique_ptr cabling_; + + const std::vector tkerrorlist_; + const std::vector usererrorlist_; + + const bool usePhase1_; +}; + +SiPixelDigiErrorsFromSoAAlpaka::SiPixelDigiErrorsFromSoAAlpaka(const edm::ParameterSet& iConfig) + : cablingToken_(esConsumes(edm::ESInputTag("", 
iConfig.getParameter("CablingMapLabel")))), + digiErrorsSoAGetToken_{consumes(iConfig.getParameter("digiErrorSoASrc"))}, + fmtErrorsGetToken_{consumes(iConfig.getParameter("fmtErrorsSoASrc"))}, + errorPutToken_{produces>()}, + tkErrorPutToken_{produces()}, + userErrorPutToken_{produces("UserErrorModules")}, + disabledChannelPutToken_{produces>()}, + tkerrorlist_(iConfig.getParameter>("ErrorList")), + usererrorlist_(iConfig.getParameter>("UserErrorList")), + usePhase1_(iConfig.getParameter("UsePhase1")) {} + +void SiPixelDigiErrorsFromSoAAlpaka::fillDescriptions(edm::ConfigurationDescriptions& descriptions) { + edm::ParameterSetDescription desc; + desc.add("digiErrorSoASrc", edm::InputTag("siPixelDigis")); + desc.add("fmtErrorsSoASrc", edm::InputTag("siPixelDigis")); + // the configuration parameters here are named following those in SiPixelRawToDigi + desc.add("CablingMapLabel", "")->setComment("CablingMap label"); + desc.add("UsePhase1", false)->setComment("## Use phase1"); + desc.add>("ErrorList", std::vector{29}) + ->setComment("## ErrorList: list of error codes used by tracking to invalidate modules"); + desc.add>("UserErrorList", std::vector{40}) + ->setComment("## UserErrorList: list of error codes used by Pixel experts for investigation"); + descriptions.addWithDefaultLabel(desc); +} + +void SiPixelDigiErrorsFromSoAAlpaka::produce(edm::Event& iEvent, const edm::EventSetup& iSetup) { + // pack errors into collection + + // initialize cabling map or update if necessary + if (cablingWatcher_.check(iSetup)) { + // cabling map, which maps online address (fed->link->ROC->local pixel) to offline (DetId->global pixel) + const SiPixelFedCablingMap* cablingMap = &iSetup.getData(cablingToken_); + cabling_ = cablingMap->cablingTree(); + LogDebug("map version:") << cabling_->version(); + } + + const auto& digiErrors = iEvent.get(digiErrorsSoAGetToken_); + const auto& formatterErrors = iEvent.get(fmtErrorsGetToken_); + + edm::DetSetVector errorcollection{}; + 
DetIdCollection tkerror_detidcollection{}; + DetIdCollection usererror_detidcollection{}; + edmNew::DetSetVector disabled_channelcollection{}; + + PixelDataFormatter formatter(cabling_.get(), usePhase1_); // for phase 1 & 0 + // assert(formatterErrors != nullptr); // TODO: check what is happening here + auto errors = formatterErrors; // make a copy + PixelDataFormatter::DetErrors nodeterrors; + + // if (digiErrors.view().size() > 0) { // TODO: need to know if this size will be useful or not and how to use it + uint32_t size = digiErrors.view().metadata().size(); + for (auto i = 0U; i < size; i++) { + SiPixelErrorCompact err = digiErrors.view()[i].pixelErrors(); + if (err.errorType != 0) { + SiPixelRawDataError error(err.word, err.errorType, err.fedId + FEDNumbering::MINSiPixeluTCAFEDID); + errors[err.rawId].push_back(error); + } + } + // } + + formatter.unpackFEDErrors(errors, + tkerrorlist_, + usererrorlist_, + errorcollection, + tkerror_detidcollection, + usererror_detidcollection, + disabled_channelcollection, + nodeterrors); + + const uint32_t dummydetid = 0xffffffff; + edm::DetSet& errorDetSet = errorcollection.find_or_insert(dummydetid); + errorDetSet.data = nodeterrors; + + iEvent.emplace(errorPutToken_, std::move(errorcollection)); + iEvent.emplace(tkErrorPutToken_, std::move(tkerror_detidcollection)); + iEvent.emplace(userErrorPutToken_, std::move(usererror_detidcollection)); + iEvent.emplace(disabledChannelPutToken_, std::move(disabled_channelcollection)); +} + +DEFINE_FWK_MODULE(SiPixelDigiErrorsFromSoAAlpaka); diff --git a/EventFilter/SiPixelRawToDigi/python/siPixelDigis_cff.py b/EventFilter/SiPixelRawToDigi/python/siPixelDigis_cff.py index b5484afd2fafa..f5139f1cb418b 100644 --- a/EventFilter/SiPixelRawToDigi/python/siPixelDigis_cff.py +++ b/EventFilter/SiPixelRawToDigi/python/siPixelDigis_cff.py @@ -23,6 +23,12 @@ from EventFilter.SiPixelRawToDigi.siPixelDigiErrorsFromSoA_cfi import siPixelDigiErrorsFromSoA as _siPixelDigiErrorsFromSoA 
siPixelDigiErrors = _siPixelDigiErrorsFromSoA.clone() +# Alpaka modifier +from Configuration.ProcessModifiers.alpaka_cff import alpaka +from EventFilter.SiPixelRawToDigi.siPixelDigiErrorsFromSoAAlpaka_cfi import siPixelDigiErrorsFromSoAAlpaka as _siPixelDigiErrorsFromSoAAlpaka + +alpaka.toReplaceWith(siPixelDigiErrors, _siPixelDigiErrorsFromSoAAlpaka.clone()) + # use the Phase 1 settings from Configuration.Eras.Modifier_phase1Pixel_cff import phase1Pixel phase1Pixel.toModify(siPixelDigiErrors, diff --git a/HLTrigger/Configuration/python/customizeHLTforPatatrack.py b/HLTrigger/Configuration/python/customizeHLTforPatatrack.py new file mode 100644 index 0000000000000..55aa3479d0375 --- /dev/null +++ b/HLTrigger/Configuration/python/customizeHLTforPatatrack.py @@ -0,0 +1,487 @@ +import FWCore.ParameterSet.Config as cms + +def customizeHLTforDQMGPUvsCPUPixel(process): + '''Ad-hoc changes to test HLT config containing only DQM_PixelReconstruction_v and DQMGPUvsCPU stream + only up to the Pixel Local Reconstruction + ''' + dqmPixelRecoPathName = None + for pathName in process.paths_(): + if pathName.startswith('DQM_PixelReconstruction_v'): + dqmPixelRecoPathName = pathName + break + + if dqmPixelRecoPathName == None: + return process + + process.hltPixelConsumerGPU.eventProducts = [ + 'hltSiPixelClusters', + 'hltSiPixelClustersLegacyFormat', + 'hltSiPixelDigiErrorsLegacyFormat', + 'hltSiPixelRecHits', + 'hltSiPixelRecHitsLegacyFormat', + 'hltPixelTracks', + 'hltPixelTracksLegacyFormat', + 'hltPixelVertices', + 'hltPixelVerticesLegacyFormat', + ] + + process.hltPixelConsumerCPU.eventProducts = [] + for foo in process.hltPixelConsumerGPU.eventProducts: + process.hltPixelConsumerCPU.eventProducts += [foo+'CPUSerial'] + + # modify EventContent of DQMGPUvsCPU stream + if hasattr(process, 'hltOutputDQMGPUvsCPU'): + process.hltOutputDQMGPUvsCPU.outputCommands = [ + 'drop *', + 'keep *Cluster*_hltSiPixelClustersLegacyFormat_*_*', + 'keep 
*Cluster*_hltSiPixelClustersLegacyFormatCPUSerial_*_*', + 'keep *_hltSiPixelDigiErrorsLegacyFormat_*_*', + 'keep *_hltSiPixelDigiErrorsLegacyFormatCPUSerial_*_*', + 'keep *RecHit*_hltSiPixelRecHitsLegacyFormat_*_*', + 'keep *RecHit*_hltSiPixelRecHitsLegacyFormatCPUSerial_*_*', + 'keep *_hltPixelTracksLegacyFormat_*_*', + 'keep *_hltPixelTracksLegacyFormatCPUSerial_*_*', + 'keep *_hltPixelVerticesLegacyFormat_*_*', + 'keep *_hltPixelVerticesLegacyFormatCPUSerial_*_*', + ] + + # PixelRecHits: monitor of CPUSerial product (Alpaka backend: 'serial_sync') + process.hltSiPixelRecHitsSoAMonitorCPU = cms.EDProducer('SiPixelPhase1MonitorRecHitsSoAAlpaka', + pixelHitsSrc = cms.InputTag( 'hltSiPixelRecHitsCPUSerial' ), + TopFolderName = cms.string( 'SiPixelHeterogeneous/PixelRecHitsCPU' ) + ) + + # PixelRecHits: monitor of GPU product (Alpaka backend: '') + process.hltSiPixelRecHitsSoAMonitorGPU = cms.EDProducer('SiPixelPhase1MonitorRecHitsSoAAlpaka', + pixelHitsSrc = cms.InputTag( 'hltSiPixelRecHits' ), + TopFolderName = cms.string( 'SiPixelHeterogeneous/PixelRecHitsGPU' ) + ) + + # PixelRecHits: 'GPUvsCPU' comparisons + process.hltSiPixelRecHitsSoACompareGPUvsCPU = cms.EDProducer('SiPixelPhase1CompareRecHitsSoAAlpaka', + pixelHitsSrcCPU = cms.InputTag( 'hltSiPixelRecHitsCPUSerial' ), + pixelHitsSrcGPU = cms.InputTag( 'hltSiPixelRecHits' ), + topFolderName = cms.string( 'SiPixelHeterogeneous/PixelRecHitsCompareGPUvsCPU' ), + minD2cut = cms.double( 1.0E-4 ) + ) + + process.hltSiPixelTrackSoAMonitorCPU = cms.EDProducer("SiPixelPhase1MonitorTrackSoAAlpaka", + mightGet = cms.optional.untracked.vstring, + minQuality = cms.string('loose'), + pixelTrackSrc = cms.InputTag('hltPixelTracksCPUSerial'), + topFolderName = cms.string('SiPixelHeterogeneous/PixelTrackCPU'), + useQualityCut = cms.bool(True) + ) + + process.hltSiPixelTrackSoAMonitorGPU = cms.EDProducer("SiPixelPhase1MonitorTrackSoAAlpaka", + mightGet = cms.optional.untracked.vstring, + minQuality = cms.string('loose'), + 
pixelTrackSrc = cms.InputTag('hltPixelTracks'), + topFolderName = cms.string('SiPixelHeterogeneous/PixelTrackGPU'), + useQualityCut = cms.bool(True) + ) + + process.hltSiPixelTrackSoACompareGPUvsCPU = cms.EDProducer("SiPixelPhase1CompareTrackSoAAlpaka", + deltaR2cut = cms.double(0.04), + mightGet = cms.optional.untracked.vstring, + minQuality = cms.string('loose'), + pixelTrackSrcCPU = cms.InputTag("hltPixelTracksCPUSerial"), + pixelTrackSrcGPU = cms.InputTag("hltPixelTracks"), + topFolderName = cms.string('SiPixelHeterogeneous/PixelTrackCompareGPUvsCPU'), + useQualityCut = cms.bool(True) + ) + + process.hltSiPixelVertexSoAMonitorCPU = cms.EDProducer("SiPixelMonitorVertexSoAAlpaka", + beamSpotSrc = cms.InputTag("hltOnlineBeamSpot"), + mightGet = cms.optional.untracked.vstring, + pixelVertexSrc = cms.InputTag("hltPixelVerticesCPUSerial"), + topFolderName = cms.string('SiPixelHeterogeneous/PixelVertexCPU') + ) + + process.hltSiPixelVertexSoAMonitorGPU = cms.EDProducer("SiPixelMonitorVertexSoAAlpaka", + beamSpotSrc = cms.InputTag("hltOnlineBeamSpot"), + mightGet = cms.optional.untracked.vstring, + pixelVertexSrc = cms.InputTag("hltPixelVertices"), + topFolderName = cms.string('SiPixelHeterogeneous/PixelVertexGPU') + ) + + process.hltSiPixelVertexSoACompareGPUvsCPU = cms.EDProducer("SiPixelCompareVertexSoAAlpaka", + beamSpotSrc = cms.InputTag("hltOnlineBeamSpot"), + dzCut = cms.double(1), + mightGet = cms.optional.untracked.vstring, + pixelVertexSrcCPU = cms.InputTag("hltPixelVerticesCPUSerial"), + pixelVertexSrcGPU = cms.InputTag("hltPixelVertices"), + topFolderName = cms.string('SiPixelHeterogeneous/PixelVertexCompareGPUvsCPU') + ) + + process.HLTDQMPixelReconstruction = cms.Sequence( + process.hltSiPixelRecHitsSoAMonitorCPU + + process.hltSiPixelRecHitsSoAMonitorGPU + + process.hltSiPixelRecHitsSoACompareGPUvsCPU + + process.hltSiPixelTrackSoAMonitorCPU + + process.hltSiPixelTrackSoAMonitorGPU + + process.hltSiPixelTrackSoACompareGPUvsCPU + + 
process.hltSiPixelVertexSoAMonitorCPU + + process.hltSiPixelVertexSoAMonitorGPU + + process.hltSiPixelVertexSoACompareGPUvsCPU + ) + + # Add CPUSerial sequences to DQM_PixelReconstruction_v Path + dqmPixelRecoPath = getattr(process, dqmPixelRecoPathName) + try: + dqmPixelRecoPathIndex = dqmPixelRecoPath.index(process.HLTRecopixelvertexingSequence) + 1 + for cpuSeqName in [ + 'HLTDoLocalPixelCPUSerialSequence', + 'HLTRecopixelvertexingCPUSerialSequence', + ]: + dqmPixelRecoPath.insert(dqmPixelRecoPathIndex, getattr(process, cpuSeqName)) + dqmPixelRecoPathIndex += 1 + except: + dqmPixelRecoPathIndex = None + + return process + +def customizeHLTforAlpakaPixelRecoLocal(process): + '''Customisation to introduce the Local Pixel Reconstruction in Alpaka + ''' + process.hltESPSiPixelCablingSoA = cms.ESProducer('SiPixelCablingSoAESProducer@alpaka', + ComponentName = cms.string(''), + CablingMapLabel = cms.string(''), + UseQualityInfo = cms.bool(False), + appendToDataLabel = cms.string(''), + alpaka = cms.untracked.PSet( + backend = cms.untracked.string('') + ) + ) + + process.hltESPSiPixelGainCalibrationForHLTSoA = cms.ESProducer('SiPixelGainCalibrationForHLTSoAESProducer@alpaka', + appendToDataLabel = cms.string(''), + alpaka = cms.untracked.PSet( + backend = cms.untracked.string('') + ) + ) + + process.hltESPPixelCPEFastParamsPhase1 = cms.ESProducer('PixelCPEFastParamsESProducerAlpakaPhase1@alpaka', + ComponentName = cms.string('PixelCPEFastParams'), + appendToDataLabel = cms.string(''), + alpaka = cms.untracked.PSet( + backend = cms.untracked.string('') + ) + ) + + ### + + # alpaka EDProducer + # consumes + # - reco::BeamSpot + # produces + # - BeamSpotDeviceProduct + process.hltOnlineBeamSpotDevice = cms.EDProducer('BeamSpotDeviceProducer@alpaka', + src = cms.InputTag('hltOnlineBeamSpot'), + alpaka = cms.untracked.PSet( + backend = cms.untracked.string('') + ) + ) + + # alpaka EDProducer + # consumes + # - FEDRawDataCollection + # produces (* optional) + # - 
SiPixelClustersSoA + # - SiPixelDigisCollection + # - SiPixelDigiErrorsCollection * + # - SiPixelFormatterErrors * + process.hltSiPixelClusters = cms.EDProducer('SiPixelRawToClusterPhase1@alpaka', + mightGet = cms.optional.untracked.vstring, + IncludeErrors = cms.bool(True), + UseQualityInfo = cms.bool(False), + clusterThreshold_layer1 = cms.int32(4000), + clusterThreshold_otherLayers = cms.int32(4000), + VCaltoElectronGain = cms.double(1), # all gains=1, pedestals=0 + VCaltoElectronGain_L1 = cms.double(1), + VCaltoElectronOffset = cms.double(0), + VCaltoElectronOffset_L1 = cms.double(0), + InputLabel = cms.InputTag('rawDataCollector'), + Regions = cms.PSet( + inputs = cms.optional.VInputTag, + deltaPhi = cms.optional.vdouble, + maxZ = cms.optional.vdouble, + beamSpot = cms.optional.InputTag + ), + CablingMapLabel = cms.string(''), + # autoselect the alpaka backend + alpaka = cms.untracked.PSet( + backend = cms.untracked.string('') + ) + ) + + process.hltSiPixelClustersLegacyFormat = cms.EDProducer('SiPixelDigisClustersFromSoAAlpakaPhase1', + src = cms.InputTag('hltSiPixelClusters'), + clusterThreshold_layer1 = cms.int32(4000), + clusterThreshold_otherLayers = cms.int32(4000), + produceDigis = cms.bool(False), + storeDigis = cms.bool(False) + ) + + process.hltSiPixelClustersCache = cms.EDProducer('SiPixelClusterShapeCacheProducer', + src = cms.InputTag( 'hltSiPixelClustersLegacyFormat' ), + onDemand = cms.bool( False ) + ) + + # legacy EDProducer + # consumes + # - SiPixelDigiErrorsHost + # - SiPixelFormatterErrors + # produces + # - edm::DetSetVector + # - DetIdCollection + # - DetIdCollection, 'UserErrorModules' + # - edmNew::DetSetVector + process.hltSiPixelDigiErrorsLegacyFormat = cms.EDProducer('SiPixelDigiErrorsFromSoA', + digiErrorSoASrc = cms.InputTag('hltSiPixelClusters'), + fmtErrorsSoASrc = cms.InputTag('hltSiPixelClusters'), + CablingMapLabel = cms.string(''), + UsePhase1 = cms.bool(True), + ErrorList = cms.vint32(29), + UserErrorList = cms.vint32(40) + 
) + + # alpaka EDProducer + # consumes + # - BeamSpotDeviceProduct + # - SiPixelClustersSoA + # - SiPixelDigisCollection + # produces + # - TrackingRecHitAlpakaCollection + process.hltSiPixelRecHits = cms.EDProducer('SiPixelRecHitAlpakaPhase1@alpaka', + beamSpot = cms.InputTag('hltOnlineBeamSpotDevice'), + src = cms.InputTag('hltSiPixelClusters'), + CPE = cms.string('PixelCPEFastParams'), + mightGet = cms.optional.untracked.vstring, + # autoselect the alpaka backend + alpaka = cms.untracked.PSet( + backend = cms.untracked.string('') + ) + ) + + process.hltSiPixelRecHitsLegacyFormat = cms.EDProducer('SiPixelRecHitFromSoAAlpakaPhase1', + pixelRecHitSrc = cms.InputTag('hltSiPixelRecHits'), + src = cms.InputTag('hltSiPixelClustersLegacyFormat'), + ) + + ### + ### Task: Pixel Local Reconstruction + ### + process.HLTDoLocalPixelTask = cms.ConditionalTask( + process.hltOnlineBeamSpotDevice, + process.hltSiPixelClusters, + process.hltSiPixelClustersLegacyFormat, # was: hltSiPixelClusters + process.hltSiPixelClustersCache, # really needed ?? 
+ process.hltSiPixelDigiErrorsLegacyFormat, # was: hltSiPixelDigis + process.hltSiPixelRecHits, + process.hltSiPixelRecHitsLegacyFormat, # was: hltSiPixelRecHits + ) + + ### + ### CPUSerial version of Pixel Local Reconstruction + ### + process.hltOnlineBeamSpotDeviceCPUSerial = process.hltOnlineBeamSpotDevice.clone( + alpaka = dict( backend = 'serial_sync' ) + ) + + process.hltSiPixelClustersCPUSerial = process.hltSiPixelClusters.clone( + alpaka = dict( backend = 'serial_sync' ) + ) + + process.hltSiPixelClustersLegacyFormatCPUSerial = process.hltSiPixelClustersLegacyFormat.clone( + src = 'hltSiPixelClustersCPUSerial' + ) + + process.hltSiPixelDigiErrorsLegacyFormatCPUSerial = process.hltSiPixelDigiErrorsLegacyFormat.clone( + digiErrorSoASrc = 'hltSiPixelClustersCPUSerial', + fmtErrorsSoASrc = 'hltSiPixelClustersCPUSerial', + ) + + process.hltSiPixelRecHitsCPUSerial = process.hltSiPixelRecHits.clone( + beamSpot = 'hltOnlineBeamSpotDeviceCPUSerial', + src = 'hltSiPixelClustersCPUSerial', + alpaka = dict( backend = 'serial_sync' ) + ) + + process.hltSiPixelRecHitsLegacyFormatCPUSerial = process.hltSiPixelRecHitsLegacyFormat.clone( + pixelRecHitSrc = 'hltSiPixelRecHitsCPUSerial', + src = 'hltSiPixelClustersLegacyFormatCPUSerial', + ) + + process.HLTDoLocalPixelCPUSerialTask = cms.ConditionalTask( + process.hltOnlineBeamSpotDeviceCPUSerial, + process.hltSiPixelClustersCPUSerial, + process.hltSiPixelClustersLegacyFormatCPUSerial, + process.hltSiPixelDigiErrorsLegacyFormatCPUSerial, + process.hltSiPixelRecHitsCPUSerial, + process.hltSiPixelRecHitsLegacyFormatCPUSerial, + ) + + process.HLTDoLocalPixelCPUSerialSequence = cms.Sequence( process.HLTDoLocalPixelCPUSerialTask ) + + return process + +def customizeHLTforAlpakaPixelRecoTracking(process): + '''Customisation to introduce the Pixel-Track Reconstruction in Alpaka + ''' + + # alpaka EDProducer + # consumes + # - TrackingRecHitAlpakaCollection + # produces + # - TkSoADevice + process.hltPixelTracks = 
cms.EDProducer('CAHitNtupletAlpakaPhase1@alpaka', + pixelRecHitSrc = cms.InputTag('hltSiPixelRecHits'), + CPE = cms.string('PixelCPEFastParams'), + ptmin = cms.double(0.89999997615814209), + CAThetaCutBarrel = cms.double(0.0020000000949949026), + CAThetaCutForward = cms.double(0.0030000000260770321), + hardCurvCut = cms.double(0.032840722495894911), + dcaCutInnerTriplet = cms.double(0.15000000596046448), + dcaCutOuterTriplet = cms.double(0.25), + earlyFishbone = cms.bool(True), + lateFishbone = cms.bool(False), + fillStatistics = cms.bool(False), + minHitsPerNtuplet = cms.uint32(3), + maxNumberOfDoublets = cms.uint32(524288), + minHitsForSharingCut = cms.uint32(10), + fitNas4 = cms.bool(False), + doClusterCut = cms.bool(True), + doZ0Cut = cms.bool(True), + doPtCut = cms.bool(True), + useRiemannFit = cms.bool(False), + doSharedHitCut = cms.bool(True), + dupPassThrough = cms.bool(False), + useSimpleTripletCleaner = cms.bool(True), + idealConditions = cms.bool(False), + includeJumpingForwardDoublets = cms.bool(True), + trackQualityCuts = cms.PSet( + chi2MaxPt = cms.double(10), + chi2Coeff = cms.vdouble(0.9, 1.8), + chi2Scale = cms.double(8), + tripletMinPt = cms.double(0.5), + tripletMaxTip = cms.double(0.3), + tripletMaxZip = cms.double(12), + quadrupletMinPt = cms.double(0.3), + quadrupletMaxTip = cms.double(0.5), + quadrupletMaxZip = cms.double(12) + ), + # autoselect the alpaka backend + alpaka = cms.untracked.PSet( + backend = cms.untracked.string('') + ) + ) + + process.hltPixelTracksCPUSerial = process.hltPixelTracks.clone( + pixelRecHitSrc = 'hltSiPixelRecHitsCPUSerial', + alpaka = dict( backend = 'serial_sync' ) + ) + + process.hltPixelTracksLegacyFormat = cms.EDProducer("PixelTrackProducerFromSoAAlpakaPhase1", + beamSpot = cms.InputTag("hltOnlineBeamSpot"), + minNumberOfHits = cms.int32(0), + minQuality = cms.string('loose'), + pixelRecHitLegacySrc = cms.InputTag("hltSiPixelRecHitsLegacyFormat"), + trackSrc = cms.InputTag("hltPixelTracks") + ) + + 
process.hltPixelTracksLegacyFormatCPUSerial = process.hltPixelTracksLegacyFormat.clone( + pixelRecHitLegacySrc = cms.InputTag("hltSiPixelRecHitsLegacyFormatCPUSerial"), + trackSrc = cms.InputTag("hltPixelTracksCPUSerial") + ) + + process.HLTRecoPixelTracksTask = cms.ConditionalTask( + process.hltPixelTracks, + process.hltPixelTracksLegacyFormat, + ) + + process.HLTRecoPixelTracksCPUSerialTask = cms.ConditionalTask( + process.hltPixelTracksCPUSerial, + process.hltPixelTracksLegacyFormatCPUSerial, + ) + + process.HLTRecoPixelTracksCPUSerialSequence = cms.Sequence( process.HLTRecoPixelTracksCPUSerialTask ) + + return process + +def customizeHLTforAlpakaPixelRecoVertexing(process): + '''Customisation to introduce the Pixel-Vertex Reconstruction in Alpaka + ''' + + # alpaka EDProducer + # consumes + # - TkSoADevice + # produces + # - ZVertexDevice + process.hltPixelVertices = cms.EDProducer('PixelVertexProducerAlpakaPhase1@alpaka', + oneKernel = cms.bool(True), + useDensity = cms.bool(True), + useDBSCAN = cms.bool(False), + useIterative = cms.bool(False), + minT = cms.int32(2), + eps = cms.double(0.07), + errmax = cms.double(0.01), + chi2max = cms.double(9), + PtMin = cms.double(0.5), + PtMax = cms.double(75), + pixelTrackSrc = cms.InputTag('hltPixelTracks'), + # autoselect the alpaka backend + alpaka = cms.untracked.PSet( + backend = cms.untracked.string('') + ) + ) + + process.hltPixelVerticesCPUSerial = process.hltPixelVertices.clone( + pixelTrackSrc = 'hltPixelTracksCPUSerial', + alpaka = dict( backend = 'serial_sync' ) + ) + + process.hltPixelVerticesLegacyFormat = cms.EDProducer("PixelVertexProducerFromSoAAlpaka", + TrackCollection = cms.InputTag("hltPixelTracksLegacyFormat"), + beamSpot = cms.InputTag("hltOnlineBeamSpot"), + src = cms.InputTag("hltPixelVertices") + ) + + process.hltPixelVerticesLegacyFormatCPUSerial = process.hltPixelVerticesLegacyFormat.clone( + TrackCollection = cms.InputTag("hltPixelTracksLegacyFormatCPUSerial"), + src = 
cms.InputTag("hltPixelVerticesCPUSerial") + ) + + process.HLTRecopixelvertexingTask = cms.ConditionalTask( + process.HLTRecoPixelTracksTask, + process.hltPixelVertices, + process.hltPixelVerticesLegacyFormat, + ) + + process.HLTRecopixelvertexingCPUSerialTask = cms.ConditionalTask( + process.HLTRecoPixelTracksCPUSerialTask, + process.hltPixelVerticesCPUSerial, + process.hltPixelVerticesLegacyFormatCPUSerial, + ) + + process.HLTRecopixelvertexingCPUSerialSequence = cms.Sequence( process.HLTRecopixelvertexingCPUSerialTask ) + + return process + +def customizeHLTforAlpakaPixelReco(process): + '''Customisation to introduce the Pixel Local+Track+Vertex Reconstruction in Alpaka + ''' + process.load('Configuration.StandardSequences.Accelerators_cff') + process.load('HeterogeneousCore.AlpakaCore.ProcessAcceleratorAlpaka_cfi') + + process = customizeHLTforAlpakaPixelRecoLocal(process) + process = customizeHLTforAlpakaPixelRecoTracking(process) + process = customizeHLTforAlpakaPixelRecoVertexing(process) + + return process + +def customizeHLTforPatatrack(process): + '''Customize HLT configuration introducing latest Patatrack developments + ''' + process = customizeHLTforAlpakaPixelReco(process) + return process diff --git a/HLTrigger/Configuration/test/testHLTWithAlpakaPixelReco.sh b/HLTrigger/Configuration/test/testHLTWithAlpakaPixelReco.sh new file mode 100755 index 0000000000000..f5712144ef9a3 --- /dev/null +++ b/HLTrigger/Configuration/test/testHLTWithAlpakaPixelReco.sh @@ -0,0 +1,48 @@ +#!/bin/bash + +https_proxy=http://cmsproxy.cms:3128/ \ +hltGetConfiguration /frozen/2023/2e34/v1.2/HLT \ + --globaltag 130X_dataRun3_HLT_v2 \ + --data \ + --unprescale \ + --output all \ + --max-events 200 \ + --paths DQM_PixelReco*,*DQMGPUvsCPU* \ + --input /store/data/Run2023C/EphemeralHLTPhysics0/RAW/v1/000/368/822/00000/6e1268da-f96a-49f6-a5f0-89933142dd89.root \ + --customise \ +HLTrigger/Configuration/customizeHLTforPatatrack.customizeHLTforPatatrack,\ 
+HLTrigger/Configuration/customizeHLTforPatatrack.customizeHLTforDQMGPUvsCPUPixel \ + > hlt.py + +cat <<EOF >> hlt.py +process.options.numberOfThreads = 1 +process.options.numberOfStreams = 0 + +del process.MessageLogger +process.load('FWCore.MessageLogger.MessageLogger_cfi') + +# assign only DQM_PixelReconstruction_v to the DQMGPUvsCPU Primary Dataset +process.hltDatasetDQMGPUvsCPU.triggerConditions = ['DQM_PixelReconstruction_v*'] + +# remove FinalPaths running OutputModules, except for the DQMGPUvsCPU and DQM ones +finalPathsToRemove = [] +for fpath in process.finalpaths_(): + if fpath not in ['DQMOutput', 'DQMGPUvsCPUOutput']: + finalPathsToRemove += [fpath] +for fpath in finalPathsToRemove: + process.__delattr__(fpath) + +# do not produce output of DQM stream (not needed) +del process.hltOutputDQM + +# rename DQMIO output file +process.dqmOutput.fileName = '___JOBNAME____DQMIO.root' + +# rename output of DQMGPUvsCPU stream +process.hltOutputDQMGPUvsCPU.fileName = '___JOBNAME___.root' +EOF + +JOBNAME=hlt0 +sed "s|___JOBNAME___|${JOBNAME}|" hlt.py > "${JOBNAME}".py +edmConfigDump --prune "${JOBNAME}".py > "${JOBNAME}"_dump.py +echo "${JOBNAME}" ... 
&& cmsRun "${JOBNAME}".py &> "${JOBNAME}".log diff --git a/HeterogeneousCore/AlpakaInterface/test/BuildFile.xml b/HeterogeneousCore/AlpakaInterface/test/BuildFile.xml deleted file mode 100644 index 5f9c5fe81981f..0000000000000 --- a/HeterogeneousCore/AlpakaInterface/test/BuildFile.xml +++ /dev/null @@ -1,18 +0,0 @@ - - - - - - - - - - - - - - - - - - diff --git a/RecoLocalTracker/Records/BuildFile.xml b/RecoLocalTracker/Records/BuildFile.xml index 629b1aa7a1ebc..fae441b2f4ac2 100644 --- a/RecoLocalTracker/Records/BuildFile.xml +++ b/RecoLocalTracker/Records/BuildFile.xml @@ -8,3 +8,4 @@ + diff --git a/RecoLocalTracker/Records/interface/PixelCPEFastParamsRecord.h b/RecoLocalTracker/Records/interface/PixelCPEFastParamsRecord.h new file mode 100644 index 0000000000000..b2eeac7d65cf6 --- /dev/null +++ b/RecoLocalTracker/Records/interface/PixelCPEFastParamsRecord.h @@ -0,0 +1,27 @@ +#ifndef RecoLocalTracker_Records_PixelCPEFastParamsRecord_h +#define RecoLocalTracker_Records_PixelCPEFastParamsRecord_h + +#include "FWCore/Framework/interface/EventSetupRecordImplementation.h" +#include "FWCore/Framework/interface/DependentRecordImplementation.h" +#include "Geometry/Records/interface/TrackerDigiGeometryRecord.h" +#include "Geometry/Records/interface/IdealGeometryRecord.h" +#include "Geometry/Records/interface/TrackerTopologyRcd.h" +#include "MagneticField/Records/interface/IdealMagneticFieldRecord.h" +#include "CondFormats/DataRecord/interface/SiPixelLorentzAngleRcd.h" +#include "CondFormats/DataRecord/interface/SiPixelGenErrorDBObjectRcd.h" +#include "CalibTracker/Records/interface/SiPixelTemplateDBObjectESProducerRcd.h" +#include "CalibTracker/Records/interface/SiPixel2DTemplateDBObjectESProducerRcd.h" + +#include "FWCore/Utilities/interface/mplVector.h" + +class PixelCPEFastParamsRecord + : public edm::eventsetup::DependentRecordImplementation > {}; + +#endif // RecoLocalTracker_Records_PixelCPEFastParamsRecord_h diff --git 
a/RecoLocalTracker/Records/src/PixelCPEFastParamsRecord.cc b/RecoLocalTracker/Records/src/PixelCPEFastParamsRecord.cc new file mode 100644 index 0000000000000..1410d7c1e66bf --- /dev/null +++ b/RecoLocalTracker/Records/src/PixelCPEFastParamsRecord.cc @@ -0,0 +1,5 @@ +#include "RecoLocalTracker/Records/interface/PixelCPEFastParamsRecord.h" +#include "FWCore/Framework/interface/eventsetuprecord_registration_macro.h" +#include "FWCore/Utilities/interface/typelookup.h" + +EVENTSETUP_RECORD_REG(PixelCPEFastParamsRecord); diff --git a/RecoLocalTracker/SiPixelClusterizer/plugins/BuildFile.xml b/RecoLocalTracker/SiPixelClusterizer/plugins/BuildFile.xml index 1bc0c60a0d298..565b70b7fefbb 100644 --- a/RecoLocalTracker/SiPixelClusterizer/plugins/BuildFile.xml +++ b/RecoLocalTracker/SiPixelClusterizer/plugins/BuildFile.xml @@ -1,16 +1,20 @@ - - - + + - + + + + + + @@ -18,3 +22,14 @@ + + + + + + + + + + + diff --git a/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelDigisClustersFromSoA.cc b/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelDigisClustersFromSoA.cc index 0bf734b6cd589..9a98821970157 100644 --- a/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelDigisClustersFromSoA.cc +++ b/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelDigisClustersFromSoA.cc @@ -20,7 +20,7 @@ // local include(s) #include "PixelClusterizerBase.h" #include "SiPixelClusterThresholds.h" - +// #define GPU_DEBUG template class SiPixelDigisClustersFromSoAT : public edm::global::EDProducer<> { public: @@ -122,7 +122,7 @@ void SiPixelDigisClustersFromSoAT::produce(edm::StreamID, for (int32_t ic = 0; ic < nclus + 1; ++ic) { auto const& acluster = aclusters[ic]; // in any case we cannot go out of sync with gpu... 
- if (!std::is_base_of::value and acluster.charge < clusterThreshold) + if (acluster.charge < clusterThreshold) edm::LogWarning("SiPixelDigisClustersFromSoA") << "cluster below charge Threshold " << "Layer/DetId/clusId " << layer << '/' << detId << '/' << ic << " size/charge " << acluster.isize << '/' << acluster.charge; @@ -148,6 +148,10 @@ void SiPixelDigisClustersFromSoAT::produce(edm::StreamID, spc.abort(); }; + #ifdef GPU_DEBUG + std::cout << "Dumping all digis. nDigis = "<< nDigis << std::endl; + #endif + for (uint32_t i = 0; i < nDigis; i++) { // check for uninitialized digis if (digis.rawIdArr(i) == 0) @@ -161,6 +165,9 @@ void SiPixelDigisClustersFromSoAT::produce(edm::StreamID, assert(digis.rawIdArr(i) > 109999); #endif if (detId != digis.rawIdArr(i)) { + #ifdef GPU_DEBUG + std::cout << ">> Closed module --"<< detId << "; nclus = " << nclus <::produce(edm::StreamID, } } PixelDigi dig(digis.pdigi(i)); + + #ifdef GPU_DEBUG + std::cout << i << ";" + << digis.rawIdArr(i) << ";" + << digis.clus(i) << ";" + << digis.pdigi(i) << ";" + << digis.adc(i) << ";" + << dig.row() << ";" + << dig.column() << std::endl; + #endif + if (storeDigis_) (*detDigis).data.emplace_back(dig); // fill clusters diff --git a/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelDigisClustersFromSoAAlpaka.cc b/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelDigisClustersFromSoAAlpaka.cc new file mode 100644 index 0000000000000..1ab92a07aa704 --- /dev/null +++ b/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelDigisClustersFromSoAAlpaka.cc @@ -0,0 +1,244 @@ +#include "DataFormats/SiPixelClusterSoA/interface/ClusteringConstants.h" +#include "DataFormats/Common/interface/DetSetVector.h" +#include "DataFormats/Common/interface/Handle.h" +#include "DataFormats/DetId/interface/DetId.h" +#include "DataFormats/SiPixelCluster/interface/SiPixelCluster.h" +#include "DataFormats/SiPixelDigi/interface/PixelDigi.h" +#include "DataFormats/SiPixelDigiSoA/interface/SiPixelDigisHost.h" +#include 
"DataFormats/TrackerCommon/interface/TrackerTopology.h" +#include "FWCore/Framework/interface/Event.h" +#include "FWCore/Framework/interface/EventSetup.h" +#include "FWCore/Framework/interface/global/EDProducer.h" +#include "FWCore/MessageLogger/interface/MessageLogger.h" +#include "FWCore/ParameterSet/interface/ConfigurationDescriptions.h" +#include "FWCore/ParameterSet/interface/ParameterSet.h" +#include "FWCore/ParameterSet/interface/ParameterSetDescription.h" +#include "Geometry/Records/interface/TrackerTopologyRcd.h" +#include "Geometry/CommonTopologies/interface/SimplePixelTopology.h" + +// local include(s) +#include "PixelClusterizerBase.h" +#include "SiPixelClusterThresholds.h" +// #define EDM_ML_DEBUG +// #define GPU_DEBUG +template +class SiPixelDigisClustersFromSoAAlpaka : public edm::global::EDProducer<> { +public: + explicit SiPixelDigisClustersFromSoAAlpaka(const edm::ParameterSet& iConfig); + ~SiPixelDigisClustersFromSoAAlpaka() override = default; + + static void fillDescriptions(edm::ConfigurationDescriptions& descriptions); + +private: + void produce(edm::StreamID, edm::Event& iEvent, const edm::EventSetup& iSetup) const override; + + edm::ESGetToken const topoToken_; + edm::EDGetTokenT const digisHostToken_; + const SiPixelClusterThresholds clusterThresholds_; // Cluster threshold in electrons + const bool produceDigis_; + const bool storeDigis_; + + edm::EDPutTokenT> digisPutToken_; + edm::EDPutTokenT clustersPutToken_; +}; + +template +SiPixelDigisClustersFromSoAAlpaka::SiPixelDigisClustersFromSoAAlpaka(const edm::ParameterSet& iConfig) + : topoToken_(esConsumes()), + digisHostToken_(consumes(iConfig.getParameter("src"))), + clusterThresholds_(iConfig.getParameter("clusterThreshold_layer1"), + iConfig.getParameter("clusterThreshold_otherLayers")), + produceDigis_(iConfig.getParameter("produceDigis")), + storeDigis_(iConfig.getParameter("produceDigis") && iConfig.getParameter("storeDigis")), + clustersPutToken_(produces()) { + if (produceDigis_) 
+ digisPutToken_ = produces>(); +} + +template +void SiPixelDigisClustersFromSoAAlpaka::fillDescriptions(edm::ConfigurationDescriptions& descriptions) { + edm::ParameterSetDescription desc; + desc.add("src", edm::InputTag("siPixelDigisSoA")); + desc.add("clusterThreshold_layer1", pixelClustering::clusterThresholdLayerOne); + desc.add("clusterThreshold_otherLayers", pixelClustering::clusterThresholdOtherLayers); + desc.add("produceDigis", true); + desc.add("storeDigis", true); + + descriptions.addWithDefaultLabel(desc); +} + +template +void SiPixelDigisClustersFromSoAAlpaka::produce(edm::StreamID, + edm::Event& iEvent, + const edm::EventSetup& iSetup) const { + const auto& digisHost = iEvent.get(digisHostToken_); + const auto& digisView = digisHost.const_view(); + const uint32_t nDigis = digisHost.nDigis(); + + const auto& ttopo = iSetup.getData(topoToken_); + constexpr auto maxModules = TrackerTraits::numberOfModules; + + std::unique_ptr> outputDigis; + if (produceDigis_) + outputDigis = std::make_unique>(); + if (storeDigis_) + outputDigis->reserve(maxModules); + auto outputClusters = std::make_unique(); + outputClusters->reserve(maxModules, nDigis / 2); + + edm::DetSet* detDigis = nullptr; + uint32_t detId = 0; + + for (uint32_t i = 0; i < nDigis; i++) { + // check for uninitialized digis + // this is set in RawToDigi_kernel in SiPixelRawToClusterGPUKernel.cu + if (digisView[i].rawIdArr() == 0) + continue; + + // check for noisy/dead pixels (electrons set to 0) + if (digisView[i].adc() == 0) + continue; + + detId = digisView[i].rawIdArr(); + if (storeDigis_) { + + detDigis = &outputDigis->find_or_insert(detId); + + if ((*detDigis).empty()) + (*detDigis).data.reserve(64); // avoid the first relocations + } + + break; + } + + int32_t nclus = -1; + PixelClusterizerBase::AccretionCluster aclusters[TrackerTraits::maxNumClustersPerModules]; +#ifdef EDM_ML_DEBUG + auto totClustersFilled = 0; +#endif + + auto fillClusters = [&](uint32_t detId) { + if (nclus < 0) + 
return; // this in reality should never happen + edmNew::DetSetVector::FastFiller spc(*outputClusters, detId); + auto layer = (DetId(detId).subdetId() == 1) ? ttopo.pxbLayer(detId) : 0; + auto clusterThreshold = clusterThresholds_.getThresholdForLayerOnCondition(layer == 1); + for (int32_t ic = 0; ic < nclus + 1; ++ic) { + auto const& acluster = aclusters[ic]; + // in any case we cannot go out of sync with gpu... + if (acluster.charge < clusterThreshold) + edm::LogWarning("SiPixelDigisClustersFromSoAAlpaka") << "cluster below charge Threshold " + << "Layer/DetId/clusId " << layer << '/' << detId << '/' << ic + << " size/charge " << acluster.isize << '/' << acluster.charge << "\n"; + // sort by row (x) + spc.emplace_back(acluster.isize, acluster.adc, acluster.x, acluster.y, acluster.xmin, acluster.ymin, ic); + aclusters[ic].clear(); +#ifdef EDM_ML_DEBUG + ++totClustersFilled; + const auto& cluster{spc.back()}; + // LogDebug("SiPixelDigisClustersFromSoAAlpaka") + std::cout << "putting in this cluster " << ic << " " << cluster.charge() << " " << cluster.pixelADC().size() << "\n"; +#endif + std::push_heap(spc.begin(), spc.end(), [](SiPixelCluster const& cl1, SiPixelCluster const& cl2) { + return cl1.minPixelRow() < cl2.minPixelRow(); + }); + } + nclus = -1; + // sort by row (x) + std::sort_heap(spc.begin(), spc.end(), [](SiPixelCluster const& cl1, SiPixelCluster const& cl2) { + return cl1.minPixelRow() < cl2.minPixelRow(); + }); + if (spc.empty()) + spc.abort(); + }; + + + #ifdef GPU_DEBUG + std::cout << "Dumping all digis. 
nDigis = "<< nDigis << std::endl; + #endif + for (uint32_t i = 0; i < nDigis; i++) { + + #ifdef GPU_DEBUG + PixelDigi dig2{digisView[i].pdigi()}; + std::cout << i << ";" + << digisView[i].rawIdArr() << ";" + << digisView[i].clus() << ";" + << digisView[i].pdigi() << ";" + << digisView[i].adc() << ";" + << dig2.row() << ";" + << dig2.column() << std::endl; + #endif + + // check for uninitialized digis + if (digisView[i].rawIdArr() == 0) + continue; + // check for noisy/dead pixels (electrons set to 0) + if (digisView[i].adc() == 0) + continue; + if (digisView[i].clus() >= -pixelClustering::invalidClusterId) + continue; // not in cluster; TODO add an assert for the size + if (digisView[i].clus() == pixelClustering::invalidModuleId) + continue; // from clusters killed by charge cut +#ifdef EDM_ML_DEBUG + assert(digisView[i].rawIdArr() > 109999); +#endif + if (detId != digisView[i].rawIdArr()) { + #ifdef GPU_DEBUG + std::cout << ">> Closed module --"<< detId << "; nclus = " << nclus <find_or_insert(detId); + if ((*detDigis).empty()) + (*detDigis).data.reserve(64); // avoid the first relocations + else { + edm::LogWarning("SiPixelDigisClustersFromSoAAlpaka") + << "Problem det present twice in input! 
" << (*detDigis).detId(); + } + } + } + PixelDigi dig{digisView[i].pdigi()}; + + if (storeDigis_) + (*detDigis).data.emplace_back(dig); + // fill clusters +#ifdef EDM_ML_DEBUG + assert(digisView[i].clus() >= 0); + assert(digisView[i].clus() < static_cast(TrackerTraits::maxNumClustersPerModules)); +#endif + nclus = std::max(digisView[i].clus(), nclus); + auto row = dig.row(); + auto col = dig.column(); + SiPixelCluster::PixelPos pix(row, col); + aclusters[digisView[i].clus()].add(pix, digisView[i].adc()); + } + + // fill final clusters + if (detId > 0) + fillClusters(detId); + +#ifdef EDM_ML_DEBUG + LogDebug("SiPixelDigisClustersFromSoAAlpaka") << "filled " << totClustersFilled << " clusters"; +#endif + + if (produceDigis_) + iEvent.put(digisPutToken_, std::move(outputDigis)); + + iEvent.put(clustersPutToken_, std::move(outputClusters)); +} + +#include "FWCore/Framework/interface/MakerMacros.h" + +using SiPixelDigisClustersFromSoAAlpakaPhase1 = SiPixelDigisClustersFromSoAAlpaka; +DEFINE_FWK_MODULE(SiPixelDigisClustersFromSoAAlpakaPhase1); + +using SiPixelDigisClustersFromSoAAlpakaPhase2 = SiPixelDigisClustersFromSoAAlpaka; +DEFINE_FWK_MODULE(SiPixelDigisClustersFromSoAAlpakaPhase2); + +using SiPixelDigisClustersFromSoAAlpakaHIonPhase1 = SiPixelDigisClustersFromSoAAlpaka; +DEFINE_FWK_MODULE(SiPixelDigisClustersFromSoAAlpakaHIonPhase1); diff --git a/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterGPUKernel.h b/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterGPUKernel.h index 802ad2eb42c7e..873ba60e84822 100644 --- a/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterGPUKernel.h +++ b/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterGPUKernel.h @@ -131,6 +131,13 @@ namespace pixelgpudetails { digis_d.setNModulesDigis(nModules_Clusters_h[0], nDigis); assert(nModules_Clusters_h[2] <= nModules_Clusters_h[1]); clusters_d.setNClusters(nModules_Clusters_h[1], nModules_Clusters_h[2]); + + std::cout << 
"SiPixelClusterizerCUDA results:" << std::endl + << " > no. of digis: " << nDigis << std::endl + << " > no. of active modules: " << nModules_Clusters_h[0] << std::endl + << " > no. of clusters: " << nModules_Clusters_h[1] << std::endl + << " > bpix2 offset: " << nModules_Clusters_h[2] << std::endl; + // need to explicitly deallocate while the associated CUDA // stream is still alive // diff --git a/RecoLocalTracker/SiPixelClusterizer/plugins/alpaka/CalibPixel.h b/RecoLocalTracker/SiPixelClusterizer/plugins/alpaka/CalibPixel.h new file mode 100644 index 0000000000000..f7331e3a2e5b2 --- /dev/null +++ b/RecoLocalTracker/SiPixelClusterizer/plugins/alpaka/CalibPixel.h @@ -0,0 +1,128 @@ +#ifndef RecoLocalTracker_SiPixelClusterizer_plugins_alpaka_CalibPixel_h +#define RecoLocalTracker_SiPixelClusterizer_plugins_alpaka_CalibPixel_h + +#include +#include +#include +#include + +#include + +#include "CondFormats/SiPixelObjects/interface/SiPixelGainCalibrationForHLTLayout.h" +#include "CondFormats/SiPixelObjects/interface/alpaka/SiPixelGainCalibrationForHLTDevice.h" +#include "CondFormats/SiPixelObjects/interface/alpaka/SiPixelGainCalibrationForHLTUtilities.h" +#include "DataFormats/SiPixelClusterSoA/interface/ClusteringConstants.h" +#include "DataFormats/SiPixelClusterSoA/interface/SiPixelClustersSoA.h" +#include "DataFormats/SiPixelDigiSoA/interface/SiPixelDigiErrorsSoA.h" +#include "DataFormats/SiPixelDigiSoA/interface/SiPixelDigisSoAv2.h" +#include "Geometry/CommonTopologies/interface/SimplePixelTopology.h" +#include "HeterogeneousCore/AlpakaInterface/interface/config.h" +#include "HeterogeneousCore/AlpakaInterface/interface/traits.h" +#include "RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelClusterThresholds.h" + +// #define GPU_DEBUG + +namespace calibPixel { + using namespace cms::alpakatools; + constexpr uint16_t InvId = std::numeric_limits::max() - 1; + // must be > MaxNumModules + struct calibDigis { + template + ALPAKA_FN_ACC void operator()(const TAcc& acc, + 
SiPixelClusterThresholds clusterThresholds, + SiPixelDigisSoAv2View view, + SiPixelClustersSoAView clus_view, + const SiPixelGainCalibrationForHLTSoAConstView gains, + int numElements) const { + const uint32_t threadIdxGlobal(alpaka::getIdx(acc)[0u]); + const float VCaltoElectronGain = clusterThresholds.vCaltoElectronGain; + const float VCaltoElectronGain_L1 = clusterThresholds.vCaltoElectronGain_L1; + const float VCaltoElectronOffset = clusterThresholds.vCaltoElectronOffset; + const float VCaltoElectronOffset_L1 = clusterThresholds.vCaltoElectronOffset_L1; + // zero for next kernels... + if (threadIdxGlobal == 0) { + clus_view[0].clusModuleStart() = clus_view[0].moduleStart() = 0; + } + cms::alpakatools::for_each_element_in_grid_strided( + acc, phase1PixelTopology::numberOfModules, [&](uint32_t i) { clus_view[i].clusInModule() = 0; }); + cms::alpakatools::for_each_element_in_grid_strided(acc, numElements, [&](uint32_t i) { + auto dvgi = view[i]; + if (dvgi.moduleId() != InvId) { + + bool isDeadColumn = false, isNoisyColumn = false; + int row = dvgi.xx(); + int col = dvgi.yy(); + auto ret = SiPixelGainUtilities::getPedAndGain(gains, dvgi.moduleId(), col, row, isDeadColumn, isNoisyColumn); + float pedestal = ret.first; + float gain = ret.second; + if (isDeadColumn | isNoisyColumn) { + printf("bad pixel at %d in %d\n", i, dvgi.moduleId());  // report the owning module before its id is overwritten below + dvgi.moduleId() = InvId; + dvgi.adc() = 0; + } else { + float vcal = dvgi.adc() * gain - pedestal * gain; + + float conversionFactor = dvgi.moduleId() < 96 ? VCaltoElectronGain_L1 : VCaltoElectronGain; + float offset = dvgi.moduleId() < 96 ? 
VCaltoElectronOffset_L1 : VCaltoElectronOffset; + #ifdef GPU_DEBUG + auto old_adc = dvgi.adc(); + #endif + dvgi.adc() = std::max(100, int(vcal * conversionFactor + offset)); + #ifdef GPU_DEBUG + if (threadIdxGlobal == 0) + printf("module %d pixel %d -> old_adc = %d; vcal = %.2f; conversionFactor = %.2f; offset = %.2f; new_adc = %d \n", + dvgi.moduleId(), i, old_adc,vcal,conversionFactor,offset,dvgi.adc()); + #endif + } + } + }); + } + }; + struct calibDigisPhase2 { + template + ALPAKA_FN_ACC void operator()(const TAcc& acc, + SiPixelClusterThresholds clusterThresholds, + SiPixelDigisSoAv2View view, + SiPixelClustersSoAView clus_view, + int numElements + ) const { + const float ElectronPerADCGain = clusterThresholds.electronPerADCGain; + const int8_t Phase2ReadoutMode = clusterThresholds.phase2ReadoutMode; + const uint16_t Phase2DigiBaseline = clusterThresholds.phase2DigiBaseline; + const uint8_t Phase2KinkADC = clusterThresholds.phase2KinkADC; + + const uint32_t threadIdxGlobal(alpaka::getIdx(acc)[0u]); + // zero for next kernels... + if (0 == threadIdxGlobal) + clus_view[0].clusModuleStart() = clus_view[0].moduleStart() = 0; + + cms::alpakatools::for_each_element_in_grid_strided( + acc, phase2PixelTopology::numberOfModules, [&](uint32_t i) { clus_view[i].clusInModule() = 0; }); + cms::alpakatools::for_each_element_in_grid_strided(acc, numElements, [&](uint32_t i) { + auto dvgi = view[i]; + if (pixelClustering::invalidModuleId != dvgi.moduleId()) { + const int mode = (Phase2ReadoutMode < -1 ? -1 : Phase2ReadoutMode); + int adc_int = dvgi.adc(); + if (mode < 0) + adc_int = int(adc_int * ElectronPerADCGain); + else { + if (adc_int < Phase2KinkADC) + adc_int = int((adc_int + 0.5) * ElectronPerADCGain); + else { + const int8_t dspp = (Phase2ReadoutMode < 10 ? Phase2ReadoutMode : 10); + const int8_t ds = int8_t(dspp <= 1 ? 
1 : (dspp - 1) * (dspp - 1)); + adc_int -= Phase2KinkADC; + adc_int *= ds; + adc_int += Phase2KinkADC; + adc_int = ((adc_int + 0.5 * ds) * ElectronPerADCGain); + } + adc_int += int(Phase2DigiBaseline); + } + dvgi.adc() = std::min(adc_int, int(std::numeric_limits::max())); + } + }); + } + }; +} // namespace calibPixel + +#endif // RecoLocalTracker_SiPixelClusterizer_plugins_alpaka_CalibPixel_h diff --git a/RecoLocalTracker/SiPixelClusterizer/plugins/alpaka/ClusterChargeCut.h b/RecoLocalTracker/SiPixelClusterizer/plugins/alpaka/ClusterChargeCut.h new file mode 100644 index 0000000000000..9e2c2d6d984a4 --- /dev/null +++ b/RecoLocalTracker/SiPixelClusterizer/plugins/alpaka/ClusterChargeCut.h @@ -0,0 +1,208 @@ +#ifndef RecoLocalTracker_SiPixelClusterizer_alpaka_ClusterChargeCut_h +#define RecoLocalTracker_SiPixelClusterizer_alpaka_ClusterChargeCut_h + +#include +#include + +#include "DataFormats/SiPixelClusterSoA/interface/ClusteringConstants.h" +#include "DataFormats/SiPixelClusterSoA/interface/SiPixelClustersSoA.h" +#include "DataFormats/SiPixelDigiSoA/interface/SiPixelDigisSoAv2.h" +#include "HeterogeneousCore/AlpakaInterface/interface/config.h" +#include "HeterogeneousCore/AlpakaInterface/interface/prefixScan.h" +#include "RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelClusterThresholds.h" + +// #define GPU_DEBUG +// namespace ALPAKA_ACCELERATOR_NAMESPACE { +namespace pixelClustering { + + template + struct clusterChargeCut { + template + ALPAKA_FN_ACC void operator()( + const TAcc& acc, + SiPixelDigisSoAv2View digi_view, + SiPixelClustersSoAView clus_view, + SiPixelClusterThresholds clusterThresholds, // charge cut on cluster in electrons (for layer 1 and for other layers) + const uint32_t numElements) const { + constexpr int startBPIX2 = TrackerTraits::layerStart[1]; + constexpr int32_t maxNumClustersPerModules = TrackerTraits::maxNumClustersPerModules; + [[maybe_unused]] constexpr int nMaxModules = TrackerTraits::numberOfModules; + + const uint32_t 
blockIdx(alpaka::getIdx(acc)[0u]); + auto firstModule = blockIdx; + auto endModule = clus_view[0].moduleStart(); + if (blockIdx >= endModule) + return; + + auto& charge = alpaka::declareSharedVar(acc); + auto& ok = alpaka::declareSharedVar(acc); + auto& newclusId = alpaka::declareSharedVar(acc); + + const uint32_t gridDimension(alpaka::getWorkDiv(acc)[0u]); + + for (auto module = firstModule; module < endModule; module += gridDimension) { + auto firstPixel = clus_view[1 + module].moduleStart(); + auto thisModuleId = digi_view[firstPixel].moduleId(); + + ALPAKA_ASSERT_OFFLOAD(nMaxModules < maxNumModules); + ALPAKA_ASSERT_OFFLOAD(startBPIX2 < nMaxModules); + + uint32_t nclus = clus_view[thisModuleId].clusInModule(); + if (nclus == 0) + return; + + const uint32_t threadIdxLocal(alpaka::getIdx(acc)[0u]); + if (threadIdxLocal == 0 && nclus > maxNumClustersPerModules) + printf("Warning too many clusters in module %d in block %d: %d > %d\n", + thisModuleId, + module, + nclus, + maxNumClustersPerModules); + + // Stride = block size. + const uint32_t blockDimension(alpaka::getWorkDiv(acc)[0u]); + + // Get thread / CPU element indices in block. + const auto& [firstElementIdxNoStride, endElementIdxNoStride] = + cms::alpakatools::element_index_range_in_block(acc, firstPixel); + + if (nclus > maxNumClustersPerModules) { + uint32_t firstElementIdx = firstElementIdxNoStride; + uint32_t endElementIdx = endElementIdxNoStride; + // remove excess FIXME find a way to cut charge first.... 
+ for (uint32_t i = firstElementIdx; i < numElements; ++i) { + if (not cms::alpakatools::next_valid_element_index_strided( + i, firstElementIdx, endElementIdx, blockDimension, numElements)) + break; + if (digi_view[i].moduleId() == invalidModuleId) + continue; // not valid + if (digi_view[i].moduleId() != thisModuleId) + break; // end of module + if (digi_view[i].clus() >= maxNumClustersPerModules) { + digi_view[i].moduleId() = invalidModuleId; + digi_view[i].clus() = invalidModuleId; + } + } + nclus = maxNumClustersPerModules; + } + + #ifdef GPU_DEBUG + if (thisModuleId % 100 == 1) + if (threadIdxLocal == 0) + printf("start cluster charge cut for module %d in block %d\n", thisModuleId, module); + #endif + + ALPAKA_ASSERT_OFFLOAD(nclus <= maxNumClustersPerModules); + cms::alpakatools::for_each_element_in_block_strided(acc, nclus, [&](uint32_t i) { charge[i] = 0; }); + alpaka::syncBlockThreads(acc); + + uint32_t firstElementIdx = firstElementIdxNoStride; + uint32_t endElementIdx = endElementIdxNoStride; + for (uint32_t i = firstElementIdx; i < numElements; ++i) { + if (not cms::alpakatools::next_valid_element_index_strided( + i, firstElementIdx, endElementIdx, blockDimension, numElements)) + break; + if (digi_view[i].moduleId() == invalidModuleId) + continue; // not valid + if (digi_view[i].moduleId() != thisModuleId) + break; // end of module + alpaka::atomicAdd(acc, + &charge[digi_view[i].clus()], + static_cast(digi_view[i].adc()), + alpaka::hierarchy::Threads{}); + } + alpaka::syncBlockThreads(acc); + + auto chargeCut = clusterThresholds.getThresholdForLayerOnCondition(thisModuleId < startBPIX2); + bool allGood = true; + + cms::alpakatools::for_each_element_in_block_strided( + acc, nclus, [&](uint32_t i) { + newclusId[i] = ok[i] = (charge[i] > chargeCut) ? 1 : 0; + if (ok[i]==0) + allGood = allGood && false; + + // #ifdef GPU_DEBUG + // printf("module %d -> chargeCut = %d; cluster %d; charge = %d; ok = %s\n",thisModuleId, chargeCut,i,charge[i],ok[i] > 0 ? 
" -> good" : "-> cut"); + // #endif + }); + alpaka::syncBlockThreads(acc); + + // if all clusters above threshold do nothing + // if (allGood) + // continue; + + // renumber + auto& ws = alpaka::declareSharedVar(acc); + constexpr uint32_t maxThreads = 1024; + auto minClust = std::min(nclus,maxThreads); + + cms::alpakatools::blockPrefixScan(acc, newclusId, minClust, ws); + + if constexpr (maxNumClustersPerModules > maxThreads) //only if needed + { + + for (uint32_t offset = maxThreads; offset < nclus; offset += maxThreads) { + cms::alpakatools::blockPrefixScan(acc, newclusId + offset, nclus - offset, ws); + + cms::alpakatools::for_each_element_in_block_strided( + acc, nclus - offset, [&](uint32_t i) { + uint32_t prevBlockEnd = ((i + offset/ maxThreads) * maxThreads) - 1; + newclusId[i] += newclusId[prevBlockEnd]; + } + ); + alpaka::syncBlockThreads(acc); + } + } + + ALPAKA_ASSERT_OFFLOAD(nclus >= newclusId[nclus - 1]); + + if (nclus == newclusId[nclus - 1]) + return; + + clus_view[thisModuleId].clusInModule() = newclusId[nclus - 1]; + // alpaka::syncBlockThreads(acc); + + #ifdef GPU_DEBUG + if (thisModuleId % 100 == 1) + if (threadIdxLocal == 0) + printf("module %d -> chargeCut = %d; nclus (pre cut) = %d; nclus (after cut) = %d\n",thisModuleId, chargeCut,nclus,clus_view[thisModuleId].clusInModule()); + #endif + // // mark bad cluster again + // cms::alpakatools::for_each_element_in_block_strided(acc, nclus, [&](uint32_t i) { + // if (0 == ok[i]) + // newclusId[i] = invalidModuleId + 1; + // }); + + alpaka::syncBlockThreads(acc); + + // reassign id + firstElementIdx = firstElementIdxNoStride; + endElementIdx = endElementIdxNoStride; + for (uint32_t i = firstElementIdx; i < numElements; ++i) { + if (not cms::alpakatools::next_valid_element_index_strided( + i, firstElementIdx, endElementIdx, blockDimension, numElements)) + break; + if (digi_view[i].moduleId() == invalidModuleId) + continue; // not valid + if (digi_view[i].moduleId() != thisModuleId) + break; // end of 
module + if (0 == ok[digi_view[i].clus()]) + digi_view[i].moduleId() = digi_view[i].clus() = invalidModuleId; + else + digi_view[i].clus() = newclusId[digi_view[i].clus()] - 1; + // digi_view[i].clus() = newclusId[digi_view[i].clus()] - 1; + // if (digi_view[i].clus() == invalidModuleId) + // digi_view[i].moduleId() = invalidModuleId; + } + + alpaka::syncBlockThreads(acc); + + //done + } + } + }; + +} // namespace pixelClustering + +#endif // diff --git a/RecoLocalTracker/SiPixelClusterizer/plugins/alpaka/PixelClustering.h b/RecoLocalTracker/SiPixelClusterizer/plugins/alpaka/PixelClustering.h new file mode 100644 index 0000000000000..0c650cd850ed4 --- /dev/null +++ b/RecoLocalTracker/SiPixelClusterizer/plugins/alpaka/PixelClustering.h @@ -0,0 +1,452 @@ +#ifndef RecoLocalTracker_SiPixelClusterizer_alpaka_PixelClustering_h +#define RecoLocalTracker_SiPixelClusterizer_alpaka_PixelClustering_h + +#include +#include +#include +#include +#include + +#include "HeterogeneousCore/AlpakaInterface/interface/config.h" +#include "HeterogeneousCore/AlpakaInterface/interface/HistoContainer.h" +#include "DataFormats/SiPixelClusterSoA/interface/ClusteringConstants.h" +#include "Geometry/CommonTopologies/interface/SimplePixelTopology.h" +#include "HeterogeneousCore/AlpakaInterface/interface/SimpleVector.h" + +// #define GPU_DEBUG + +namespace ALPAKA_ACCELERATOR_NAMESPACE { + + namespace pixelClustering { + +#ifdef GPU_DEBUG + template >> + ALPAKA_STATIC_ACC_MEM_GLOBAL uint32_t gMaxHit = 0; +#endif + + namespace PixelStatus { + // Phase-1 pixel modules + constexpr uint32_t pixelSizeX = pixelTopology::Phase1::numRowsInModule; + constexpr uint32_t pixelSizeY = pixelTopology::Phase1::numColsInModule; + + // Use 0x00, 0x01, 0x03 so each can be OR'ed on top of the previous ones + enum Status : uint32_t { kEmpty = 0x00, kFound = 0x01, kDuplicate = 0x03 }; + + constexpr uint32_t bits = 2; + constexpr uint32_t mask = (0x01 << bits) - 1; + constexpr uint32_t valuesPerWord = sizeof(uint32_t) * 
8 / bits; + constexpr uint32_t size = pixelSizeX * pixelSizeY / valuesPerWord; + + ALPAKA_FN_ACC ALPAKA_FN_INLINE constexpr static uint32_t getIndex(uint16_t x, uint16_t y) { + return (pixelSizeX * y + x) / valuesPerWord; + } + + ALPAKA_FN_ACC ALPAKA_FN_INLINE constexpr static uint32_t getShift(uint16_t x, uint16_t y) { + return (x % valuesPerWord) * 2; + } + + ALPAKA_FN_ACC ALPAKA_FN_INLINE constexpr static Status getStatus(uint32_t const* __restrict__ status, + uint16_t x, + uint16_t y) { + uint32_t index = getIndex(x, y); + uint32_t shift = getShift(x, y); + return Status{(status[index] >> shift) & mask}; + } + + ALPAKA_FN_ACC ALPAKA_FN_INLINE constexpr static bool isDuplicate(uint32_t const* __restrict__ status, + uint16_t x, + uint16_t y) { + return getStatus(status, x, y) == kDuplicate; + } + + template >> + ALPAKA_FN_ACC ALPAKA_FN_INLINE constexpr static void promote(TAcc const& acc, + uint32_t* __restrict__ status, + const uint16_t x, + const uint16_t y) { + uint32_t index = getIndex(x, y); + uint32_t shift = getShift(x, y); + uint32_t old_word = status[index]; + uint32_t expected = old_word; + do { + expected = old_word; + Status old_status{(old_word >> shift) & mask}; + if (kDuplicate == old_status) { + // nothing to do + return; + } + Status new_status = (kEmpty == old_status) ? kFound : kDuplicate; + uint32_t new_word = old_word | (static_cast(new_status) << shift); + old_word = alpaka::atomicCas(acc, &status[index], expected, new_word, alpaka::hierarchy::Blocks{}); + } while (expected != old_word); + } + + } // struct PixelStatus + + template + struct countModules { + template + ALPAKA_FN_ACC void operator()(const TAcc& acc, + SiPixelDigisSoAv2View digi_view, + SiPixelClustersSoAView clus_view, + const unsigned int numElements) const { + [[maybe_unused]] constexpr int nMaxModules = TrackerTraits::numberOfModules; + + #ifdef GPU_DEBUG + const uint32_t threadIdxGlobal(alpaka::getIdx(acc)[0u]); + // zero for next kernels... 
// Kernel functor that groups the digis of each module into clusters.
//
// For every module handled by the calling block it
//   - finds msize, the index of the first digi that belongs to a different module,
//   - histograms the valid digis by their yy() value,
//   - records, for each digi, the later histogram entries within one unit in xx() whose
//     yy() is equal or one larger (at most maxNeighbours per digi),
//   - repeatedly lowers clus() to the minimum over linked digis until nothing changes,
//   - renumbers the clusters of the module starting from 0, and
//   - stores the number of clusters in clus_view[thisModuleId].clusInModule() and the
//     module id in clus_view[module].moduleId().
//
// digi_view.clus() is expected to hold the digi's own index on entry
// NOTE(review): that initialisation is done by countModules in this file - confirm call order.
template
struct findClus {
  template
  ALPAKA_FN_ACC void operator()(const TAcc& acc,
                                SiPixelDigisSoAv2View digi_view,
                                SiPixelClustersSoAView clus_view,
                                const unsigned int numElements) const {

    constexpr bool isPhase2 = std::is_base_of::value;
    // the status array is only used by the (Phase-1 only) duplicate removal below
    constexpr const uint32_t pixelStatusSize = isPhase2 ? 1 : PixelStatus::size;

    // packed words array used to store the PixelStatus of each pixel
    auto& status = alpaka::declareSharedVar(acc);

    // find the index of the first pixel not belonging to this module (or invalid)
    auto& msize = alpaka::declareSharedVar(acc);

    const uint32_t blockIdx(alpaka::getIdx(acc)[0u]);
    // clus_view[0].moduleStart() holds the number of modules that have digis
    if (blockIdx >= clus_view[0].moduleStart())
      return;

    auto firstModule = blockIdx;
    auto endModule = clus_view[0].moduleStart();

    const uint32_t gridDimension(alpaka::getWorkDiv(acc)[0u]);

    for (auto module = firstModule; module < endModule; module += gridDimension) {
      // index of the first digi of this module, and the module id stored on that digi
      auto firstPixel = clus_view[1 + module].moduleStart();
      auto thisModuleId = digi_view[firstPixel].moduleId();
      ALPAKA_ASSERT_OFFLOAD(thisModuleId < TrackerTraits::numberOfModules);
      const uint32_t threadIdxLocal(alpaka::getIdx(acc)[0u]);
#ifdef GPU_DEBUG
      if (thisModuleId % 100 == 1)
        if (threadIdxLocal == 0)
          printf("start clusterizer for module %d in block %d\n", thisModuleId, module);
#endif

      // start from "the module extends to the end of the digis"; lowered by atomicMin below
      msize = numElements;
      alpaka::syncBlockThreads(acc);

      // Stride = block size.
      const uint32_t blockDimension(alpaka::getWorkDiv(acc)[0u]);

      // Get thread / CPU element indices in block.
      const auto& [firstElementIdxNoStride, endElementIdxNoStride] =
          cms::alpakatools::element_index_range_in_block(acc, firstPixel);
      uint32_t firstElementIdx = firstElementIdxNoStride;
      uint32_t endElementIdx = endElementIdxNoStride;

      // skip threads not associated to an existing pixel
      for (uint32_t i = firstElementIdx; i < numElements; ++i) {
        if (not cms::alpakatools::next_valid_element_index_strided(
                i, firstElementIdx, endElementIdx, blockDimension, numElements))
          break;
        auto id = digi_view[i].moduleId();
        if (id == ::pixelClustering::invalidModuleId)  // skip invalid pixels
          continue;
        if (id != thisModuleId) {  // find the first pixel in a different module
          alpaka::atomicMin(acc, &msize, i, alpaka::hierarchy::Threads{});
          break;
        }
      }
      //init hist  (ymax=416 < 512 : 9bits)
      constexpr uint32_t maxPixInModule = TrackerTraits::maxPixInModule;
      constexpr auto nbins = TrackerTraits::clusterBinning;
      constexpr auto nbits = TrackerTraits::clusterBits;
      using Hist = cms::alpakatools::HistoContainer;
      auto& hist = alpaka::declareSharedVar(acc);
      auto& ws = alpaka::declareSharedVar(acc);
      cms::alpakatools::for_each_element_in_block_strided(
          acc, Hist::totbins(), [&](uint32_t j) { hist.off[j] = 0; });
      alpaka::syncBlockThreads(acc);
      ALPAKA_ASSERT_OFFLOAD((msize == numElements) or
                            ((msize < numElements) and (digi_view[msize].moduleId() != thisModuleId)));
      // limit to maxPixInModule  (FIXME if recurrent (and not limited to simulation with low threshold) one will need to implement something cleverer)
      if (0 == threadIdxLocal) {
        if (msize - firstPixel > maxPixInModule) {
          printf("too many pixels in module %d: %d > %d\n", thisModuleId, msize - firstPixel, maxPixInModule);
          msize = maxPixInModule + firstPixel;
        }
      }
      alpaka::syncBlockThreads(acc);
      ALPAKA_ASSERT_OFFLOAD(msize - firstPixel <= maxPixInModule);

#ifdef GPU_DEBUG
      // debug only: number of valid digis counted while filling the histogram
      auto& totGood = alpaka::declareSharedVar(acc);
      totGood = 0;
      alpaka::syncBlockThreads(acc);
#endif
      // remove duplicate pixels
      // NOTE: the `and false` makes this branch compile-time dead for every TrackerTraits
      if constexpr (not isPhase2 and false) { //FIXME remove THIS
        if (msize > 1) {
          cms::alpakatools::for_each_element_in_block_strided(
              acc, PixelStatus::size, [&](uint32_t i) { status[i] = 0; });
          alpaka::syncBlockThreads(acc);

          // first pass: record how often each (xx, yy) position appears
          cms::alpakatools::for_each_element_in_block_strided(acc, msize - 1, firstElementIdx, [&](uint32_t i) {
            // skip invalid pixels
            if (digi_view[i].moduleId() == ::pixelClustering::invalidModuleId)
              return;
            PixelStatus::promote(acc, status, digi_view[i].xx(), digi_view[i].yy());
          });
          alpaka::syncBlockThreads(acc);
          // second pass: invalidate every digi at a position seen more than once
          cms::alpakatools::for_each_element_in_block_strided(acc, msize - 1, firstElementIdx, [&](uint32_t i) {
            // skip invalid pixels
            if (digi_view[i].moduleId() == ::pixelClustering::invalidModuleId)
              return;
            if (PixelStatus::isDuplicate(status, digi_view[i].xx(), digi_view[i].yy())) {
              digi_view[i].moduleId() = ::pixelClustering::invalidModuleId;
              digi_view[i].rawIdArr() = 0;
            }
          });
          alpaka::syncBlockThreads(acc);
        }
      }
      // fill histo
      cms::alpakatools::for_each_element_in_block_strided(acc, msize, firstPixel, [&](uint32_t i) {
        if (digi_view[i].moduleId() != ::pixelClustering::invalidModuleId) {  // skip invalid pixels
          hist.countHist(acc, digi_view[i].yy());
#ifdef GPU_DEBUG
          alpaka::atomicAdd(acc, &totGood, 1u, alpaka::hierarchy::Blocks{});
#endif
        }
      });
      alpaka::syncBlockThreads(acc);
      cms::alpakatools::for_each_element_in_block(acc, 32u, [&](uint32_t i) {
        ws[i] = 0;  // used by prefix scan...
      });
      alpaka::syncBlockThreads(acc);
      hist.finalize(acc, ws);
      alpaka::syncBlockThreads(acc);
#ifdef GPU_DEBUG
      ALPAKA_ASSERT_OFFLOAD(hist.size() == totGood);
      if (thisModuleId % 100 == 1)
        if (threadIdxLocal == 0)
          printf("histo size %d\n", hist.size());
#endif
      // store in the histogram the index of each valid digi relative to firstPixel
      cms::alpakatools::for_each_element_in_block_strided(acc, msize, firstPixel, [&](uint32_t i) {
        if (digi_view[i].moduleId() != ::pixelClustering::invalidModuleId) {  // skip invalid pixels
          hist.fillHist(acc, digi_view[i].yy(), i - firstPixel);
        }
      });
      // Assume that we can cover the whole module with up to 16 blockDimension-wide iterations
      // This maxiter value was tuned for GPU, with 256 or 512 threads per block.
      // Hence, also works for CPU case, with 256 or 512 elements per thread.
      // Real constraint is maxiter = hist.size() / blockDimension,
      // with blockDimension = threadPerBlock * elementsPerThread.
      // Hence, maxiter can be tuned accordingly to the workdiv.
      constexpr unsigned int maxiter = 16;
      ALPAKA_ASSERT_OFFLOAD((hist.size() / blockDimension) <= maxiter);
      // threadDimension bounds the number of elements one thread handles per iteration;
      // it sizes the per-thread nn/nnn arrays below
#ifndef ALPAKA_ACC_CPU_B_SEQ_T_SEQ_ENABLED
      constexpr uint32_t threadDimension = 1;
#else
      // NB: can be tuned.
      constexpr uint32_t threadDimension = 256;
#endif
#ifndef NDEBUG
      [[maybe_unused]] const uint32_t runTimeThreadDimension(
          alpaka::getWorkDiv(acc)[0u]);
      ALPAKA_ASSERT_OFFLOAD(runTimeThreadDimension <= threadDimension);
#endif

      // nearest neighbour
      // allocate space for duplicate pixels: a pixel can appear more than once with different charge in the same event
      constexpr int maxNeighbours = 10;
      // nn[k][e][l]: l-th neighbour (index relative to firstPixel) of the digi handled as
      // element e in iteration k; nnn[k][e] is how many neighbours were stored for it
      uint16_t nn[maxiter][threadDimension][maxNeighbours];
      uint8_t nnn[maxiter][threadDimension];  // number of nn
      for (uint32_t elementIdx = 0; elementIdx < threadDimension; ++elementIdx) {
        for (uint32_t k = 0; k < maxiter; ++k) {
          nnn[k][elementIdx] = 0;
        }
      }

      alpaka::syncBlockThreads(acc);  // for hit filling!

#ifdef GPU_DEBUG
      // look for anomalous high occupancy
      auto& n40 = alpaka::declareSharedVar(acc);
      auto& n60 = alpaka::declareSharedVar(acc);
      n40 = n60 = 0;
      alpaka::syncBlockThreads(acc);
      cms::alpakatools::for_each_element_in_block_strided(acc, Hist::nbins(), [&](uint32_t j) {
        if (hist.size(j) > 60)
          alpaka::atomicAdd(acc, &n60, 1u, alpaka::hierarchy::Blocks{});
        if (hist.size(j) > 40)
          alpaka::atomicAdd(acc, &n40, 1u, alpaka::hierarchy::Blocks{});
      });
      alpaka::syncBlockThreads(acc);
      if (0 == threadIdxLocal) {
        if (n60 > 0)
          printf("columns with more than 60 px %d in %d\n", n60, thisModuleId);
        else if (n40 > 0)
          printf("columns with more than 40 px %d in %d\n", n40, thisModuleId);
      }
      alpaka::syncBlockThreads(acc);
#endif
      // fill NN
      uint32_t k = 0u;
      cms::alpakatools::for_each_element_in_block_strided(acc, hist.size(), [&](uint32_t j) {
        const uint32_t jEquivalentClass = j % threadDimension;
        k = j / blockDimension;
        ALPAKA_ASSERT_OFFLOAD(k < maxiter);
        auto p = hist.begin() + j;
        auto i = *p + firstPixel;
        ALPAKA_ASSERT_OFFLOAD(digi_view[i].moduleId() != ::pixelClustering::invalidModuleId);
        ALPAKA_ASSERT_OFFLOAD(digi_view[i].moduleId() == thisModuleId);  // same module
        // scan the histogram entries after this one, up to the end of the bin of yy() + 1
        int be = Hist::bin(digi_view[i].yy() + 1);
        auto e = hist.end(be);
        ++p;
        ALPAKA_ASSERT_OFFLOAD(0 == nnn[k][jEquivalentClass]);
        for (; p < e; ++p) {
          auto m = (*p) + firstPixel;
          ALPAKA_ASSERT_OFFLOAD(m != i);
          ALPAKA_ASSERT_OFFLOAD(int(digi_view[m].yy()) - int(digi_view[i].yy()) >= 0);
          ALPAKA_ASSERT_OFFLOAD(int(digi_view[m].yy()) - int(digi_view[i].yy()) <= 1);
          if (std::abs(int(digi_view[m].xx()) - int(digi_view[i].xx())) <= 1) {
            auto l = nnn[k][jEquivalentClass]++;
            ALPAKA_ASSERT_OFFLOAD(l < maxNeighbours);
            nn[k][jEquivalentClass][l] = *p;
          }
        }
      });
      // for each pixel, look at all the pixels until the end of the module;
      // when two valid pixels within +/- 1 in x or y are found, set their id to the minimum;
      // after the loop, all the pixels in each cluster should have the id equal to the lowest
      // pixel in the cluster ( clus[i] == i ).
      bool more = true;
      int nloops = 0;
      // the predicate barrier combines `more` across the threads of the block, so all of
      // them leave the loop in the same iteration
      while (alpaka::syncBlockThreadsPredicate(acc, more)) {
        if (1 == nloops % 2) {
          // odd iterations: follow the chain of ids until a digi that points to itself
          cms::alpakatools::for_each_element_in_block_strided(acc, hist.size(), [&](uint32_t j) {
            auto p = hist.begin() + j;
            auto i = *p + firstPixel;
            auto m = digi_view[i].clus();
            while (m != digi_view[m].clus())
              m = digi_view[m].clus();
            digi_view[i].clus() = m;
          });
        } else {
          // even iterations: exchange the minimum id with each stored neighbour
          more = false;
          uint32_t k = 0u;
          cms::alpakatools::for_each_element_in_block_strided(acc, hist.size(), [&](uint32_t j) {
            k = j / blockDimension;
            const uint32_t jEquivalentClass = j % threadDimension;
            auto p = hist.begin() + j;
            auto i = *p + firstPixel;
            for (int kk = 0; kk < nnn[k][jEquivalentClass]; ++kk) {
              auto l = nn[k][jEquivalentClass][kk];
              auto m = l + firstPixel;
              ALPAKA_ASSERT_OFFLOAD(m != i);
              auto old =
                  alpaka::atomicMin(acc, &digi_view[m].clus(), digi_view[i].clus(), alpaka::hierarchy::Blocks{});
              if (old != digi_view[i].clus()) {
                // end the loop only if no changes were applied
                more = true;
              }
              alpaka::atomicMin(acc, &digi_view[i].clus(), old, alpaka::hierarchy::Blocks{});
            }  // nnloop
          });  // pixel loop
        }
        ++nloops;
      }  // end while
#ifdef GPU_DEBUG
      {
        // debug only: check that every thread of the block ran the same number of iterations
        auto& n0 = alpaka::declareSharedVar(acc);
        if (threadIdxLocal == 0)
          n0 = nloops;
        alpaka::syncBlockThreads(acc);
#ifndef NDEBUG
        [[maybe_unused]] auto ok = n0 == nloops;
        ALPAKA_ASSERT_OFFLOAD(alpaka::syncBlockThreadsPredicate(acc, ok));
#endif
        if (thisModuleId % 100 == 1)
          if (threadIdxLocal == 0)
            printf("# loops %d\n", nloops);
      }
#endif
      auto& foundClusters = alpaka::declareSharedVar(acc);
      foundClusters = 0;
      alpaka::syncBlockThreads(acc);

      // find the number of different clusters, identified by a pixel with clus[i] == i;
      // mark these pixels with a negative id.
      cms::alpakatools::for_each_element_in_block_strided(acc, msize, firstPixel, [&](uint32_t i) {
        if (digi_view[i].moduleId() != ::pixelClustering::invalidModuleId) {  // skip invalid pixels
          if (digi_view[i].clus() == static_cast(i)) {
            auto old = alpaka::atomicInc(acc, &foundClusters, 0xffffffff, alpaka::hierarchy::Threads{});
            // -(n + 1) encodes cluster number n; decoded again two steps below
            digi_view[i].clus() = -(old + 1);
          }
        }
      });
      alpaka::syncBlockThreads(acc);

      // propagate the negative id to all the pixels in the cluster.
      cms::alpakatools::for_each_element_in_block_strided(acc, msize, firstPixel, [&](uint32_t i) {
        if (digi_view[i].moduleId() != ::pixelClustering::invalidModuleId) {  // skip invalid pixels
          if (digi_view[i].clus() >= 0) {
            // mark each pixel in a cluster with the same id as the first one
            digi_view[i].clus() = digi_view[digi_view[i].clus()].clus();
          }
        }
      });
      alpaka::syncBlockThreads(acc);

      // adjust the cluster id to be a positive value starting from 0
      cms::alpakatools::for_each_element_in_block_strided(acc, msize, firstPixel, [&](uint32_t i) {
        if (digi_view[i].moduleId() == ::pixelClustering::invalidModuleId) {  // skip invalid pixels
          digi_view[i].clus() = ::pixelClustering::invalidClusterId;
        } else {
          digi_view[i].clus() = -digi_view[i].clus() - 1;
        }
      });
      alpaka::syncBlockThreads(acc);
      // one thread per block publishes the per-module results
      if (threadIdxLocal == 0) {
        clus_view[thisModuleId].clusInModule() = foundClusters;
        clus_view[module].moduleId() = thisModuleId;
#ifdef GPU_DEBUG
        if (foundClusters > gMaxHit) {
          gMaxHit = foundClusters;
          if (foundClusters > 8)
            printf("max hit %d in %d\n", foundClusters, thisModuleId);
        }
        // if (thisModuleId % 100 == 1)
        printf("%d clusters in module %d\n", foundClusters, thisModuleId);
#endif
      }
    }  // module loop
  }
};
b/RecoLocalTracker/SiPixelClusterizer/plugins/alpaka/SiPixelPhase2DigiToCluster.cc new file mode 100644 index 0000000000000..54795b3f5a26b --- /dev/null +++ b/RecoLocalTracker/SiPixelClusterizer/plugins/alpaka/SiPixelPhase2DigiToCluster.cc @@ -0,0 +1,166 @@ +// C++ includes +#include +#include +#include +#include +#include + +#include "DataFormats/SiPixelClusterSoA/interface/alpaka/SiPixelClustersCollection.h" +#include "DataFormats/SiPixelClusterSoA/interface/SiPixelClustersDevice.h" +#include "DataFormats/SiPixelClusterSoA/interface/ClusteringConstants.h" +#include "DataFormats/SiPixelDigiSoA/interface/alpaka/SiPixelDigiErrorsCollection.h" +#include "DataFormats/SiPixelDigiSoA/interface/SiPixelDigiErrorsDevice.h" +#include "DataFormats/SiPixelDigiSoA/interface/SiPixelDigisDevice.h" +#include "DataFormats/SiPixelDigiSoA/interface/alpaka/SiPixelDigisCollection.h" + +#include "Geometry/Records/interface/TrackerDigiGeometryRecord.h" +#include "DataFormats/Common/interface/DetSetVector.h" +#include "DataFormats/SiPixelDigi/interface/PixelDigi.h" +#include "Geometry/TrackerGeometryBuilder/interface/TrackerGeometry.h" +#include "Geometry/CommonDetUnit/interface/GeomDet.h" + +#include "FWCore/MessageLogger/interface/MessageLogger.h" +#include "FWCore/ParameterSet/interface/ConfigurationDescriptions.h" +#include "FWCore/ParameterSet/interface/ParameterSet.h" +#include "FWCore/ParameterSet/interface/ParameterSetDescription.h" +#include "Geometry/CommonTopologies/interface/SimplePixelTopology.h" +#include "RecoTracker/Record/interface/CkfComponentsRecord.h" + +#include "FWCore/Utilities/interface/InputTag.h" +#include "HeterogeneousCore/AlpakaCore/interface/alpaka/stream/SynchronizingEDProducer.h" +#include "HeterogeneousCore/AlpakaCore/interface/alpaka/EDPutToken.h" +#include "HeterogeneousCore/AlpakaCore/interface/alpaka/ESGetToken.h" +#include "HeterogeneousCore/AlpakaInterface/interface/config.h" +#include "HeterogeneousCore/AlpakaCore/interface/alpaka/Event.h" + 
+#include "RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelClusterThresholds.h" +#include "SiPixelRawToClusterKernel.h" + +namespace ALPAKA_ACCELERATOR_NAMESPACE { + + class SiPixelPhase2DigiToCluster : public stream::SynchronizingEDProducer<> { + public: + explicit SiPixelPhase2DigiToCluster(const edm::ParameterSet& iConfig); + ~SiPixelPhase2DigiToCluster() override = default; + + static void fillDescriptions(edm::ConfigurationDescriptions& descriptions); + using Algo = pixelDetails::SiPixelRawToClusterKernel; + + private: + void acquire(device::Event const& iEvent, device::EventSetup const& iSetup) override; + void produce(device::Event& iEvent, device::EventSetup const& iSetup) override; + + const edm::ESGetToken geomToken_; + const edm::EDGetTokenT> pixelDigiToken_; + + device::EDPutToken digiPutToken_; + device::EDPutToken digiErrorPutToken_; + device::EDPutToken clusterPutToken_; + + Algo Algo_; + + const bool includeErrors_; + const SiPixelClusterThresholds clusterThresholds_; + uint32_t nDigis_ = 0; + + SiPixelDigisCollection digis_d; + }; + + SiPixelPhase2DigiToCluster::SiPixelPhase2DigiToCluster(const edm::ParameterSet& iConfig) + : geomToken_(esConsumes()), + pixelDigiToken_(consumes>(iConfig.getParameter("InputDigis"))), + digiPutToken_(produces()), + clusterPutToken_(produces()), + includeErrors_(iConfig.getParameter("IncludeErrors")), + clusterThresholds_{iConfig.getParameter("clusterThreshold_layer1"), + iConfig.getParameter("clusterThreshold_otherLayers"), + static_cast(iConfig.getParameter("ElectronPerADCGain")), + static_cast(iConfig.getParameter("Phase2ReadoutMode")), + static_cast(iConfig.getParameter("Phase2DigiBaseline")), + static_cast(iConfig.getParameter("Phase2KinkADC"))} { + if (includeErrors_) { + digiErrorPutToken_ = produces(); + } + } + + void SiPixelPhase2DigiToCluster::fillDescriptions(edm::ConfigurationDescriptions& descriptions) { + edm::ParameterSetDescription desc; + + desc.add("IncludeErrors", true); + 
desc.add("clusterThreshold_layer1", pixelClustering::clusterThresholdPhase2LayerOne); //FIXME (fix the CUDA) + desc.add("clusterThreshold_otherLayers", pixelClustering::clusterThresholdPhase2OtherLayers); + desc.add("ElectronPerADCGain", 1500.); + desc.add("Phase2ReadoutMode", 3); + desc.add("Phase2DigiBaseline", 1000); + desc.add("Phase2KinkADC", 8); + desc.add("InputDigis", edm::InputTag("simSiPixelDigis:Pixel")); + descriptions.addWithDefaultLabel(desc); + } + + void SiPixelPhase2DigiToCluster::acquire(device::Event const& iEvent, device::EventSetup const& iSetup) { + auto const& input = iEvent.get(pixelDigiToken_); + + const TrackerGeometry* geom_ = &iSetup.getData(geomToken_); + + uint32_t nDigis = 0; + for (auto DSViter = input.begin(); DSViter != input.end(); DSViter++) { + nDigis = nDigis + DSViter->size(); + } + SiPixelDigisHost digis_h(nDigis, iEvent.queue()); + nDigis_ = nDigis; + + if (nDigis_ == 0) + return; + + nDigis = 0; + for (auto DSViter = input.begin(); DSViter != input.end(); DSViter++) { + unsigned int detid = DSViter->detId(); + DetId detIdObject(detid); + const GeomDetUnit* genericDet = geom_->idToDetUnit(detIdObject); + auto const gind = genericDet->index(); + for (auto const& px : *DSViter) { + digis_h.view()[nDigis].moduleId() = uint16_t(gind); + + digis_h.view()[nDigis].xx() = uint16_t(px.row()); + digis_h.view()[nDigis].yy() = uint16_t(px.column()); + digis_h.view()[nDigis].adc() = uint16_t(px.adc()); + + digis_h.view()[nDigis].pdigi() = uint32_t(px.packedData()); + + digis_h.view()[nDigis].rawIdArr() = uint32_t(detid); + + nDigis++; + } + } + + digis_d = SiPixelDigisCollection(nDigis, iEvent.queue()); + alpaka::memcpy(iEvent.queue(), digis_d.buffer(), digis_h.buffer()); + + Algo_.makePhase2ClustersAsync(clusterThresholds_, digis_d.view(), nDigis, iEvent.queue()); + } + + void SiPixelPhase2DigiToCluster::produce(device::Event& iEvent, device::EventSetup const& iSetup) { + if (nDigis_ == 0) { + SiPixelClustersCollection 
clusters_d{pixelTopology::Phase1::numberOfModules, iEvent.queue()}; + iEvent.emplace(digiPutToken_, std::move(digis_d)); + iEvent.emplace(clusterPutToken_, std::move(clusters_d)); + if (includeErrors_) { + iEvent.emplace(digiErrorPutToken_, SiPixelDigiErrorsCollection()); + } + return; + } + + digis_d.setNModulesDigis(Algo_.nModules(), nDigis_); + + iEvent.emplace(digiPutToken_, std::move(digis_d)); + iEvent.emplace(clusterPutToken_, Algo_.getClusters()); + if (includeErrors_) { + iEvent.emplace(digiErrorPutToken_, Algo_.getErrors()); + } + } + +} // namespace ALPAKA_ACCELERATOR_NAMESPACE + +// define as framework plugin +#include "HeterogeneousCore/AlpakaCore/interface/alpaka/MakerMacros.h" +DEFINE_FWK_ALPAKA_MODULE(SiPixelPhase2DigiToCluster); diff --git a/RecoLocalTracker/SiPixelClusterizer/plugins/alpaka/SiPixelRawToCluster.cc b/RecoLocalTracker/SiPixelClusterizer/plugins/alpaka/SiPixelRawToCluster.cc new file mode 100644 index 0000000000000..9aa799c87f2ed --- /dev/null +++ b/RecoLocalTracker/SiPixelClusterizer/plugins/alpaka/SiPixelRawToCluster.cc @@ -0,0 +1,299 @@ +#include +#include +#include +#include +#include + +#include "DataFormats/SiPixelClusterSoA/interface/alpaka/SiPixelClustersCollection.h" +#include "DataFormats/SiPixelClusterSoA/interface/SiPixelClustersDevice.h" +#include "DataFormats/SiPixelClusterSoA/interface/ClusteringConstants.h" +#include "DataFormats/SiPixelDigiSoA/interface/alpaka/SiPixelDigiErrorsCollection.h" +#include "DataFormats/SiPixelDigiSoA/interface/SiPixelDigiErrorsDevice.h" +#include "DataFormats/SiPixelDigiSoA/interface/SiPixelDigisDevice.h" +#include "DataFormats/SiPixelDigiSoA/interface/alpaka/SiPixelDigisCollection.h" + +#include "CondFormats/SiPixelObjects/interface/alpaka/SiPixelMappingDevice.h" +#include "CondFormats/SiPixelObjects/interface/alpaka/SiPixelMappingUtilities.h" +#include "CalibTracker/Records/interface/SiPixelMappingSoARecord.h" + +#include 
"CalibTracker/Records/interface/SiPixelGainCalibrationForHLTSoARcd.h" +#include "CondFormats/SiPixelObjects/interface/SiPixelGainCalibrationForHLTLayout.h" +#include "CondFormats/SiPixelObjects/interface/alpaka/SiPixelGainCalibrationForHLTDevice.h" +#include "CondFormats/DataRecord/interface/SiPixelFedCablingMapRcd.h" +#include "CondFormats/SiPixelObjects/interface/SiPixelFedCablingMap.h" +#include "CondFormats/SiPixelObjects/interface/SiPixelFedCablingTree.h" + +#include "DataFormats/FEDRawData/interface/FEDNumbering.h" +#include "DataFormats/FEDRawData/interface/FEDRawData.h" +#include "DataFormats/FEDRawData/interface/FEDRawDataCollection.h" +#include "EventFilter/SiPixelRawToDigi/interface/PixelDataFormatter.h" +#include "EventFilter/SiPixelRawToDigi/interface/PixelUnpackingRegions.h" +#include "FWCore/Framework/interface/ESWatcher.h" +#include "FWCore/MessageLogger/interface/MessageLogger.h" +#include "FWCore/ParameterSet/interface/ConfigurationDescriptions.h" +#include "FWCore/ParameterSet/interface/ParameterSet.h" +#include "FWCore/ParameterSet/interface/ParameterSetDescription.h" +#include "Geometry/CommonTopologies/interface/SimplePixelTopology.h" +#include "RecoTracker/Record/interface/CkfComponentsRecord.h" + +#include "FWCore/Utilities/interface/InputTag.h" +#include "HeterogeneousCore/AlpakaCore/interface/alpaka/stream/SynchronizingEDProducer.h" +#include "HeterogeneousCore/AlpakaCore/interface/alpaka/EDPutToken.h" +#include "HeterogeneousCore/AlpakaCore/interface/alpaka/ESGetToken.h" +#include "HeterogeneousCore/AlpakaInterface/interface/config.h" +#include "HeterogeneousCore/AlpakaCore/interface/alpaka/Event.h" + +#include "RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelClusterThresholds.h" +#include "DataFormats/SiPixelRawData/interface/SiPixelFormatterErrors.h" +#include "SiPixelRawToClusterKernel.h" + +namespace ALPAKA_ACCELERATOR_NAMESPACE { + + template + class SiPixelRawToCluster : public stream::SynchronizingEDProducer<> { + public: + 
explicit SiPixelRawToCluster(const edm::ParameterSet& iConfig); + ~SiPixelRawToCluster() override = default; + + static void fillDescriptions(edm::ConfigurationDescriptions& descriptions); + using Algo = pixelDetails::SiPixelRawToClusterKernel; + + private: + void acquire(device::Event const& iEvent, device::EventSetup const& iSetup) override; + void produce(device::Event& iEvent, device::EventSetup const& iSetup) override; + + edm::EDGetTokenT rawGetToken_; + edm::EDPutTokenT fmtErrorToken_; + device::EDPutToken digiPutToken_; + device::EDPutToken digiErrorPutToken_; + device::EDPutToken clusterPutToken_; + + edm::ESWatcher recordWatcher_; + const device::ESGetToken mapToken_; + const device::ESGetToken gainsToken_; + const edm::ESGetToken cablingMapToken_; + + std::unique_ptr cabling_; + std::vector fedIds_; + const SiPixelFedCablingMap* cablingMap_ = nullptr; + std::unique_ptr regions_; + + Algo Algo_; + PixelDataFormatter::Errors errors_; + + const bool includeErrors_; + const bool useQuality_; + uint32_t nDigis_; + const SiPixelClusterThresholds clusterThresholds_; + }; + + template + SiPixelRawToCluster::SiPixelRawToCluster(const edm::ParameterSet& iConfig) + : rawGetToken_(consumes(iConfig.getParameter("InputLabel"))), + digiPutToken_(produces()), + clusterPutToken_(produces()), + mapToken_(esConsumes()), + gainsToken_(esConsumes()), + cablingMapToken_(esConsumes( + edm::ESInputTag("", iConfig.getParameter("CablingMapLabel")))), + includeErrors_(iConfig.getParameter("IncludeErrors")), + useQuality_(iConfig.getParameter("UseQualityInfo")), + clusterThresholds_{iConfig.getParameter("clusterThreshold_layer1"), + iConfig.getParameter("clusterThreshold_otherLayers"), + static_cast(iConfig.getParameter("VCaltoElectronGain")), + static_cast(iConfig.getParameter("VCaltoElectronGain_L1")), + static_cast(iConfig.getParameter("VCaltoElectronOffset")), + static_cast(iConfig.getParameter("VCaltoElectronOffset_L1"))} { + if (includeErrors_) { + digiErrorPutToken_ = 
produces(); + fmtErrorToken_ = produces(); + } + + // regions + if (!iConfig.getParameter("Regions").getParameterNames().empty()) { + regions_ = std::make_unique(iConfig, consumesCollector()); + } + } + + template + void SiPixelRawToCluster::fillDescriptions(edm::ConfigurationDescriptions& descriptions) { + edm::ParameterSetDescription desc; + desc.add("IncludeErrors", true); + desc.add("UseQualityInfo", false); + // Note: this parameter is obsolete: it is ignored and will have no effect. + // It is kept to avoid breaking older configurations, and will not be printed in the generated cfi.py file. + desc.addOptionalNode(edm::ParameterDescription("MaxFEDWords", 0, true), false) + ->setComment("This parameter is obsolete and will be ignored."); + desc.add("clusterThreshold_layer1", pixelClustering::clusterThresholdLayerOne); + desc.add("clusterThreshold_otherLayers", pixelClustering::clusterThresholdOtherLayers); + desc.add("VCaltoElectronGain", 47.f); + desc.add("VCaltoElectronGain_L1", 50.f); + desc.add("VCaltoElectronOffset", -60.f); + desc.add("VCaltoElectronOffset_L1", -670.f); + + desc.add("InputLabel", edm::InputTag("rawDataCollector")); + { + edm::ParameterSetDescription psd0; + psd0.addOptional>("inputs"); + psd0.addOptional>("deltaPhi"); + psd0.addOptional>("maxZ"); + psd0.addOptional("beamSpot"); + desc.add("Regions", psd0) + ->setComment("## Empty Regions PSet means complete unpacking"); + } + desc.add("CablingMapLabel", "")->setComment("CablingMap label"); //Tav + descriptions.addWithDefaultLabel(desc); + } + + template + void SiPixelRawToCluster::acquire(device::Event const& iEvent, device::EventSetup const& iSetup) { + [[maybe_unused]] auto const& hMap = iSetup.getData(mapToken_); + auto const& dGains = iSetup.getData(gainsToken_); + auto Gains = SiPixelGainCalibrationForHLTDevice(1, iEvent.queue()); + auto modulesToUnpackRegional = + cms::alpakatools::make_device_buffer(iEvent.queue(), ::pixelgpudetails::MAX_SIZE); + const unsigned char* 
modulesToUnpack; + // initialize cabling map or update if necessary + if (recordWatcher_.check(iSetup)) { + // cabling map, which maps online address (fed->link->ROC->local pixel) to offline (DetId->global pixel) + cablingMap_ = &iSetup.getData(cablingMapToken_); + fedIds_ = cablingMap_->fedIds(); + cabling_ = cablingMap_->cablingTree(); + LogDebug("map version:") << cablingMap_->version(); + } + if (regions_) { + regions_->run(iEvent, iSetup); + LogDebug("SiPixelRawToCluster") << "region2unpack #feds: " << regions_->nFEDs(); + LogDebug("SiPixelRawToCluster") << "region2unpack #modules (BPIX,EPIX,total): " << regions_->nBarrelModules() + << " " << regions_->nForwardModules() << " " << regions_->nModules(); + + modulesToUnpackRegional = SiPixelMappingUtilities::getModToUnpRegionalAsync( + *(regions_->modulesToUnpack()), cabling_.get(), fedIds_, iEvent.queue()); + modulesToUnpack = modulesToUnpackRegional.data(); + } else { + modulesToUnpack = hMap->modToUnpDefault(); + } + + const auto& buffers = iEvent.get(rawGetToken_); + + errors_.clear(); + + // GPU specific: Data extraction for RawToDigi GPU + unsigned int wordCounter = 0; + unsigned int fedCounter = 0; + bool errorsInEvent = false; + std::vector index(fedIds_.size(), 0); + std::vector start(fedIds_.size(), nullptr); + std::vector words(fedIds_.size(), 0); + // In CPU algorithm this loop is part of PixelDataFormatter::interpretRawData() + ErrorChecker errorcheck; + for (uint32_t i = 0; i < fedIds_.size(); ++i) { + const int fedId = fedIds_[i]; + if (regions_ && !regions_->mayUnpackFED(fedId)) + continue; + + // for GPU + // first 150 index stores the fedId and next 150 will store the + // start index of word in that fed + assert(fedId >= FEDNumbering::MINSiPixeluTCAFEDID); + fedCounter++; + + // get event data for this fed + const FEDRawData& rawData = buffers.FEDData(fedId); + + // GPU specific + int nWords = rawData.size() / sizeof(cms_uint64_t); + if (nWords == 0) { + continue; + } + // check CRC bit + const 
cms_uint64_t* trailer = reinterpret_cast(rawData.data()) + (nWords - 1); + if (not errorcheck.checkCRC(errorsInEvent, fedId, trailer, errors_)) { + continue; + } + // check headers + const cms_uint64_t* header = reinterpret_cast(rawData.data()); + header--; + bool moreHeaders = true; + while (moreHeaders) { + header++; + bool headerStatus = errorcheck.checkHeader(errorsInEvent, fedId, header, errors_); + moreHeaders = headerStatus; + } + + // check trailers + bool moreTrailers = true; + trailer++; + while (moreTrailers) { + trailer--; + bool trailerStatus = errorcheck.checkTrailer(errorsInEvent, fedId, nWords, trailer, errors_); + moreTrailers = trailerStatus; + } + + const cms_uint32_t* bw = (const cms_uint32_t*)(header + 1); + const cms_uint32_t* ew = (const cms_uint32_t*)(trailer); + + assert(0 == (ew - bw) % 2); + index[i] = wordCounter; + start[i] = bw; + words[i] = (ew - bw); + wordCounter += (ew - bw); + + } // end of for loop + nDigis_ = wordCounter; + if (nDigis_ == 0) + return; + + // copy the FED data to a single cpu buffer + typename Algo::WordFedAppender wordFedAppender(nDigis_); + for (uint32_t i = 0; i < fedIds_.size(); ++i) { + wordFedAppender.initializeWordFed(fedIds_[i], index[i], start[i], words[i]); + } + Algo_.makePhase1ClustersAsync(clusterThresholds_, + hMap.const_view(), + modulesToUnpack, + dGains.const_view(), + wordFedAppender, + wordCounter, + fedCounter, + useQuality_, + includeErrors_, + edm::MessageDrop::instance()->debugEnabled, + iEvent.queue()); + } + + template + void SiPixelRawToCluster::produce(device::Event& iEvent, device::EventSetup const& iSetup) { + if (nDigis_ == 0) { + // Cannot use the default constructor here, as it would not allocate memory. + // In the case of no digis, clusters_d are not being instantiated, but are + // still used downstream to initialize TrackingRecHitSoADevice. If there + // are no valid pointers to clusters' Collection columns, instantiation + // of TrackingRecHits fail. 
Example: workflow 11604.0 + + iEvent.emplace(digiPutToken_, nDigis_, iEvent.queue()); + iEvent.emplace(clusterPutToken_, pixelTopology::Phase1::numberOfModules, iEvent.queue()); + if (includeErrors_) { + iEvent.emplace(digiErrorPutToken_); + iEvent.emplace(fmtErrorToken_); + } + return; + } + + iEvent.emplace(digiPutToken_, Algo_.getDigis()); + iEvent.emplace(clusterPutToken_, Algo_.getClusters()); + if (includeErrors_) { + iEvent.emplace(digiErrorPutToken_, Algo_.getErrors()); + iEvent.emplace(fmtErrorToken_, std::move(errors_)); + } + } + + using SiPixelRawToClusterPhase1 = SiPixelRawToCluster; + using SiPixelRawToClusterHIonPhase1 = SiPixelRawToCluster; + +} // namespace ALPAKA_ACCELERATOR_NAMESPACE + +// define as framework plugin +#include "HeterogeneousCore/AlpakaCore/interface/alpaka/MakerMacros.h" +DEFINE_FWK_ALPAKA_MODULE(SiPixelRawToClusterPhase1); +DEFINE_FWK_ALPAKA_MODULE(SiPixelRawToClusterHIonPhase1); diff --git a/RecoLocalTracker/SiPixelClusterizer/plugins/alpaka/SiPixelRawToClusterKernel.dev.cc b/RecoLocalTracker/SiPixelClusterizer/plugins/alpaka/SiPixelRawToClusterKernel.dev.cc new file mode 100644 index 0000000000000..ac56527b7bde0 --- /dev/null +++ b/RecoLocalTracker/SiPixelClusterizer/plugins/alpaka/SiPixelRawToClusterKernel.dev.cc @@ -0,0 +1,806 @@ +// C++ includes +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +// CMSSW includes +#include "HeterogeneousCore/AlpakaInterface/interface/prefixScan.h" +#include "HeterogeneousCore/AlpakaInterface/interface/SimpleVector.h" +#include "HeterogeneousCore/AlpakaInterface/interface/config.h" +#include "HeterogeneousCore/AlpakaInterface/interface/memory.h" +#include "HeterogeneousCore/AlpakaInterface/interface/config.h" +#include "HeterogeneousCore/AlpakaInterface/interface/workdivision.h" + +#include "CondFormats/SiPixelObjects/interface/SiPixelGainCalibrationForHLTLayout.h" +#include 
"CondFormats/SiPixelObjects/interface/SiPixelMappingLayout.h" +#include "DataFormats/SiPixelDigi/interface/SiPixelDigiConstants.h" + +// local includes +#include "CalibPixel.h" +#include "ClusterChargeCut.h" +#include "PixelClustering.h" +#include "SiPixelRawToClusterKernel.h" + +// #define GPU_DEBUG + +namespace ALPAKA_ACCELERATOR_NAMESPACE { + namespace pixelDetails { + + //////////////////// + + ALPAKA_FN_ACC uint32_t getLink(uint32_t ww) { + return ((ww >> ::sipixelconstants::LINK_shift) & ::sipixelconstants::LINK_mask); + } + + ALPAKA_FN_ACC uint32_t getRoc(uint32_t ww) { + return ((ww >> ::sipixelconstants::ROC_shift) & ::sipixelconstants::ROC_mask); + } + + ALPAKA_FN_ACC uint32_t getADC(uint32_t ww) { + return ((ww >> ::sipixelconstants::ADC_shift) & ::sipixelconstants::ADC_mask); + } + + ALPAKA_FN_ACC bool isBarrel(uint32_t rawId) { return (1 == ((rawId >> 25) & 0x7)); } + + ALPAKA_FN_ACC ::pixelDetails::DetIdGPU getRawId(const SiPixelMappingSoAConstView &cablingMap, + uint8_t fed, + uint32_t link, + uint32_t roc) { + using namespace ::pixelDetails; + uint32_t index = fed * MAX_LINK * MAX_ROC + (link - 1) * MAX_ROC + roc; + ::pixelDetails::DetIdGPU detId = { + cablingMap.rawId()[index], cablingMap.rocInDet()[index], cablingMap.moduleId()[index]}; + return detId; + } + + //reference http://cmsdoxygen.web.cern.ch/cmsdoxygen/CMSSW_9_2_0/doc/html/dd/d31/FrameConversion_8cc_source.html + //http://cmslxr.fnal.gov/source/CondFormats/SiPixelObjects/src/PixelROC.cc?v=CMSSW_9_2_0#0071 + // Convert local pixel to pixelDetails::global pixel + ALPAKA_FN_ACC ::pixelDetails::Pixel frameConversion( + bool bpix, int side, uint32_t layer, uint32_t rocIdInDetUnit, ::pixelDetails::Pixel local) { + int slopeRow = 0, slopeCol = 0; + int rowOffset = 0, colOffset = 0; + + if (bpix) { + if (side == -1 && layer != 1) { // -Z side: 4 non-flipped modules oriented like 'dddd', except Layer 1 + if (rocIdInDetUnit < 8) { + slopeRow = 1; + slopeCol = -1; + rowOffset = 0; + colOffset = (8 
- rocIdInDetUnit) * ::pixelDetails::numColsInRoc - 1; + } else { + slopeRow = -1; + slopeCol = 1; + rowOffset = 2 * ::pixelDetails::numRowsInRoc - 1; + colOffset = (rocIdInDetUnit - 8) * ::pixelDetails::numColsInRoc; + } // if roc + } else { // +Z side: 4 non-flipped modules oriented like 'pppp', but all 8 in layer1 + if (rocIdInDetUnit < 8) { + slopeRow = -1; + slopeCol = 1; + rowOffset = 2 * ::pixelDetails::numRowsInRoc - 1; + colOffset = rocIdInDetUnit * ::pixelDetails::numColsInRoc; + } else { + slopeRow = 1; + slopeCol = -1; + rowOffset = 0; + colOffset = (16 - rocIdInDetUnit) * ::pixelDetails::numColsInRoc - 1; + } + } + + } else { // fpix + if (side == -1) { // pannel 1 + if (rocIdInDetUnit < 8) { + slopeRow = 1; + slopeCol = -1; + rowOffset = 0; + colOffset = (8 - rocIdInDetUnit) * ::pixelDetails::numColsInRoc - 1; + } else { + slopeRow = -1; + slopeCol = 1; + rowOffset = 2 * ::pixelDetails::numRowsInRoc - 1; + colOffset = (rocIdInDetUnit - 8) * ::pixelDetails::numColsInRoc; + } + } else { // pannel 2 + if (rocIdInDetUnit < 8) { + slopeRow = 1; + slopeCol = -1; + rowOffset = 0; + colOffset = (8 - rocIdInDetUnit) * ::pixelDetails::numColsInRoc - 1; + } else { + slopeRow = -1; + slopeCol = 1; + rowOffset = 2 * ::pixelDetails::numRowsInRoc - 1; + colOffset = (rocIdInDetUnit - 8) * ::pixelDetails::numColsInRoc; + } + + } // side + } + + uint32_t gRow = rowOffset + slopeRow * local.row; + uint32_t gCol = colOffset + slopeCol * local.col; + ::pixelDetails::Pixel global = {gRow, gCol}; + return global; + } + + ALPAKA_FN_ACC uint8_t conversionError(uint8_t fedId, uint8_t status, bool debug = false) { + uint8_t errorType = 0; + + switch (status) { + case (1): { + if (debug) + printf("Error in Fed: %i, invalid channel Id (errorType = 35\n)", fedId); + errorType = 35; + break; + } + case (2): { + if (debug) + printf("Error in Fed: %i, invalid ROC Id (errorType = 36)\n", fedId); + errorType = 36; + break; + } + case (3): { + if (debug) + printf("Error in Fed: %i, 
invalid dcol/pixel value (errorType = 37)\n", fedId); + errorType = 37; + break; + } + case (4): { + if (debug) + printf("Error in Fed: %i, dcol/pixel read out of order (errorType = 38)\n", fedId); + errorType = 38; + break; + } + default: + if (debug) + printf("Cabling check returned unexpected result, status = %i\n", status); + }; + + return errorType; + } + + ALPAKA_FN_ACC bool rocRowColIsValid(uint32_t rocRow, uint32_t rocCol) { + uint32_t numRowsInRoc = 80; + uint32_t numColsInRoc = 52; + + /// row and collumn in ROC representation + return ((rocRow < numRowsInRoc) & (rocCol < numColsInRoc)); + } + + ALPAKA_FN_ACC bool dcolIsValid(uint32_t dcol, uint32_t pxid) { return ((dcol < 26) & (2 <= pxid) & (pxid < 162)); } + + ALPAKA_FN_ACC uint8_t checkROC(uint32_t errorWord, + uint8_t fedId, + uint32_t link, + const SiPixelMappingSoAConstView &cablingMap, + bool debug = false) { + uint8_t errorType = (errorWord >> ::pixelDetails::ROC_shift) & ::pixelDetails::ERROR_mask; + if (errorType < 25) + return 0; + bool errorFound = false; + + switch (errorType) { + case (25): { + errorFound = true; + uint32_t index = + fedId * ::pixelDetails::MAX_LINK * ::pixelDetails::MAX_ROC + (link - 1) * ::pixelDetails::MAX_ROC + 1; + if (index > 1 && index <= cablingMap.size()) { + if (!(link == cablingMap.link()[index] && 1 == cablingMap.roc()[index])) + errorFound = false; + } + if (debug and errorFound) + printf("Invalid ROC = 25 found (errorType = 25)\n"); + break; + } + case (26): { + if (debug) + printf("Gap word found (errorType = 26)\n"); + errorFound = true; + break; + } + case (27): { + if (debug) + printf("Dummy word found (errorType = 27)\n"); + errorFound = true; + break; + } + case (28): { + if (debug) + printf("Error fifo nearly full (errorType = 28)\n"); + errorFound = true; + break; + } + case (29): { + if (debug) + printf("Timeout on a channel (errorType = 29)\n"); + if ((errorWord >> ::pixelDetails::OMIT_ERR_shift) & ::pixelDetails::OMIT_ERR_mask) { + if (debug) + 
printf("...first errorType=29 error, this gets masked out\n"); + } + errorFound = true; + break; + } + case (30): { + if (debug) + printf("TBM error trailer (errorType = 30)\n"); + int StateMatch_bits = 4; + int StateMatch_shift = 8; + uint32_t StateMatch_mask = ~(~uint32_t(0) << StateMatch_bits); + int StateMatch = (errorWord >> StateMatch_shift) & StateMatch_mask; + if (StateMatch != 1 && StateMatch != 8) { + if (debug) + printf("FED error 30 with unexpected State Bits (errorType = 30)\n"); + } + if (StateMatch == 1) + errorType = 40; // 1=Overflow -> 40, 8=number of ROCs -> 30 + errorFound = true; + break; + } + case (31): { + if (debug) + printf("Event number error (errorType = 31)\n"); + errorFound = true; + break; + } + default: + errorFound = false; + }; + + return errorFound ? errorType : 0; + } + + ALPAKA_FN_ACC uint32_t getErrRawID(uint8_t fedId, + uint32_t errWord, + uint32_t errorType, + const SiPixelMappingSoAConstView &cablingMap, + bool debug = false) { + uint32_t rID = 0xffffffff; + + switch (errorType) { + case 25: + case 30: + case 31: + case 36: + case 40: { + uint32_t roc = 1; + uint32_t link = (errWord >> ::pixelDetails::LINK_shift) & ::pixelDetails::LINK_mask; + uint32_t rID_temp = getRawId(cablingMap, fedId, link, roc).RawId; + if (rID_temp != 9999) + rID = rID_temp; + break; + } + case 29: { + int chanNmbr = 0; + const int DB0_shift = 0; + const int DB1_shift = DB0_shift + 1; + const int DB2_shift = DB1_shift + 1; + const int DB3_shift = DB2_shift + 1; + const int DB4_shift = DB3_shift + 1; + const uint32_t DataBit_mask = ~(~uint32_t(0) << 1); + + int CH1 = (errWord >> DB0_shift) & DataBit_mask; + int CH2 = (errWord >> DB1_shift) & DataBit_mask; + int CH3 = (errWord >> DB2_shift) & DataBit_mask; + int CH4 = (errWord >> DB3_shift) & DataBit_mask; + int CH5 = (errWord >> DB4_shift) & DataBit_mask; + int BLOCK_bits = 3; + int BLOCK_shift = 8; + uint32_t BLOCK_mask = ~(~uint32_t(0) << BLOCK_bits); + int BLOCK = (errWord >> BLOCK_shift) & 
BLOCK_mask; + int localCH = 1 * CH1 + 2 * CH2 + 3 * CH3 + 4 * CH4 + 5 * CH5; + if (BLOCK % 2 == 0) + chanNmbr = (BLOCK / 2) * 9 + localCH; + else + chanNmbr = ((BLOCK - 1) / 2) * 9 + 4 + localCH; + if ((chanNmbr < 1) || (chanNmbr > 36)) + break; // signifies unexpected result + + uint32_t roc = 1; + uint32_t link = chanNmbr; + uint32_t rID_temp = getRawId(cablingMap, fedId, link, roc).RawId; + if (rID_temp != 9999) + rID = rID_temp; + break; + } + case 37: + case 38: { + uint32_t roc = (errWord >> ::pixelDetails::ROC_shift) & ::pixelDetails::ROC_mask; + uint32_t link = (errWord >> ::pixelDetails::LINK_shift) & ::pixelDetails::LINK_mask; + uint32_t rID_temp = getRawId(cablingMap, fedId, link, roc).RawId; + if (rID_temp != 9999) + rID = rID_temp; + break; + } + default: + break; + }; + + return rID; + } + + // Kernel to perform Raw to Digi conversion + struct RawToDigi_kernel { + template + ALPAKA_FN_ACC void operator()(const TAcc &acc, + const SiPixelMappingSoAConstView &cablingMap, + const unsigned char *modToUnp, + const uint32_t wordCounter, + const uint32_t *word, + const uint8_t *fedIds, + SiPixelDigisSoAv2View digisView, + SiPixelDigiErrorsSoAView err, + bool useQualityInfo, + bool includeErrors, + bool debug) const { + cms::alpakatools::for_each_element_in_grid_strided(acc, wordCounter, [&](uint32_t iloop) { + auto gIndex = iloop; + auto dvgi = digisView[gIndex]; + dvgi.xx() = 0; + dvgi.yy() = 0; + dvgi.adc() = 0; + bool skipROC = false; + + if (gIndex == 0) + err[gIndex].size() = 0; + + err[gIndex].pixelErrors() = SiPixelErrorCompact{0, 0, 0, 0}; + + uint8_t fedId = fedIds[gIndex / 2]; // +1200; + + // initialize (too many coninue below) + dvgi.pdigi() = 0; + dvgi.rawIdArr() = 0; + constexpr uint16_t invalidModuleId = std::numeric_limits::max() - 1; + dvgi.moduleId() = invalidModuleId; + + uint32_t ww = word[gIndex]; // Array containing 32 bit raw data + if (ww == 0) { + // 0 is an indicator of a noise/dead channel, skip these pixels during clusterization + 
return; + } + + uint32_t link = getLink(ww); // Extract link + uint32_t roc = getRoc(ww); // Extract Roc in link + ::pixelDetails::DetIdGPU detId = getRawId(cablingMap, fedId, link, roc); + + uint8_t errorType = checkROC(ww, fedId, link, cablingMap, debug); + skipROC = (roc < ::pixelDetails::maxROCIndex) ? false : (errorType != 0); + if (includeErrors and skipROC) { + uint32_t rID = getErrRawID(fedId, ww, errorType, cablingMap, debug); + err[gIndex].pixelErrors() = SiPixelErrorCompact{rID, ww, errorType, fedId}; + alpaka::atomicInc(acc, &err.size(), 0xffffffff, alpaka::hierarchy::Threads{}); + return; + } + + uint32_t rawId = detId.RawId; + uint32_t rocIdInDetUnit = detId.rocInDet; + bool barrel = isBarrel(rawId); + + uint32_t index = + fedId * ::pixelDetails::MAX_LINK * ::pixelDetails::MAX_ROC + (link - 1) * ::pixelDetails::MAX_ROC + roc; + if (useQualityInfo) { + skipROC = cablingMap.badRocs()[index]; + if (skipROC) + return; + } + skipROC = modToUnp[index]; + if (skipROC) + return; + + uint32_t layer = 0; //, ladder =0; + int side = 0, panel = 0, module = 0; //disk = 0, blade = 0 + + if (barrel) { + layer = (rawId >> ::pixelDetails::layerStartBit) & ::pixelDetails::layerMask; + module = (rawId >> ::pixelDetails::moduleStartBit) & ::pixelDetails::moduleMask; + side = (module < 5) ? -1 : 1; + } else { + // endcap ids + layer = 0; + panel = (rawId >> ::pixelDetails::panelStartBit) & ::pixelDetails::panelMask; + //disk = (rawId >> diskStartBit_) & diskMask_; + side = (panel == 1) ? 
-1 : 1; + //blade = (rawId >> bladeStartBit_) & bladeMask_; + } + + // ***special case of layer to 1 be handled here + ::pixelDetails::Pixel localPix; + if (layer == 1) { + uint32_t col = (ww >> ::pixelDetails::COL_shift) & ::pixelDetails::COL_mask; + uint32_t row = (ww >> ::pixelDetails::ROW_shift) & ::pixelDetails::ROW_mask; + localPix.row = row; + localPix.col = col; + if (includeErrors) { + if (not rocRowColIsValid(row, col)) { + uint8_t error = conversionError(fedId, 3, debug); //use the device function and fill the arrays + err[gIndex].pixelErrors() = SiPixelErrorCompact{rawId, ww, error, fedId}; + alpaka::atomicInc(acc, &err.size(), 0xffffffff, alpaka::hierarchy::Threads{}); + if (debug) + printf("BPIX1 Error status: %i\n", error); + return; + } + } + } else { + // ***conversion rules for dcol and pxid + uint32_t dcol = (ww >> ::pixelDetails::DCOL_shift) & ::pixelDetails::DCOL_mask; + uint32_t pxid = (ww >> ::pixelDetails::PXID_shift) & ::pixelDetails::PXID_mask; + uint32_t row = ::pixelDetails::numRowsInRoc - pxid / 2; + uint32_t col = dcol * 2 + pxid % 2; + localPix.row = row; + localPix.col = col; + if (includeErrors and not dcolIsValid(dcol, pxid)) { + uint8_t error = conversionError(fedId, 3, debug); + err[gIndex].pixelErrors() = SiPixelErrorCompact{rawId, ww, error, fedId}; + alpaka::atomicInc(acc, &err.size(), 0xffffffff, alpaka::hierarchy::Threads{}); + if (debug) + printf("Error status: %i %d %d %d %d\n", error, dcol, pxid, fedId, roc); + return; + } + } + + ::pixelDetails::Pixel globalPix = frameConversion(barrel, side, layer, rocIdInDetUnit, localPix); + dvgi.xx() = globalPix.row; // origin shifting by 1 0-159 + dvgi.yy() = globalPix.col; // origin shifting by 1 0-415 + dvgi.adc() = getADC(ww); + dvgi.pdigi() = ::pixelDetails::pack(globalPix.row, globalPix.col, dvgi.adc()); + dvgi.moduleId() = detId.moduleId; + dvgi.rawIdArr() = rawId; + }); // end of stride on grid + + } // end of Raw to Digi kernel operator() + }; // end of Raw to Digi struct + 
+ template + struct fillHitsModuleStart { + template + ALPAKA_FN_ACC void operator()(const TAcc &acc, SiPixelClustersSoAView clus_view) const { + ALPAKA_ASSERT_OFFLOAD(TrackerTraits::numberOfModules < 2048); // easy to extend at least till 32*1024 + + constexpr int nMaxModules = TrackerTraits::numberOfModules; + constexpr uint32_t maxHitsInModule = TrackerTraits::maxHitsInModule; + +#ifndef NDEBUG + [[maybe_unused]] const uint32_t blockIdxLocal(alpaka::getIdx(acc)[0u]); + ALPAKA_ASSERT_OFFLOAD(0 == blockIdxLocal); + [[maybe_unused]] const uint32_t gridDimension(alpaka::getWorkDiv(acc)[0u]); + ALPAKA_ASSERT_OFFLOAD(1 == gridDimension); +#endif + + // limit to maxHitsInModule; + cms::alpakatools::for_each_element_in_block_strided(acc, nMaxModules, [&](uint32_t i) { + clus_view[i + 1].clusModuleStart() = std::min(maxHitsInModule, clus_view[i].clusInModule()); + }); + + constexpr bool isPhase2 = std::is_base_of::value; + constexpr auto leftModules = isPhase2 ? 1024 : nMaxModules - 1024; + + auto &&ws = alpaka::declareSharedVar(acc); + + cms::alpakatools::blockPrefixScan( + acc, clus_view.clusModuleStart() + 1, clus_view.clusModuleStart() + 1, 1024, ws); + + cms::alpakatools::blockPrefixScan( + acc, clus_view.clusModuleStart() + 1024 + 1, clus_view.clusModuleStart() + 1024 + 1, leftModules, ws); + + if constexpr (isPhase2) { + cms::alpakatools::blockPrefixScan( + acc, clus_view.clusModuleStart() + 2048 + 1, clus_view.clusModuleStart() + 2048 + 1, 1024, ws); + cms::alpakatools::blockPrefixScan( + acc, clus_view.clusModuleStart() + 3072 + 1, clus_view.clusModuleStart() + 3072 + 1, nMaxModules - 3072, ws); + } + + constexpr auto lastModule = isPhase2 ? 
2049u : nMaxModules + 1; + cms::alpakatools::for_each_element_in_block_strided(acc, lastModule, 1025u, [&](uint32_t i) { + clus_view[i].clusModuleStart() += clus_view[1024].clusModuleStart(); + }); + alpaka::syncBlockThreads(acc); + + if constexpr (isPhase2) { + cms::alpakatools::for_each_element_in_block_strided(acc, 3073u, 2049u, [&](uint32_t i) { + clus_view[i].clusModuleStart() += clus_view[2048].clusModuleStart(); + }); + alpaka::syncBlockThreads(acc); + + cms::alpakatools::for_each_element_in_block_strided(acc, nMaxModules + 1, 3073u, [&](uint32_t i) { + clus_view[i].clusModuleStart() += clus_view[3072].clusModuleStart(); + }); + alpaka::syncBlockThreads(acc); + } +#ifdef GPU_DEBUG + ALPAKA_ASSERT_OFFLOAD(0 == clus_view[0].moduleStart()); + auto c0 = std::min(maxHitsInModule, clus_view[1].clusModuleStart()); + ALPAKA_ASSERT_OFFLOAD(c0 == clus_view[1].moduleStart()); + ALPAKA_ASSERT_OFFLOAD(clus_view[1024].moduleStart() >= clus_view[1023].moduleStart()); + ALPAKA_ASSERT_OFFLOAD(clus_view[1025].moduleStart() >= clus_view[1024].moduleStart()); + ALPAKA_ASSERT_OFFLOAD(clus_view[nMaxModules].moduleStart() >= clus_view[1025].moduleStart()); + + cms::alpakatools::for_each_element_in_block_strided(acc, nMaxModules + 1, [&](uint32_t i) { + if (0 != i) + ALPAKA_ASSERT_OFFLOAD(clus_view[i].moduleStart() >= clus_view[i - i].moduleStart()); + // Check BPX2 (1), FP1 (4) + constexpr auto bpix2 = TrackerTraits::layerStart[1]; + constexpr auto fpix1 = TrackerTraits::layerStart[4]; + if (i == bpix2 || i == fpix1) + printf("moduleStart %d %d\n", i, clus_view[i].moduleStart()); + }); +#endif + // avoid overflow + constexpr auto MAX_HITS = TrackerTraits::maxNumberOfHits; + cms::alpakatools::for_each_element_in_block_strided(acc, nMaxModules + 1, [&](uint32_t i) { + if (clus_view[i].clusModuleStart() > MAX_HITS) + clus_view[i].clusModuleStart() = MAX_HITS; + }); + + } // end of fillHitsModuleStart kernel operator() + }; // end of fillHitsModuleStart struct + + // Interface to 
outside + template + void SiPixelRawToClusterKernel::makePhase1ClustersAsync(const SiPixelClusterThresholds clusterThresholds, + const SiPixelMappingSoAConstView &cablingMap, + const unsigned char *modToUnp, + const SiPixelGainCalibrationForHLTSoAConstView &gains, + const WordFedAppender &wordFed, + const uint32_t wordCounter, + const uint32_t fedCounter, + bool useQualityInfo, + bool includeErrors, + bool debug, + Queue &queue) { + nDigis = wordCounter; + +#ifdef GPU_DEBUG + std::cout << "decoding " << wordCounter << " digis." << std::endl; +#endif + constexpr int numberOfModules = TrackerTraits::numberOfModules; + digis_d = SiPixelDigisCollection(wordCounter, queue); + if (includeErrors) { + digiErrors_d = SiPixelDigiErrorsCollection(wordCounter, queue); + } + clusters_d = SiPixelClustersCollection(numberOfModules, queue); + if (wordCounter) // protect in case of empty event.... + { + const int threadsPerBlockOrElementsPerThread = []() { + if constexpr (std::is_same_v) { + // NB: MPORTANT: This could be tuned to benefit from innermost loop. + return 32; + } else { + return 512; + } + }(); + // fill it all + const uint32_t blocks = cms::alpakatools::divide_up_by(wordCounter, threadsPerBlockOrElementsPerThread); + const auto workDiv = cms::alpakatools::make_workdiv(blocks, threadsPerBlockOrElementsPerThread); + ALPAKA_ASSERT_OFFLOAD(0 == wordCounter % 2); + // wordCounter is the total no of words in each event to be trasfered on device + auto word_d = cms::alpakatools::make_device_buffer(queue, wordCounter); + // NB: IMPORTANT: fedId_d: In legacy, wordCounter elements are allocated. + // However, only the first half of elements end up eventually used: + // hence, here, only wordCounter/2 elements are allocated. 
+ auto fedId_d = cms::alpakatools::make_device_buffer(queue, wordCounter / 2); + alpaka::memcpy(queue, word_d, wordFed.word(), wordCounter); + alpaka::memcpy(queue, fedId_d, wordFed.fedId(), wordCounter / 2); + // Launch rawToDigi kernel + alpaka::exec(queue, + workDiv, + RawToDigi_kernel(), + cablingMap, + modToUnp, + wordCounter, + word_d.data(), + fedId_d.data(), + digis_d->view(), + digiErrors_d->view(), + useQualityInfo, + includeErrors, + debug); + +#ifdef GPU_DEBUG + alpaka::wait(queue); + std::cout << "RawToDigi_kernel was run smoothly!" << std::endl; +#endif + + } + // End of Raw2Digi and passing data for clustering + + { + // clusterizer + using namespace pixelClustering; + // calibrations + using namespace calibPixel; + const int threadsPerBlockOrElementsPerThread = []() { + if constexpr (std::is_same_v) { + // NB: MPORTANT: This could be tuned to benefit from innermost loop. + return 32; + } else { + return 256; + } + }(); + const auto blocks = cms::alpakatools::divide_up_by(std::max(wordCounter, numberOfModules), + threadsPerBlockOrElementsPerThread); + const auto workDiv = cms::alpakatools::make_workdiv(blocks, threadsPerBlockOrElementsPerThread); + + alpaka::exec(queue, workDiv, calibDigis(), clusterThresholds, digis_d->view(), clusters_d->view(), gains, wordCounter); + +#ifdef GPU_DEBUG + alpaka::wait(queue); + std::cout << "countModules kernel launch with " << blocks << " blocks of " + << threadsPerBlockOrElementsPerThread << " threadsPerBlockOrElementsPerThread\n"; +#endif + + alpaka::exec( + queue, workDiv, countModules(), digis_d->view(), clusters_d->view(), wordCounter); + + auto moduleStartFirstElement = + cms::alpakatools::make_device_view(alpaka::getDev(queue), clusters_d->view().moduleStart(), 1u); + alpaka::memcpy(queue, nModules_Clusters_h, moduleStartFirstElement); + const auto threadsPerBlockFindClus = 512;//((TrackerTraits::maxPixInModule / 16 + 128 - 1) / 128) * 128; /// should be larger than maxPixInModule/16 aka 
(maxPixInModule/maxiter in the kernel) + const auto workDivMaxNumModules = cms::alpakatools::make_workdiv(numberOfModules, threadsPerBlockFindClus); + // NB: With present findClus() / chargeCut() algorithm, + // threadPerBlock (GPU) or elementsPerThread (CPU) = 256 show optimal performance. + // Though, it does not have to be the same number for CPU/GPU cases. + +#ifdef GPU_DEBUG + std::cout << " findClus kernel launch with " << numberOfModules << " blocks of " << threadsPerBlockFindClus + << " threadsPerBlockOrElementsPerThread\n"; +#endif + + alpaka::exec(queue, + workDivMaxNumModules, + findClus(), + digis_d->view(), + clusters_d->view(), + wordCounter); + +#ifdef GPU_DEBUG + alpaka::wait(queue); +#endif + + // apply charge cut + alpaka::exec(queue, + workDivMaxNumModules, + ::pixelClustering::clusterChargeCut(), + digis_d->view(), + clusters_d->view(), + clusterThresholds, + wordCounter); + // count the module start indices already here (instead of + // rechits) so that the number of clusters/hits can be made + // available in the rechit producer without additional points of + // synchronization/ExternalWork + + // MUST be ONE block + const auto workDivOneBlock = cms::alpakatools::make_workdiv(1u, 1024u); + alpaka::exec( + queue, workDivOneBlock, fillHitsModuleStart(), clusters_d->view()); + + // last element holds the number of all clusters + const auto clusModuleStartLastElement = cms::alpakatools::make_device_view( + alpaka::getDev(queue), + const_cast(clusters_d->view().clusModuleStart() + numberOfModules), + 1u); + constexpr int startBPIX2 = TrackerTraits::layerStart[1]; + + // element startBPIX2 hold the number of clusters until BPIX2 + const auto bpix2ClusterStart = cms::alpakatools::make_device_view( + alpaka::getDev(queue), const_cast(clusters_d->view().clusModuleStart() + startBPIX2), 1u); + auto nModules_Clusters_h_1 = cms::alpakatools::make_host_view(nModules_Clusters_h.data() + 1, 1u); + alpaka::memcpy(queue, nModules_Clusters_h_1, 
clusModuleStartLastElement); + + auto nModules_Clusters_h_2 = cms::alpakatools::make_host_view(nModules_Clusters_h.data() + 2, 1u); + alpaka::memcpy(queue, nModules_Clusters_h_2, bpix2ClusterStart); + + #ifdef GPU_DEBUG + alpaka::wait(queue); + std::cout << "SiPixelClusterizerAlpaka results:" << std::endl + << " > no. of digis: " << nDigis << std::endl + << " > no. of active modules: " << nModules_Clusters_h[0] << std::endl + << " > no. of clusters: " << nModules_Clusters_h[1] << std::endl + << " > bpix2 offset: " << nModules_Clusters_h[2] << std::endl; + #endif + + } // end clusterizer scope + } + + /* Phase-2 clustering on already-unpacked digis: records numDigis in nDigis, allocates clusters_d for pixelTopology::Phase2::numberOfModules modules, then enqueues calibDigisPhase2, countModules, findClus, clusterChargeCut and fillHitsModuleStart on `queue`, followed by single-element copies of moduleStart (first element), clusModuleStart[numberOfModules] and clusModuleStart[startBPIX2] into nModules_Clusters_h. The queue is only waited on under GPU_DEBUG, so the caller presumably has to synchronize before reading the host counters -- confirm. */ template + void SiPixelRawToClusterKernel::makePhase2ClustersAsync(const SiPixelClusterThresholds clusterThresholds, + SiPixelDigisSoAv2View &digis_view, + const uint32_t numDigis, + Queue &queue) { + + using namespace pixelClustering; + using pixelTopology::Phase2; + nDigis = numDigis; + constexpr int numberOfModules = pixelTopology::Phase2::numberOfModules; + clusters_d = SiPixelClustersCollection(numberOfModules, queue); + const auto threadsPerBlockOrElementsPerThread = 512; + const auto blocks = + cms::alpakatools::divide_up_by(std::max(numDigis, numberOfModules), threadsPerBlockOrElementsPerThread); + const auto workDiv = cms::alpakatools::make_workdiv(blocks, threadsPerBlockOrElementsPerThread); + + alpaka::exec(queue, workDiv, calibPixel::calibDigisPhase2{}, clusterThresholds, digis_view, clusters_d->view(), numDigis); + +#ifdef GPU_DEBUG + alpaka::wait(queue); + std::cout << "countModules kernel launch with " << blocks << " blocks of " + << threadsPerBlockOrElementsPerThread << " threadsPerBlockOrElementsPerThread\n"; +#endif + alpaka::exec(queue, workDiv, countModules(), digis_view, clusters_d->view(), numDigis); + + auto moduleStartFirstElement = + cms::alpakatools::make_device_view(alpaka::getDev(queue), clusters_d->view().moduleStart(), 1u); + alpaka::memcpy(queue, nModules_Clusters_h, moduleStartFirstElement); + + const auto threadsPerBlockFindClus =
((TrackerTraits::maxPixInModule / 16 + 128 - 1) / 128) * 128; /// should be larger than maxPixInModule/16 aka (maxPixInModule/maxiter in the kernel) + const auto workDivMaxNumModules = cms::alpakatools::make_workdiv(numberOfModules, threadsPerBlockFindClus); + +#ifdef GPU_DEBUG + alpaka::wait(queue); + std::cout << "findClus kernel launch with " << numberOfModules << " blocks of " << threadsPerBlockFindClus + << " threadsPerBlockOrElementsPerThread\n"; +#endif + alpaka::exec( + queue, workDivMaxNumModules, findClus(), digis_view, clusters_d->view(), numDigis); +#ifdef GPU_DEBUG + alpaka::wait(queue); +#endif + + // apply charge cut + alpaka::exec(queue, + workDivMaxNumModules, + ::pixelClustering::clusterChargeCut(), + digis_view, + clusters_d->view(), + clusterThresholds, + numDigis); + + // count the module start indices already here (instead of + // rechits) so that the number of clusters/hits can be made + // available in the rechit producer without additional points of + // synchronization/ExternalWork + + // MUST be ONE block + const auto workDivOneBlock = cms::alpakatools::make_workdiv(1u, 1024u); + alpaka::exec( + queue, workDivOneBlock, fillHitsModuleStart(), clusters_d->view()); + + // last element holds the number of all clusters + const auto clusModuleStartLastElement = cms::alpakatools::make_device_view( + alpaka::getDev(queue), + const_cast(clusters_d->view().clusModuleStart() + numberOfModules), + 1u); + constexpr int startBPIX2 = pixelTopology::Phase2::layerStart[1]; + // element startBPIX2 holds the number of clusters until BPIX2 + const auto bpix2ClusterStart = cms::alpakatools::make_device_view( + alpaka::getDev(queue), const_cast(clusters_d->view().clusModuleStart() + startBPIX2), 1u); + auto nModules_Clusters_h_1 = cms::alpakatools::make_host_view(nModules_Clusters_h.data() + 1, 1u); + alpaka::memcpy(queue, nModules_Clusters_h_1, clusModuleStartLastElement); + + auto nModules_Clusters_h_2 =
cms::alpakatools::make_host_view(nModules_Clusters_h.data() + 2, 1u); + alpaka::memcpy(queue, nModules_Clusters_h_2, bpix2ClusterStart); + + #ifdef GPU_DEBUG + alpaka::wait(queue); + std::cout << "SiPixelPhase2DigiToCluster: results \n" + << " > no. of digis: " << numDigis << std::endl + << " > no. of active modules: " << nModules_Clusters_h[0] << std::endl + << " > no. of clusters: " << nModules_Clusters_h[1] << std::endl + << " > bpix2 offset: " << nModules_Clusters_h[2] << std::endl; + #endif + } // end of makePhase2ClustersAsync + + template class SiPixelRawToClusterKernel; + template class SiPixelRawToClusterKernel; + template class SiPixelRawToClusterKernel; + + } // namespace pixelDetails + +} // namespace ALPAKA_ACCELERATOR_NAMESPACE diff --git a/RecoLocalTracker/SiPixelClusterizer/plugins/alpaka/SiPixelRawToClusterKernel.h b/RecoLocalTracker/SiPixelClusterizer/plugins/alpaka/SiPixelRawToClusterKernel.h new file mode 100644 index 0000000000000..6fa1185e5aa5c --- /dev/null +++ b/RecoLocalTracker/SiPixelClusterizer/plugins/alpaka/SiPixelRawToClusterKernel.h @@ -0,0 +1,203 @@ +#ifndef RecoLocalTracker_SiPixelClusterizer_SiPixelRawToClusterKernel_h +#define RecoLocalTracker_SiPixelClusterizer_SiPixelRawToClusterKernel_h + +#include +#include +#include + +#include "HeterogeneousCore/AlpakaInterface/interface/config.h" +#include "HeterogeneousCore/AlpakaInterface/interface/memory.h" + +#include "DataFormats/SiPixelClusterSoA/interface/alpaka/SiPixelClustersCollection.h" +#include "DataFormats/SiPixelClusterSoA/interface/SiPixelClustersDevice.h" +#include "DataFormats/SiPixelDigiSoA/interface/SiPixelDigisDevice.h" +#include "DataFormats/SiPixelDigiSoA/interface/alpaka/SiPixelDigisCollection.h" +#include "DataFormats/SiPixelDigiSoA/interface/alpaka/SiPixelDigiErrorsCollection.h" +#include "DataFormats/SiPixelDigiSoA/interface/SiPixelDigiErrorsDevice.h" +#include "DataFormats/SiPixelClusterSoA/interface/ClusteringConstants.h" + +#include
"CondFormats/SiPixelObjects/interface/SiPixelGainCalibrationForHLTLayout.h" +#include "CondFormats/SiPixelObjects/interface/alpaka/SiPixelGainCalibrationForHLTDevice.h" +#include "CondFormats/SiPixelObjects/interface/alpaka/SiPixelMappingDevice.h" + +#include "DataFormats/SiPixelRawData/interface/SiPixelErrorCompact.h" +#include "DataFormats/SiPixelRawData/interface/SiPixelFormatterErrors.h" +#include "DataFormats/SiPixelDetId/interface/PixelChannelIdentifier.h" + +namespace pixelDetails { + + constexpr auto MAX_LINK = pixelgpudetails::MAX_LINK; + constexpr auto MAX_SIZE = pixelgpudetails::MAX_SIZE; + constexpr auto MAX_ROC = pixelgpudetails::MAX_ROC; + // Phase 1 geometry constants + const uint32_t layerStartBit = 20; + const uint32_t ladderStartBit = 12; + const uint32_t moduleStartBit = 2; + + const uint32_t panelStartBit = 10; + const uint32_t diskStartBit = 18; + const uint32_t bladeStartBit = 12; + + const uint32_t layerMask = 0xF; + const uint32_t ladderMask = 0xFF; + const uint32_t moduleMask = 0x3FF; + const uint32_t panelMask = 0x3; + const uint32_t diskMask = 0xF; + const uint32_t bladeMask = 0x3F; + + const uint32_t LINK_bits = 6; + const uint32_t ROC_bits = 5; + const uint32_t DCOL_bits = 5; + const uint32_t PXID_bits = 8; + const uint32_t ADC_bits = 8; + + // special for layer 1 + const uint32_t LINK_bits_l1 = 6; + const uint32_t ROC_bits_l1 = 5; + const uint32_t COL_bits_l1 = 6; + const uint32_t ROW_bits_l1 = 7; + const uint32_t OMIT_ERR_bits = 1; + + const uint32_t maxROCIndex = 8; + const uint32_t numRowsInRoc = 80; + const uint32_t numColsInRoc = 52; + + const uint32_t MAX_WORD = 2000; + + const uint32_t ADC_shift = 0; + const uint32_t PXID_shift = ADC_shift + ADC_bits; + const uint32_t DCOL_shift = PXID_shift + PXID_bits; + const uint32_t ROC_shift = DCOL_shift + DCOL_bits; + const uint32_t LINK_shift = ROC_shift + ROC_bits_l1; + // special for layer 1 ROC + const uint32_t ROW_shift = ADC_shift + ADC_bits; + const uint32_t COL_shift = ROW_shift + 
ROW_bits_l1; + const uint32_t OMIT_ERR_shift = 20; + + const uint32_t LINK_mask = ~(~uint32_t(0) << LINK_bits_l1); + const uint32_t ROC_mask = ~(~uint32_t(0) << ROC_bits_l1); + const uint32_t COL_mask = ~(~uint32_t(0) << COL_bits_l1); + const uint32_t ROW_mask = ~(~uint32_t(0) << ROW_bits_l1); + const uint32_t DCOL_mask = ~(~uint32_t(0) << DCOL_bits); + const uint32_t PXID_mask = ~(~uint32_t(0) << PXID_bits); + const uint32_t ADC_mask = ~(~uint32_t(0) << ADC_bits); + const uint32_t ERROR_mask = ~(~uint32_t(0) << ROC_bits_l1); + const uint32_t OMIT_ERR_mask = ~(~uint32_t(0) << OMIT_ERR_bits); + + struct DetIdGPU { + uint32_t RawId; + uint32_t rocInDet; + uint32_t moduleId; + }; + + struct Pixel { + uint32_t row; + uint32_t col; + }; + + /* Returns the shared PixelChannelIdentifier::thePacking layout. */ ALPAKA_FN_HOST_ACC ALPAKA_FN_INLINE constexpr pixelchannelidentifierimpl::Packing packing() { + return PixelChannelIdentifier::thePacking; + } + + /* Packs row, col and adc into one word using the row/column/adc shifts of thePacking; adc is first clamped to thePacking.max_adc. NOTE(review): `flag` is accepted but does not contribute to the returned value. */ ALPAKA_FN_HOST_ACC ALPAKA_FN_INLINE constexpr uint32_t pack(uint32_t row, + uint32_t col, + uint32_t adc, + uint32_t flag = 0) { + constexpr pixelchannelidentifierimpl::Packing thePacking = packing(); + adc = std::min(adc, uint32_t(thePacking.max_adc)); + + return (row << thePacking.row_shift) | (col << thePacking.column_shift) | (adc << thePacking.adc_shift); + } + + /* Returns (row << thePacking.column_width) | col. */ constexpr uint32_t pixelToChannel(int row, int col) { + constexpr pixelchannelidentifierimpl::Packing thePacking = packing(); + return (row << thePacking.column_width) | col; + } + +} // namespace pixelDetails + +namespace ALPAKA_ACCELERATOR_NAMESPACE { + namespace pixelDetails { + + template + class SiPixelRawToClusterKernel { + public: + /* Host-side staging area: word_ receives the raw 32-bit words, fedId_ the per-FED id bytes. NOTE(review): the default constructor is only declared here; no definition is visible in this header. */ class WordFedAppender { + public: + WordFedAppender(); + ~WordFedAppender() = default; + + WordFedAppender(uint32_t words) + :word_{cms::alpakatools::make_host_buffer(words)}, + fedId_{cms::alpakatools::make_host_buffer(words)} {}; + + + /* Copies `length` words from src into word_ at offset wordCounterGPU and memsets length / 2 bytes of fedId_, starting at offset wordCounterGPU / 2, to the value fedId - 1200. */ void initializeWordFed(int fedId, + unsigned int wordCounterGPU, + const uint32_t *src, + unsigned int length) { + std::memcpy(word_.data() +
wordCounterGPU, src, sizeof(uint32_t) * length); + std::memset(fedId_.data() + wordCounterGPU / 2, fedId - 1200, length / 2); + } + auto word() const { return word_; } + auto fedId() const { return fedId_; } + + private: + cms::alpakatools::host_buffer word_; + cms::alpakatools::host_buffer fedId_; + }; + + /* Allocates the 3-slot host buffer nModules_Clusters_h; copy and move are deleted below. */ SiPixelRawToClusterKernel() : nModules_Clusters_h{cms::alpakatools::make_host_buffer(3u)} {} + + ~SiPixelRawToClusterKernel() = default; + + SiPixelRawToClusterKernel(const SiPixelRawToClusterKernel&) = delete; + SiPixelRawToClusterKernel(SiPixelRawToClusterKernel&&) = delete; + SiPixelRawToClusterKernel& operator=(const SiPixelRawToClusterKernel&) = delete; + SiPixelRawToClusterKernel& operator=(SiPixelRawToClusterKernel&&) = delete; + + void makePhase1ClustersAsync(const SiPixelClusterThresholds clusterThresholds, + const SiPixelMappingSoAConstView& cablingMap, + const unsigned char* modToUnp, + const SiPixelGainCalibrationForHLTSoAConstView& gains, + const WordFedAppender& wordFed, + const uint32_t wordCounter, + const uint32_t fedCounter, + bool useQualityInfo, + bool includeErrors, + bool debug, + Queue& queue); + + void makePhase2ClustersAsync(const SiPixelClusterThresholds clusterThresholds, + SiPixelDigisSoAv2View& digis_view, + const uint32_t numDigis, + Queue& queue); + + /* Passes nModules_Clusters_h[0] and nDigis to setNModulesDigis, then moves the collection out of digis_d. */ SiPixelDigisCollection getDigis() { + digis_d->setNModulesDigis(nModules_Clusters_h[0], nDigis); + return std::move(*digis_d); + } + + /* Passes nModules_Clusters_h[1] and nModules_Clusters_h[2] to setNClusters, then moves the collection out of clusters_d. */ SiPixelClustersCollection getClusters() { + clusters_d->setNClusters(nModules_Clusters_h[1], nModules_Clusters_h[2]); + return std::move(*clusters_d); + } + + /* Moves the error collection out of digiErrors_d. */ SiPixelDigiErrorsCollection getErrors() { return std::move(*digiErrors_d); } + + auto nModules() { return nModules_Clusters_h[0]; } + + private: + uint32_t nDigis = 0; + + // Data to be put in the event; the debug printout in the kernel source labels slots 0/1/2 of nModules_Clusters_h as active modules, clusters and bpix2 offset + cms::alpakatools::host_buffer nModules_Clusters_h; + std::optional digis_d; + std::optional clusters_d; + std::optional digiErrors_d; + }; + + } // namespace pixelDetails +} // namespace
ALPAKA_ACCELERATOR_NAMESPACE + +#endif // RecoLocalTracker_SiPixelClusterizer_SiPixelRawToClusterKernel_h diff --git a/RecoLocalTracker/SiPixelClusterizer/plugins/gpuCalibPixel.h b/RecoLocalTracker/SiPixelClusterizer/plugins/gpuCalibPixel.h index 180b356db2c88..232b7d05444ec 100644 --- a/RecoLocalTracker/SiPixelClusterizer/plugins/gpuCalibPixel.h +++ b/RecoLocalTracker/SiPixelClusterizer/plugins/gpuCalibPixel.h @@ -52,6 +52,7 @@ namespace gpuCalibPixel { int row = x[i]; int col = y[i]; + auto ret = ped->getPedAndGain(id[i], col, row, isDeadColumn, isNoisyColumn); float pedestal = ret.first; float gain = ret.second; diff --git a/RecoLocalTracker/SiPixelClusterizer/python/siPixelClustersPreSplitting_cff.py b/RecoLocalTracker/SiPixelClusterizer/python/siPixelClustersPreSplitting_cff.py index 4460dd6ab0240..61775f6aee0c7 100644 --- a/RecoLocalTracker/SiPixelClusterizer/python/siPixelClustersPreSplitting_cff.py +++ b/RecoLocalTracker/SiPixelClusterizer/python/siPixelClustersPreSplitting_cff.py @@ -1,6 +1,7 @@ import FWCore.ParameterSet.Config as cms from Configuration.Eras.Modifier_run3_common_cff import run3_common from Configuration.ProcessModifiers.gpu_cff import gpu +from Configuration.ProcessModifiers.alpaka_cff import alpaka # conditions used *only* by the modules running on GPU from CalibTracker.SiPixelESProducers.siPixelROCsStatusAndMappingWrapperESProducer_cfi import siPixelROCsStatusAndMappingWrapperESProducer @@ -17,6 +18,7 @@ # reconstruct the pixel digis and clusters on the gpu from RecoLocalTracker.SiPixelClusterizer.siPixelRawToClusterCUDAPhase1_cfi import siPixelRawToClusterCUDAPhase1 as _siPixelRawToClusterCUDA from RecoLocalTracker.SiPixelClusterizer.siPixelRawToClusterCUDAHIonPhase1_cfi import siPixelRawToClusterCUDAHIonPhase1 as _siPixelRawToClusterCUDAHIonPhase1 + siPixelClustersPreSplittingCUDA = _siPixelRawToClusterCUDA.clone() # HIon Modifiers @@ -34,7 +36,6 @@ VCaltoElectronOffset = 0, VCaltoElectronOffset_L1 = 0) - from
RecoLocalTracker.SiPixelClusterizer.siPixelDigisClustersFromSoAPhase1_cfi import siPixelDigisClustersFromSoAPhase1 as _siPixelDigisClustersFromSoAPhase1 from RecoLocalTracker.SiPixelClusterizer.siPixelDigisClustersFromSoAPhase2_cfi import siPixelDigisClustersFromSoAPhase2 as _siPixelDigisClustersFromSoAPhase2 @@ -93,3 +94,102 @@ siPixelDigisClustersPreSplitting, # SwitchProducer wrapping the legacy pixel cluster producer or an alias for the pixel clusters information converted from SoA siPixelClustersPreSplitting)) + +###################################################################### + +### Alpaka Pixel Clusters Reco + +#from CalibTracker.SiPixelESProducers.siPixelCablingSoAESProducer_cfi import siPixelCablingSoAESProducer +#from CalibTracker.SiPixelESProducers.siPixelGainCalibrationForHLTSoAESProducer_cfi import siPixelGainCalibrationForHLTSoAESProducer + +def _addProcessCalibTrackerAlpakaES(process): + process.load("CalibTracker.SiPixelESProducers.siPixelCablingSoAESProducer_cfi") + process.load("CalibTracker.SiPixelESProducers.siPixelGainCalibrationForHLTSoAESProducer_cfi") + +modifyConfigurationCalibTrackerAlpakaES_ = alpaka.makeProcessModifier(_addProcessCalibTrackerAlpakaES) + +# reconstruct the pixel digis and clusters on the device +from RecoLocalTracker.SiPixelClusterizer.siPixelRawToClusterPhase1_cfi import siPixelRawToClusterPhase1 as _siPixelRawToClusterAlpaka +siPixelClustersPreSplittingAlpaka = _siPixelRawToClusterAlpaka.clone() + +(alpaka & run3_common).toModify(siPixelClustersPreSplittingAlpaka, + # use the pixel channel calibrations scheme for Run 3 + clusterThreshold_layer1 = 4000, + VCaltoElectronGain = 1, # all gains=1, pedestals=0 + VCaltoElectronGain_L1 = 1, + VCaltoElectronOffset = 0, + VCaltoElectronOffset_L1 = 0) + +from RecoLocalTracker.SiPixelClusterizer.siPixelPhase2DigiToCluster_cfi import siPixelPhase2DigiToCluster as _siPixelPhase2DigiToCluster + +(alpaka & 
phase2_tracker).toReplaceWith(siPixelClustersPreSplittingAlpaka,_siPixelPhase2DigiToCluster.clone( + Phase2ReadoutMode = PixelDigitizerAlgorithmCommon.Phase2ReadoutMode.value(), # Flag to decide Readout Mode : linear TDR (-1), dual slope with slope parameters (+1,+2,+3,+4 ...) with threshold subtraction + Phase2DigiBaseline = int(PixelDigitizerAlgorithmCommon.ThresholdInElectrons_Barrel.value()), #Same for barrel and endcap + Phase2KinkADC = 8, + ElectronPerADCGain = PixelDigitizerAlgorithmCommon.ElectronPerAdc.value() +)) + +from RecoLocalTracker.SiPixelClusterizer.siPixelDigisClustersFromSoAAlpakaPhase1_cfi import siPixelDigisClustersFromSoAAlpakaPhase1 as _siPixelDigisClustersFromSoAAlpakaPhase1 +from RecoLocalTracker.SiPixelClusterizer.siPixelDigisClustersFromSoAAlpakaPhase2_cfi import siPixelDigisClustersFromSoAAlpakaPhase2 as _siPixelDigisClustersFromSoAAlpakaPhase2 + +(alpaka & ~phase2_tracker).toReplaceWith(siPixelDigisClustersPreSplitting,_siPixelDigisClustersFromSoAAlpakaPhase1.clone( + src = "siPixelClustersPreSplittingAlpaka" +)) + +(alpaka & phase2_tracker).toReplaceWith(siPixelDigisClustersPreSplitting,_siPixelDigisClustersFromSoAAlpakaPhase2.clone( + clusterThreshold_layer1 = 4000, + clusterThreshold_otherLayers = 4000, + src = "siPixelClustersPreSplittingAlpaka", + storeDigis = False, + produceDigis = False +)) + +from RecoLocalTracker.SiPixelClusterizer.siPixelDigisClustersFromSoAAlpakaPhase1_cfi import siPixelDigisClustersFromSoAAlpakaPhase1 as _siPixelDigisClustersFromSoAAlpakaPhase1 +from RecoLocalTracker.SiPixelClusterizer.siPixelDigisClustersFromSoAAlpakaPhase2_cfi import siPixelDigisClustersFromSoAAlpakaPhase2 as _siPixelDigisClustersFromSoAAlpakaPhase2 + +alpaka.toModify(siPixelClustersPreSplitting, + cpu = cms.EDAlias( + siPixelDigisClustersPreSplitting = cms.VPSet( + cms.PSet(type = cms.string("SiPixelClusteredmNewDetSetVector")) + ) + ) + # _siPixelDigisClustersFromSoAAlpakaPhase1.clone( + # src = 
cms.InputTag('siPixelClustersPreSplittingAlpaka') + # ) +) + +# (alpaka & phase2_tracker).toModify(siPixelClustersPreSplitting, +# cpu = _siPixelDigisClustersFromSoAAlpakaPhase2.clone( +# src = cms.InputTag('siPixelClustersPreSplittingAlpaka') +# ) +# ) +# Run3 +alpaka.toReplaceWith(siPixelClustersPreSplittingTask, cms.Task( + # Reconstruct the pixel clusters with alpaka + siPixelClustersPreSplittingAlpaka, + # Convert from host SoA to legacy formats (digis and clusters) + siPixelDigisClustersPreSplitting, + # EDAlias for the clusters + siPixelClustersPreSplitting)) + +# Phase2 +(alpaka & phase2_tracker).toReplaceWith(siPixelClustersPreSplittingTask, cms.Task( + # Reconstruct the pixel clusters on Device from copied digis + siPixelClustersPreSplittingAlpaka, + # Convert the pixel digis (except errors) and clusters to the legacy format + siPixelDigisClustersPreSplitting, + # SwitchProducer wrapping the legacy pixel cluster producer or an alias for the pixel clusters information converted from SoA + siPixelClustersPreSplitting)) + +### Alpaka Device vs Host validation + +from Configuration.ProcessModifiers.alpakaValidationPixel_cff import alpakaValidationPixel + +siPixelClustersPreSplittingAlpakaSerial = siPixelClustersPreSplittingAlpaka.clone( alpaka = dict( backend = 'serial_sync' )) + +alpakaValidationPixel.toReplaceWith(siPixelClustersPreSplittingTask, cms.Task( + # Reconstruct and convert the pixel clusters with alpaka on device + siPixelClustersPreSplittingTask.copy(), + # SoA serial counterpart + siPixelClustersPreSplittingAlpakaSerial)) + diff --git a/RecoLocalTracker/SiPixelRecHits/BuildFile.xml b/RecoLocalTracker/SiPixelRecHits/BuildFile.xml index 70a2970420c51..62787f4c989c1 100644 --- a/RecoLocalTracker/SiPixelRecHits/BuildFile.xml +++ b/RecoLocalTracker/SiPixelRecHits/BuildFile.xml @@ -1,15 +1,20 @@ + + + + + diff --git a/RecoLocalTracker/SiPixelRecHits/interface/PixelCPEFastParamsDevice.h 
b/RecoLocalTracker/SiPixelRecHits/interface/PixelCPEFastParamsDevice.h new file mode 100644 index 0000000000000..a6436038cac1f --- /dev/null +++ b/RecoLocalTracker/SiPixelRecHits/interface/PixelCPEFastParamsDevice.h @@ -0,0 +1,35 @@ +#ifndef DataFormats_PixelCPEFastParams_interface_PixelCPEFastParamsDevice_h +#define DataFormats_PixelCPEFastParams_interface_PixelCPEFastParamsDevice_h + +#include +#include "HeterogeneousCore/AlpakaInterface/interface/config.h" +#include "HeterogeneousCore/AlpakaInterface/interface/memory.h" +#include "FWCore/ParameterSet/interface/ParameterSet.h" +#include "RecoLocalTracker/SiPixelRecHits/interface/PixelCPEGenericBase.h" + +#include "pixelCPEforDevice.h" + +/* Wraps a device buffer of pixelCPEforDevice::ParamsOnDeviceT that the constructor allocates with make_device_buffer on the given queue; buffer(), const_buffer(), data() and size() expose that buffer. NOTE(review): the include guard is prefixed DataFormats_PixelCPEFastParams_ although this header lives under RecoLocalTracker/SiPixelRecHits/interface. */ template +class PixelCPEFastParamsDevice { +public: + + using Buffer = cms::alpakatools::device_buffer>; + using ConstBuffer = cms::alpakatools::const_device_buffer>; + + template + PixelCPEFastParamsDevice (TQueue queue) : + buffer_(cms::alpakatools::make_device_buffer>(queue)) + {} + + Buffer buffer() { return buffer_; } + ConstBuffer buffer() const { return buffer_; } + ConstBuffer const_buffer() const { return buffer_; } + pixelCPEforDevice::ParamsOnDeviceT const* data() const { return buffer_.data(); } + auto size() const { return alpaka::getExtentProduct(buffer_); } + +private: + + Buffer buffer_; +}; + +#endif diff --git a/RecoLocalTracker/SiPixelRecHits/interface/PixelCPEFastParamsHost.h b/RecoLocalTracker/SiPixelRecHits/interface/PixelCPEFastParamsHost.h new file mode 100644 index 0000000000000..3dad8ee75397e --- /dev/null +++ b/RecoLocalTracker/SiPixelRecHits/interface/PixelCPEFastParamsHost.h @@ -0,0 +1,58 @@ +#ifndef DataFormats_PixelCPEFastParams_interface_PixelCPEFastParamsHost_h +#define DataFormats_PixelCPEFastParams_interface_PixelCPEFastParamsHost_h + +#include +#include "HeterogeneousCore/AlpakaInterface/interface/CopyToDevice.h" +#include "HeterogeneousCore/AlpakaInterface/interface/config.h" +#include "HeterogeneousCore/AlpakaInterface/interface/memory.h"
+#include "DataFormats/SiPixelClusterSoA/interface/ClusteringConstants.h" +#include "DataFormats/GeometrySurface/interface/SOARotation.h" +#include "Geometry/CommonTopologies/interface/SimplePixelTopology.h" +#include "DataFormats/TrackingRecHitSoA/interface/SiPixelHitStatus.h" +#include "RecoLocalTracker/SiPixelRecHits/interface/PixelCPEGenericBase.h" +#include "CondFormats/SiPixelTransient/interface/SiPixelGenError.h" + +#include "pixelCPEforDevice.h" + +namespace pixelCPEforDevice { + + constexpr float micronsToCm = 1.0e-4; + +} + +template +class PixelCPEFastParamsHost : public PixelCPEGenericBase { +public: + using Buffer = cms::alpakatools::host_buffer>; + using ConstBuffer = cms::alpakatools::const_host_buffer>; + + PixelCPEFastParamsHost(edm::ParameterSet const& conf, + const MagneticField* mag, + const TrackerGeometry& geom, + const TrackerTopology& ttopo, + const SiPixelLorentzAngle* lorentzAngle, + const SiPixelGenErrorDBObject* genErrorDBObject, + const SiPixelLorentzAngle* lorentzAngleWidth); + + Buffer buffer() { return buffer_; } + ConstBuffer buffer() const { return buffer_; } + ConstBuffer const_buffer() const { return buffer_; } + auto size() const { return alpaka::getExtentProduct(buffer_); } + + static void fillPSetDescription(edm::ParameterSetDescription& desc); + +private: + LocalPoint localPosition(DetParam const& theDetParam, ClusterParam& theClusterParam) const override; + LocalError localError(DetParam const& theDetParam, ClusterParam& theClusterParam) const override; + + void errorFromTemplates(DetParam const& theDetParam, ClusterParamGeneric& theClusterParam, float qclus) const; + + std::vector thePixelGenError_; + + void fillParamsForDevice(); + + Buffer buffer_; +}; +// } // namespace pixelCPEforDevice + +#endif diff --git a/RecoLocalTracker/SiPixelRecHits/interface/alpaka/PixelCPEFastParamsCollection.h b/RecoLocalTracker/SiPixelRecHits/interface/alpaka/PixelCPEFastParamsCollection.h new file mode 100644 index 
0000000000000..0fa3a993b53f3 --- /dev/null +++ b/RecoLocalTracker/SiPixelRecHits/interface/alpaka/PixelCPEFastParamsCollection.h @@ -0,0 +1,44 @@ +#ifndef RecoLocalTracker_SiPixelRecHits_interface_alpaka_PixelCPEFastParamsCollection_h +#define RecoLocalTracker_SiPixelRecHits_interface_alpaka_PixelCPEFastParamsCollection_h + +#include +#include +#include "HeterogeneousCore/AlpakaInterface/interface/config.h" +#include "RecoLocalTracker/SiPixelRecHits/interface/PixelCPEFastParamsHost.h" +#include "RecoLocalTracker/SiPixelRecHits/interface/PixelCPEFastParamsDevice.h" +#include "DataFormats/Portable/interface/alpaka/PortableCollection.h" +#include "HeterogeneousCore/AlpakaInterface/interface/CopyToDevice.h" + +// TODO: The class is created via inheritance of the PortableCollection. +// This is generally discouraged, and should be done via composition. +// See: https://github.com/cms-sw/cmssw/pull/40465#discussion_r1067364306 +namespace ALPAKA_ACCELERATOR_NAMESPACE { +#ifdef ALPAKA_ACC_CPU_B_SEQ_T_SEQ_ENABLED + template + using PixelCPEFastParamsCollection = PixelCPEFastParamsHost; +#else + template + using PixelCPEFastParamsCollection = PixelCPEFastParamsDevice; +#endif + template + using PixelCPEFastParams = PixelCPEFastParamsCollection; + + using PixelCPEFastParamsPhase1 = PixelCPEFastParams; + using PixelCPEFastParamsPhase2 = PixelCPEFastParams; + +} // namespace ALPAKA_ACCELERATOR_NAMESPACE + +namespace cms::alpakatools { + /* CopyToDevice specialisation for PixelCPEFastParamsHost: copyAsync constructs a PixelCPEFastParamsDevice from `queue`, enqueues a memcpy of the host buffer into its buffer and returns the device object; no wait is issued here. */ template + struct CopyToDevice> { + template + static auto copyAsync(TQueue& queue, PixelCPEFastParamsHost const& srcData) { + using TDevice = typename alpaka::trait::DevType::type; + PixelCPEFastParamsDevice dstData (queue);//srcData->metadata().size(), queue); + alpaka::memcpy(queue, dstData.buffer(), srcData.buffer()); + return dstData; + } + }; +} // namespace cms::alpakatools + +#endif // RecoLocalTracker_SiPixelRecHits_interface_alpaka_PixelCPEFastParamsCollection_h \ No newline at end of file diff --git
a/RecoLocalTracker/SiPixelRecHits/interface/pixelCPEforDevice.h b/RecoLocalTracker/SiPixelRecHits/interface/pixelCPEforDevice.h new file mode 100644 index 0000000000000..20bc489b1a124 --- /dev/null +++ b/RecoLocalTracker/SiPixelRecHits/interface/pixelCPEforDevice.h @@ -0,0 +1,438 @@ +#ifndef RecoLocalTracker_SiPixelRecHits_interface_pixelCPEforDevice_h +#define RecoLocalTracker_SiPixelRecHits_interface_pixelCPEforDevice_h + +#include +#include +#include +#include + +#include "DataFormats/SiPixelClusterSoA/interface/ClusteringConstants.h" +#include "Geometry/CommonTopologies/interface/SimplePixelTopology.h" +#include "DataFormats/TrackingRecHitSoA/interface/SiPixelHitStatus.h" +#include "DataFormats/SiPixelClusterSoA/interface/ClusteringConstants.h" +#include "DataFormats/GeometrySurface/interface/SOARotation.h" +#include "Geometry/CommonTopologies/interface/SimplePixelTopology.h" + +// Nesting this namespace to prevent conflicts with pixelCPEForDevice.h +namespace CPEFastParametrisation { + // From https://cmssdt.cern.ch/dxr/CMSSW/source/CondFormats/SiPixelTransient/src/SiPixelGenError.cc#485-486 + // qbin: int (0-4) describing the charge of the cluster + // [0: 1.5; + using Rotation = SOARotation; + + // SOA (on device) + + template + struct ClusParamsT { + uint32_t minRow[N]; + uint32_t maxRow[N]; + uint32_t minCol[N]; + uint32_t maxCol[N]; + + int32_t q_f_X[N]; + int32_t q_l_X[N]; + int32_t q_f_Y[N]; + int32_t q_l_Y[N]; + + int32_t charge[N]; + + float xpos[N]; + float ypos[N]; + + float xerr[N]; + float yerr[N]; + + int16_t xsize[N]; // (*8) clipped at 127 if negative is edge.... + int16_t ysize[N]; + + Status status[N]; + }; + + // all modules are identical! 
+ struct CommonParams { + float theThicknessB; + float theThicknessE; + float thePitchX; + float thePitchY; + + uint16_t maxModuleStride; + uint8_t numberOfLaddersInBarrel; + }; + + struct DetParams { + bool isBarrel; + bool isPosZ; + uint16_t layer; + uint16_t index; + uint32_t rawId; + + float shiftX; + float shiftY; + float chargeWidthX; + float chargeWidthY; + uint16_t pixmx; // max pix charge + + uint16_t nRowsRoc; //we don't need 2^16 columns, is worth to use 15 + 1 for sign + uint16_t nColsRoc; + uint16_t nRows; + uint16_t nCols; + + uint32_t numPixsInModule; + + float x0, y0, z0; // the vertex in the local coord of the detector + + float apeXX, apeYY; // ape^2 + uint8_t sx2, sy1, sy2; + uint8_t sigmax[CPEFastParametrisation::kNumErrorBins], sigmax1[CPEFastParametrisation::kNumErrorBins], + sigmay[CPEFastParametrisation::kNumErrorBins]; // in micron + float xfact[CPEFastParametrisation::kGenErrorQBins], yfact[CPEFastParametrisation::kGenErrorQBins]; + int minCh[CPEFastParametrisation::kGenErrorQBins]; + + Frame frame; + }; + + template + struct LayerGeometryT { + uint32_t layerStart[TrackerTopology::numberOfLayers + 1]; + uint8_t layer[pixelTopology::layerIndexSize]; + uint16_t maxModuleStride; + }; + + constexpr int32_t MaxHitsInIter = pixelClustering::maxHitsInIter(); + using ClusParams = ClusParamsT; + + + + /* Writes cotalpha = (x - x0) * (-1 / z0) and cotbeta = (y - y0) * (-1 / z0) for the local position (x, y), with x0, y0, z0 taken from detParams. */ constexpr inline void computeAnglesFromDet( + DetParams const& __restrict__ detParams, float const x, float const y, float& cotalpha, float& cotbeta) { + // x,y local position on det + auto gvx = x - detParams.x0; + auto gvy = y - detParams.y0; + auto gvz = -1.f / detParams.z0; + // normalization not required as only ratio used... + // calculate angles + cotalpha = gvx * gvz; + cotbeta = gvy * gvz; + } + + /* Returns 0.5 * (q_l - q_f) / (q_l + q_f) * w_eff, or 0 when sizeM1 == 0. For sizeM1 == 1, w_eff is |theThickness * cot_angle - lorentz_shift| minus the inner width, provided that value lies within [0, pitch]; in every other case w_eff is pitch * 0.5 * (2 + number of big edge pixels). A zero charge sum is replaced by 1 before dividing. */ constexpr inline float correction(int sizeM1, + int q_f, //!< Charge in the first pixel. + int q_l, //!< Charge in the last pixel. + uint16_t upper_edge_first_pix, //!< As the name says. + uint16_t lower_edge_last_pix, //!< As the name says.
+ float lorentz_shift, //!< L-shift at half thickness + float theThickness, //!< detector thickness + float cot_angle, //!< cot of alpha_ or beta_ + float pitch, //!< thePitchX or thePitchY + bool first_is_big, //!< true if the first is big + bool last_is_big) //!< true if the last is big + { + if (0 == sizeM1) // size 1 + return 0; + + float w_eff = 0; + bool simple = true; + if (1 == sizeM1) { // size 2 + //--- Width of the clusters minus the edge (first and last) pixels. + //--- In the note, they are denoted x_F and x_L (and y_F and y_L) + // assert(lower_edge_last_pix >= upper_edge_first_pix); + auto w_inner = pitch * float(lower_edge_last_pix - upper_edge_first_pix); // in cm + + //--- Predicted charge width from geometry + auto w_pred = theThickness * cot_angle // geometric correction (in cm) + - lorentz_shift; // (in cm) &&& check fpix! + + w_eff = std::abs(w_pred) - w_inner; + + //--- If the observed charge width is inconsistent with the expectations + //--- based on the track, do *not* use w_pred-w_inner. Instead, replace + //--- it with an *average* effective charge width, which is the average + //--- length of the edge pixels. + + // this can produce "large" regressions for very small numeric differences + simple = (w_eff < 0.0f) | (w_eff > pitch); + } + + if (simple) { + //--- Total length of the two edge pixels (first+last) + float sum_of_edge = 2.0f; + if (first_is_big) + sum_of_edge += 1.0f; + if (last_is_big) + sum_of_edge += 1.0f; + w_eff = pitch * 0.5f * sum_of_edge; // ave.
length of edge pixels (first+last) (cm) + } + + //--- Finally, compute the position in this projection + float qdiff = q_l - q_f; + float qsum = q_l + q_f; + + //--- Temporary fix for clusters with both first and last pixel with charge = 0 + if (qsum == 0) + qsum = 1.0f; + + return 0.5f * (qdiff / qsum) * w_eff; + } + + template + constexpr inline void position(CommonParams const& __restrict__ comParams, + DetParams const& __restrict__ detParams, + ClusParams& cp, + uint32_t ic) { + constexpr int maxSize = TrackerTraits::maxSizeCluster; + //--- Upper Right corner of Lower Left pixel -- in measurement frame + uint16_t llx = cp.minRow[ic] + 1; + uint16_t lly = cp.minCol[ic] + 1; + + //--- Lower Left corner of Upper Right pixel -- in measurement frame + uint16_t urx = cp.maxRow[ic]; + uint16_t ury = cp.maxCol[ic]; + + uint16_t llxl = llx, llyl = lly, urxl = urx, uryl = ury; + + llxl = TrackerTraits::localX(llx); + llyl = TrackerTraits::localY(lly); + urxl = TrackerTraits::localX(urx); + uryl = TrackerTraits::localY(ury); + + auto mx = llxl + urxl; + auto my = llyl + uryl; + + int xsize = int(urxl) + 2 - int(llxl); + int ysize = int(uryl) + 2 - int(llyl); + assert(xsize >= 0); // 0 if bigpix...
+ assert(ysize >= 0); + + if (TrackerTraits::isBigPixX(cp.minRow[ic])) + ++xsize; + if (TrackerTraits::isBigPixX(cp.maxRow[ic])) + ++xsize; + if (TrackerTraits::isBigPixY(cp.minCol[ic])) + ++ysize; + if (TrackerTraits::isBigPixY(cp.maxCol[ic])) + ++ysize; + + int unbalanceX = 8.f * std::abs(float(cp.q_f_X[ic] - cp.q_l_X[ic])) / float(cp.q_f_X[ic] + cp.q_l_X[ic]); + int unbalanceY = 8.f * std::abs(float(cp.q_f_Y[ic] - cp.q_l_Y[ic])) / float(cp.q_f_Y[ic] + cp.q_l_Y[ic]); + + xsize = 8 * xsize - unbalanceX; + ysize = 8 * ysize - unbalanceY; + + cp.xsize[ic] = std::min(xsize, maxSize); + cp.ysize[ic] = std::min(ysize, maxSize); + + if (cp.minRow[ic] == 0 || cp.maxRow[ic] == uint32_t(detParams.nRows - 1)) + cp.xsize[ic] = -cp.xsize[ic]; + + if (cp.minCol[ic] == 0 || cp.maxCol[ic] == uint32_t(detParams.nCols - 1)) + cp.ysize[ic] = -cp.ysize[ic]; + + // apply the lorentz offset correction + float xoff = 0.5f * float(detParams.nRows) * comParams.thePitchX; + float yoff = 0.5f * float(detParams.nCols) * comParams.thePitchY; + + //correction for bigpixels for phase1 + xoff = xoff + TrackerTraits::bigPixXCorrection * comParams.thePitchX; + yoff = yoff + TrackerTraits::bigPixYCorrection * comParams.thePitchY; + + // apply the lorentz offset correction + auto xPos = detParams.shiftX + (comParams.thePitchX * 0.5f * float(mx)) - xoff; + auto yPos = detParams.shiftY + (comParams.thePitchY * 0.5f * float(my)) - yoff; + + float cotalpha = 0, cotbeta = 0; + + computeAnglesFromDet(detParams, xPos, yPos, cotalpha, cotbeta); + + auto thickness = detParams.isBarrel ? 
comParams.theThicknessB : comParams.theThicknessE; + + auto xcorr = correction(cp.maxRow[ic] - cp.minRow[ic], + cp.q_f_X[ic], + cp.q_l_X[ic], + llxl, + urxl, + detParams.chargeWidthX, // lorentz shift in cm + thickness, + cotalpha, + comParams.thePitchX, + TrackerTraits::isBigPixX(cp.minRow[ic]), + TrackerTraits::isBigPixX(cp.maxRow[ic])); + + auto ycorr = correction(cp.maxCol[ic] - cp.minCol[ic], + cp.q_f_Y[ic], + cp.q_l_Y[ic], + llyl, + uryl, + detParams.chargeWidthY, // lorentz shift in cm + thickness, + cotbeta, + comParams.thePitchY, + TrackerTraits::isBigPixY(cp.minCol[ic]), + TrackerTraits::isBigPixY(cp.maxCol[ic])); + + cp.xpos[ic] = xPos + xcorr; + cp.ypos[ic] = yPos + ycorr; + } + + template + constexpr inline void errorFromSize(CommonParams const& __restrict__ comParams, + DetParams const& __restrict__ detParams, + ClusParams& cp, + uint32_t ic) { + // Edge cluster errors + cp.xerr[ic] = 0.0050; + cp.yerr[ic] = 0.0085; + + // FIXME these are errors form Run1 + float xerr_barrel_l1_def = TrackerTraits::xerr_barrel_l1_def; + float yerr_barrel_l1_def = TrackerTraits::yerr_barrel_l1_def; + float xerr_barrel_ln_def = TrackerTraits::xerr_barrel_ln_def; + float yerr_barrel_ln_def = TrackerTraits::yerr_barrel_ln_def; + float xerr_endcap_def = TrackerTraits::xerr_endcap_def; + float yerr_endcap_def = TrackerTraits::yerr_endcap_def; + + constexpr float xerr_barrel_l1[] = {0.00115, 0.00120, 0.00088}; //TODO MOVE THESE SOMEWHERE ELSE + constexpr float yerr_barrel_l1[] = { + 0.00375, 0.00230, 0.00250, 0.00250, 0.00230, 0.00230, 0.00210, 0.00210, 0.00240}; + constexpr float xerr_barrel_ln[] = {0.00115, 0.00120, 0.00088}; + constexpr float yerr_barrel_ln[] = { + 0.00375, 0.00230, 0.00250, 0.00250, 0.00230, 0.00230, 0.00210, 0.00210, 0.00240}; + constexpr float xerr_endcap[] = {0.0020, 0.0020}; + constexpr float yerr_endcap[] = {0.00210}; + + auto sx = cp.maxRow[ic] - cp.minRow[ic]; + auto sy = cp.maxCol[ic] - cp.minCol[ic]; + + // is edgy ? 
+ bool isEdgeX = cp.xsize[ic] < 1; + bool isEdgeY = cp.ysize[ic] < 1; + + // is one and big? + bool isBig1X = ((0 == sx) && TrackerTraits::isBigPixX(cp.minRow[ic])); + bool isBig1Y = ((0 == sy) && TrackerTraits::isBigPixY(cp.minCol[ic])); + + if (!isEdgeX && !isBig1X) { + if (not detParams.isBarrel) { + cp.xerr[ic] = sx < std::size(xerr_endcap) ? xerr_endcap[sx] : xerr_endcap_def; + } else if (detParams.layer == 1) { + cp.xerr[ic] = sx < std::size(xerr_barrel_l1) ? xerr_barrel_l1[sx] : xerr_barrel_l1_def; + } else { + cp.xerr[ic] = sx < std::size(xerr_barrel_ln) ? xerr_barrel_ln[sx] : xerr_barrel_ln_def; + } + } + + if (!isEdgeY && !isBig1Y) { + if (not detParams.isBarrel) { + cp.yerr[ic] = sy < std::size(yerr_endcap) ? yerr_endcap[sy] : yerr_endcap_def; + } else if (detParams.layer == 1) { + cp.yerr[ic] = sy < std::size(yerr_barrel_l1) ? yerr_barrel_l1[sy] : yerr_barrel_l1_def; + } else { + cp.yerr[ic] = sy < std::size(yerr_barrel_ln) ? yerr_barrel_ln[sy] : yerr_barrel_ln_def; + } + } + } + + template + constexpr inline void errorFromDB(CommonParams const& __restrict__ comParams, + DetParams const& __restrict__ detParams, + ClusParams& cp, + uint32_t ic) { + // Edge cluster errors + cp.xerr[ic] = 0.0050f; + cp.yerr[ic] = 0.0085f; + + auto sx = cp.maxRow[ic] - cp.minRow[ic]; + auto sy = cp.maxCol[ic] - cp.minCol[ic]; + + // is edgy ? (size is set negative: see above) + bool isEdgeX = cp.xsize[ic] < 1; + bool isEdgeY = cp.ysize[ic] < 1; + // is one and big? 
+ bool isOneX = (0 == sx); + bool isOneY = (0 == sy); + bool isBigX = TrackerTraits::isBigPixX(cp.minRow[ic]); + bool isBigY = TrackerTraits::isBigPixY(cp.minCol[ic]); + + auto ch = cp.charge[ic]; + auto bin = 0; + for (; bin < CPEFastParametrisation::kGenErrorQBins - 1; ++bin) + // find first bin which minimum charge exceeds cluster charge + if (ch < detParams.minCh[bin + 1]) + break; + + // in detParams qBins are reversed bin0 -> smallest charge, bin4-> largest charge + // whereas in CondFormats/SiPixelTransient/src/SiPixelGenError.cc it is the opposite + // so we reverse the bin here -> kGenErrorQBins - 1 - bin + cp.status[ic].qBin = CPEFastParametrisation::kGenErrorQBins - 1 - bin; + cp.status[ic].isOneX = isOneX; + cp.status[ic].isBigX = (isOneX & isBigX) | isEdgeX; + cp.status[ic].isOneY = isOneY; + cp.status[ic].isBigY = (isOneY & isBigY) | isEdgeY; + + auto xoff = -float(TrackerTraits::xOffset) * comParams.thePitchX; + int low_value = 0; + int high_value = CPEFastParametrisation::kNumErrorBins - 1; + int bin_value = float(CPEFastParametrisation::kNumErrorBins) * (cp.xpos[ic] + xoff) / (2 * xoff); + // return estimated bin value truncated to [0, 15] + int jx = std::clamp(bin_value, low_value, high_value); + + auto toCM = [](uint8_t x) { return float(x) * 1.e-4f; }; + + if (not isEdgeX) { + cp.xerr[ic] = isOneX ? toCM(isBigX ? detParams.sx2 : detParams.sigmax1[jx]) + : detParams.xfact[bin] * toCM(detParams.sigmax[jx]); + } + + auto ey = cp.ysize[ic] > 8 ? detParams.sigmay[std::min(cp.ysize[ic] - 9, 15)] : detParams.sy1; + if (not isEdgeY) { + cp.yerr[ic] = isOneY ? toCM(isBigY ? 
detParams.sy2 : detParams.sy1) : detParams.yfact[bin] * toCM(ey); + } + } + + //for Phase2 -> fallback to error from size + template <> + constexpr inline void errorFromDB(CommonParams const& __restrict__ comParams, + DetParams const& __restrict__ detParams, + ClusParams& cp, + uint32_t ic) { + errorFromSize(comParams, detParams, cp, ic); + } + + template + struct ParamsOnDeviceT { + using LayerGeometry = LayerGeometryT; + using AverageGeometry = pixelTopology::AverageGeometryT; + + CommonParams m_commonParams; + // Will contain an array of DetParams instances + DetParams m_detParams[TrackerTopology::numberOfModules]; + LayerGeometry m_layerGeometry; + AverageGeometry m_averageGeometry; + + constexpr CommonParams const& __restrict__ commonParams() const { return m_commonParams; } + constexpr DetParams const& __restrict__ detParams(int i) const { return m_detParams[i]; } + constexpr LayerGeometry const& __restrict__ layerGeometry() const { return m_layerGeometry; } + constexpr AverageGeometry const& __restrict__ averageGeometry() const { return m_averageGeometry; } + + CommonParams & commonParams() { return m_commonParams; } + DetParams & detParams(int i) { return m_detParams[i]; } + LayerGeometry & layerGeometry() { return m_layerGeometry; } + AverageGeometry & averageGeometry() { return m_averageGeometry; } + + + constexpr uint8_t layer(uint16_t id) const { + return m_layerGeometry.layer[id / TrackerTopology::maxModuleStride]; + }; + }; + +} // namespace pixelCPEforDevice + +#endif // RecoLocalTracker_SiPixelRecHits_interface_pixelCPEforDevice_h diff --git a/RecoLocalTracker/SiPixelRecHits/plugins/BuildFile.xml b/RecoLocalTracker/SiPixelRecHits/plugins/BuildFile.xml index 00c88eadd4b51..7745b5d91d044 100644 --- a/RecoLocalTracker/SiPixelRecHits/plugins/BuildFile.xml +++ b/RecoLocalTracker/SiPixelRecHits/plugins/BuildFile.xml @@ -1,12 +1,13 @@ - - - + + + + + + - - @@ -14,5 +15,14 @@ + + + + + + + + + diff --git 
a/RecoLocalTracker/SiPixelRecHits/plugins/SiPixelRecHitFromSoAAlpaka.cc b/RecoLocalTracker/SiPixelRecHits/plugins/SiPixelRecHitFromSoAAlpaka.cc new file mode 100644 index 0000000000000..c1d438b8b4e10 --- /dev/null +++ b/RecoLocalTracker/SiPixelRecHits/plugins/SiPixelRecHitFromSoAAlpaka.cc @@ -0,0 +1,185 @@ +#include +#include + +#include + +#include "DataFormats/Common/interface/DetSetVectorNew.h" +#include "DataFormats/Common/interface/Handle.h" +#include "DataFormats/SiPixelCluster/interface/SiPixelCluster.h" +#include "DataFormats/TrackerRecHit2D/interface/SiPixelRecHitCollection.h" +#include "DataFormats/TrackingRecHitSoA/interface/TrackingRecHitSoAHost.h" +#include "DataFormats/TrackingRecHitSoA/interface/TrackingRecHitsLayout.h" +#include "FWCore/Framework/interface/Event.h" +#include "FWCore/Framework/interface/EventSetup.h" +#include "FWCore/Framework/interface/MakerMacros.h" +#include "FWCore/Framework/interface/global/EDProducer.h" +#include "FWCore/MessageLogger/interface/MessageLogger.h" +#include "FWCore/ParameterSet/interface/ConfigurationDescriptions.h" +#include "FWCore/ParameterSet/interface/ParameterSet.h" +#include "FWCore/ParameterSet/interface/ParameterSetDescription.h" +#include "FWCore/Utilities/interface/InputTag.h" +#include "Geometry/CommonDetUnit/interface/PixelGeomDetUnit.h" +#include "Geometry/CommonTopologies/interface/SimplePixelTopology.h" +#include "Geometry/Records/interface/TrackerDigiGeometryRecord.h" +#include "Geometry/TrackerGeometryBuilder/interface/TrackerGeometry.h" +#include "RecoLocalTracker/SiPixelRecHits/interface/pixelCPEforDevice.h" + +template +class SiPixelRecHitFromSoAAlpaka : public edm::global::EDProducer<> { + using HitModuleStartArray = typename TrackingRecHitAlpakaSoA::HitModuleStartArray; + using hindex_type = typename TrackerTraits::hindex_type; + using HMSstorage = typename std::vector; + +public: + explicit SiPixelRecHitFromSoAAlpaka(const edm::ParameterSet& iConfig); + ~SiPixelRecHitFromSoAAlpaka() 
override = default; + + static void fillDescriptions(edm::ConfigurationDescriptions& descriptions); + + // Data has been implicitly copied from Device to Host by the framework + using HitsOnHost = TrackingRecHitHost; + +private: + void produce(edm::StreamID streamID, edm::Event& iEvent, const edm::EventSetup& iSetup) const override; + + const edm::ESGetToken geomToken_; + const edm::EDGetTokenT hitsToken_; // Alpaka hits + const edm::EDGetTokenT clusterToken_; // legacy clusters + const edm::EDPutTokenT rechitsPutToken_; // legacy rechits + const edm::EDPutTokenT hostPutToken_; +}; + +template +SiPixelRecHitFromSoAAlpaka::SiPixelRecHitFromSoAAlpaka(const edm::ParameterSet& iConfig) + : geomToken_(esConsumes()), + hitsToken_(consumes(iConfig.getParameter("pixelRecHitSrc"))), + clusterToken_(consumes(iConfig.getParameter("src"))), + rechitsPutToken_(produces()), + hostPutToken_(produces()) {} + +template +void SiPixelRecHitFromSoAAlpaka::fillDescriptions(edm::ConfigurationDescriptions& descriptions) { + edm::ParameterSetDescription desc; + desc.add("pixelRecHitSrc", edm::InputTag("siPixelRecHitsPreSplittingAlpaka")); + desc.add("src", edm::InputTag("siPixelClustersPreSplitting")); + descriptions.addWithDefaultLabel(desc); +} + +template +void SiPixelRecHitFromSoAAlpaka::produce(edm::StreamID streamID, + edm::Event& iEvent, + const edm::EventSetup& iSetup) const { + auto const& hits = iEvent.get(hitsToken_); + auto nHits = hits.view().metadata().size(); + LogDebug("SiPixelRecHitFromSoAAlpaka") << "converting " << nHits << " Hits"; + + // allocate a buffer for the indices of the clusters + constexpr auto nMaxModules = TrackerTraits::numberOfModules; + + SiPixelRecHitCollection output; + output.reserve(nMaxModules, nHits); + + HMSstorage hmsp(nMaxModules + 1); + + if (0 == nHits) { + hmsp.clear(); + iEvent.emplace(rechitsPutToken_, std::move(output)); + iEvent.emplace(hostPutToken_, std::move(hmsp)); + return; + } + + // fill content of HMSstorage product, and put it 
into the Event + for (unsigned int idx = 0; idx < hmsp.size(); ++idx) { + hmsp[idx] = hits.view().hitsModuleStart()[idx]; + } + iEvent.emplace(hostPutToken_, std::move(hmsp)); + + auto xl = hits.view().xLocal(); + auto yl = hits.view().yLocal(); + auto xe = hits.view().xerrLocal(); + auto ye = hits.view().yerrLocal(); + + TrackerGeometry const& geom = iSetup.getData(geomToken_); + + auto const hclusters = iEvent.getHandle(clusterToken_); + + constexpr uint32_t maxHitsInModule = pixelClustering::maxHitsInModule(); + + int numberOfDetUnits = 0; + int numberOfClusters = 0; + for (auto const& dsv : *hclusters) { + numberOfDetUnits++; + unsigned int detid = dsv.detId(); + DetId detIdObject(detid); + const GeomDetUnit* genericDet = geom.idToDetUnit(detIdObject); + auto gind = genericDet->index(); + const PixelGeomDetUnit* pixDet = dynamic_cast(genericDet); + assert(pixDet); + SiPixelRecHitCollection::FastFiller recHitsOnDetUnit(output, detid); + auto fc = hits.view().hitsModuleStart()[gind]; + auto lc = hits.view().hitsModuleStart()[gind + 1]; + auto nhits = lc - fc; + + assert(lc > fc); + LogDebug("SiPixelRecHitFromSoAAlpaka") << "in det " << gind << ": conv " << nhits << " hits from " << dsv.size() + << " legacy clusters" << ' ' << fc << ',' << lc << "\n"; + if (nhits > maxHitsInModule) + edm::LogWarning("SiPixelRecHitFromSoAAlpaka") << fmt::sprintf( + "Too many clusters %d in module %d. 
Only the first %d hits will be converted", nhits, gind, maxHitsInModule); + nhits = std::min(nhits, maxHitsInModule); + + LogDebug("SiPixelRecHitFromSoAAlpaka") << "in det " << gind << "conv " << nhits << " hits from " << dsv.size() + << " legacy clusters" << ' ' << lc << ',' << fc; + + if (0 == nhits) + continue; + auto jnd = [&](int k) { return fc + k; }; + assert(nhits <= dsv.size()); + if (nhits != dsv.size()) { + edm::LogWarning("GPUHits2CPU") << "nhits!= nclus " << nhits << ' ' << dsv.size(); + } + for (auto const& clust : dsv) { + assert(clust.originalId() >= 0); + assert(clust.originalId() < dsv.size()); + if (clust.originalId() >= nhits) + continue; + auto ij = jnd(clust.originalId()); + LocalPoint lp(xl[ij], yl[ij]); + LocalError le(xe[ij], 0, ye[ij]); + SiPixelRecHitQuality::QualWordType rqw = 0; + + numberOfClusters++; + + /* cpu version.... (for reference) + std::tuple tuple = cpe_->getParameters( clust, *genericDet ); + LocalPoint lp( std::get<0>(tuple) ); + LocalError le( std::get<1>(tuple) ); + SiPixelRecHitQuality::QualWordType rqw( std::get<2>(tuple) ); + */ + + // Create a persistent edm::Ref to the cluster + edm::Ref, SiPixelCluster> cluster = edmNew::makeRefTo(hclusters, &clust); + // Make a RecHit and add it to the DetSet + recHitsOnDetUnit.emplace_back(lp, le, rqw, *genericDet, cluster); + // ============================= + + LogDebug("SiPixelRecHitFromSoAAlpaka") << "cluster " << numberOfClusters << " at " << lp << ' ' << le; + + } // <-- End loop on Clusters + + // LogDebug("SiPixelRecHitGPU") + LogDebug("SiPixelRecHitFromSoAAlpaka") << "found " << recHitsOnDetUnit.size() << " RecHits on " << detid; + + } // <-- End loop on DetUnits + + LogDebug("SiPixelRecHitFromSoAAlpaka") << "found " << numberOfDetUnits << " dets, " << numberOfClusters + << " clusters"; + + iEvent.emplace(rechitsPutToken_, std::move(output)); +} + +using SiPixelRecHitFromSoAAlpakaPhase1 = SiPixelRecHitFromSoAAlpaka; +DEFINE_FWK_MODULE(SiPixelRecHitFromSoAAlpakaPhase1); + 
+using SiPixelRecHitFromSoAAlpakaPhase2 = SiPixelRecHitFromSoAAlpaka; +DEFINE_FWK_MODULE(SiPixelRecHitFromSoAAlpakaPhase2); diff --git a/RecoLocalTracker/SiPixelRecHits/plugins/alpaka/PixelCPEFastParamsESProducerAlpaka.cc b/RecoLocalTracker/SiPixelRecHits/plugins/alpaka/PixelCPEFastParamsESProducerAlpaka.cc new file mode 100644 index 0000000000000..3b22217b0dca5 --- /dev/null +++ b/RecoLocalTracker/SiPixelRecHits/plugins/alpaka/PixelCPEFastParamsESProducerAlpaka.cc @@ -0,0 +1,121 @@ +#include +#include +#include +#include "DataFormats/TrackerCommon/interface/TrackerTopology.h" + +#include "HeterogeneousCore/AlpakaCore/interface/alpaka/ESProducer.h" +#include "HeterogeneousCore/AlpakaCore/interface/alpaka/EventSetup.h" +#include "HeterogeneousCore/AlpakaCore/interface/alpaka/ModuleFactory.h" +#include "HeterogeneousCore/AlpakaInterface/interface/config.h" +#include "HeterogeneousCore/AlpakaInterface/interface/memory.h" +#include "RecoLocalTracker/Records/interface/TkPixelCPERecord.h" +#include "RecoLocalTracker/SiPixelRecHits/interface/alpaka/PixelCPEFastParamsCollection.h" +#include "RecoLocalTracker/SiPixelRecHits/interface/PixelCPEFastParamsHost.h" + +#include "Geometry/Records/interface/TrackerDigiGeometryRecord.h" +#include "Geometry/Records/interface/TrackerTopologyRcd.h" +#include "Geometry/TrackerGeometryBuilder/interface/TrackerGeometry.h" +#include "MagneticField/Engine/interface/MagneticField.h" +#include "MagneticField/Records/interface/IdealMagneticFieldRecord.h" +#include "RecoLocalTracker/ClusterParameterEstimator/interface/PixelClusterParameterEstimator.h" + +#include "CondFormats/DataRecord/interface/SiPixelGenErrorDBObjectRcd.h" +#include "RecoLocalTracker/Records/interface/PixelCPEFastParamsRecord.h" +#include "DataFormats/TrackerCommon/interface/TrackerTopology.h" + +namespace ALPAKA_ACCELERATOR_NAMESPACE { + template + class PixelCPEFastParamsESProducerAlpaka : public ESProducer { + public: + PixelCPEFastParamsESProducerAlpaka(edm::ParameterSet 
const& iConfig); + std::unique_ptr> produce(const PixelCPEFastParamsRecord& iRecord); + + static void fillDescriptions(edm::ConfigurationDescriptions& descriptions); + + private: + edm::ESGetToken magfieldToken_; + edm::ESGetToken pDDToken_; + edm::ESGetToken hTTToken_; + edm::ESGetToken lorentzAngleToken_; + edm::ESGetToken lorentzAngleWidthToken_; + edm::ESGetToken genErrorDBObjectToken_; + + edm::ParameterSet pset_; + bool useErrorsFromTemplates_; + }; + + using namespace edm; + + template + PixelCPEFastParamsESProducerAlpaka::PixelCPEFastParamsESProducerAlpaka(const edm::ParameterSet& p) + : ESProducer(p), pset_(p) { + auto const& myname = p.getParameter("ComponentName"); + auto const& magname = p.getParameter("MagneticFieldRecord"); + useErrorsFromTemplates_ = p.getParameter("UseErrorsFromTemplates"); + + auto cc = setWhatProduced(this, myname); + magfieldToken_ = cc.consumes(magname); + pDDToken_ = cc.consumes(); + hTTToken_ = cc.consumes(); + lorentzAngleToken_ = cc.consumes(edm::ESInputTag("")); + lorentzAngleWidthToken_ = cc.consumes(edm::ESInputTag("", "forWidth")); + if (useErrorsFromTemplates_) { + genErrorDBObjectToken_ = cc.consumes(); + } + } + + template + std::unique_ptr> PixelCPEFastParamsESProducerAlpaka::produce( + const PixelCPEFastParamsRecord& iRecord) { + // add the new la width object + const SiPixelLorentzAngle* lorentzAngleWidthProduct = nullptr; + lorentzAngleWidthProduct = &iRecord.get(lorentzAngleWidthToken_); + + const SiPixelGenErrorDBObject* genErrorDBObjectProduct = nullptr; + + // Errors take only from new GenError + if (useErrorsFromTemplates_) { // do only when generrors are needed + genErrorDBObjectProduct = &iRecord.get(genErrorDBObjectToken_); + //} else { + //std::cout<<" pass an empty GenError pointer"<>(pset_, + &iRecord.get(magfieldToken_), + iRecord.get(pDDToken_), + iRecord.get(hTTToken_), + &iRecord.get(lorentzAngleToken_), + genErrorDBObjectProduct, + lorentzAngleWidthProduct); + } + + template + void 
PixelCPEFastParamsESProducerAlpaka::fillDescriptions( + edm::ConfigurationDescriptions& descriptions) { + edm::ParameterSetDescription desc; + + // from PixelCPEBase + PixelCPEBase::fillPSetDescription(desc); + + // from PixelCPEFast + PixelCPEFastParamsHost::fillPSetDescription(desc); + + // used by PixelCPEFast + desc.add("EdgeClusterErrorX", 50.0); + desc.add("EdgeClusterErrorY", 85.0); + desc.add("UseErrorsFromTemplates", true); + desc.add("TruncatePixelCharge", true); + + std::string name = "PixelCPEFastParams"; + name += TrackerTraits::nameModifier; + desc.add("ComponentName", name); + desc.add("MagneticFieldRecord", edm::ESInputTag()); + + descriptions.addWithDefaultLabel(desc); + } + + using PixelCPEFastParamsESProducerAlpakaPhase1 = PixelCPEFastParamsESProducerAlpaka; + using PixelCPEFastParamsESProducerAlpakaPhase2 = PixelCPEFastParamsESProducerAlpaka; +} // namespace ALPAKA_ACCELERATOR_NAMESPACE + +DEFINE_FWK_EVENTSETUP_ALPAKA_MODULE(PixelCPEFastParamsESProducerAlpakaPhase1); +DEFINE_FWK_EVENTSETUP_ALPAKA_MODULE(PixelCPEFastParamsESProducerAlpakaPhase2); diff --git a/RecoLocalTracker/SiPixelRecHits/plugins/alpaka/PixelRecHitGPUKernel.dev.cc b/RecoLocalTracker/SiPixelRecHits/plugins/alpaka/PixelRecHitGPUKernel.dev.cc new file mode 100644 index 0000000000000..d930434f84f50 --- /dev/null +++ b/RecoLocalTracker/SiPixelRecHits/plugins/alpaka/PixelRecHitGPUKernel.dev.cc @@ -0,0 +1,123 @@ +// C++ headers +#include +#include + +// Alpaka headers +#include + +// CMSSW headers +#include "DataFormats/BeamSpot/interface/BeamSpotPOD.h" +#include "DataFormats/SiPixelClusterSoA/interface/ClusteringConstants.h" +#include "HeterogeneousCore/AlpakaInterface/interface/HistoContainer.h" +#include "HeterogeneousCore/AlpakaInterface/interface/config.h" + +#include "PixelRecHitGPUKernel.h" +#include "PixelRecHits.h" + +//#define GPU_DEBUG + +namespace ALPAKA_ACCELERATOR_NAMESPACE { + using namespace cms::alpakatools; + template + class setHitsLayerStart { + public: + template 
>> + ALPAKA_FN_ACC void operator()(TAcc const& acc, + uint32_t const* __restrict__ hitsModuleStart, + pixelCPEforDevice::ParamsOnDeviceT const* __restrict__ cpeParams, + uint32_t* __restrict__ hitsLayerStart) const { + assert(0 == hitsModuleStart[0]); + + for (int32_t i : cms::alpakatools::elements_with_stride(acc, TrackerTraits::numberOfLayers + 1)) { + hitsLayerStart[i] = hitsModuleStart[cpeParams->layerGeometry().layerStart[i]]; +#ifdef GPU_DEBUG + int old = i == 0 ? 0 : hitsModuleStart[cpeParams->layerGeometry().layerStart[i - 1]]; + printf("LayerStart %d/%d at module %d: %d - %d\n", + i, + TrackerTraits::numberOfLayers, + cpeParams->layerGeometry().layerStart[i], + hitsLayerStart[i], + hitsLayerStart[i] - old); +#endif + } + } + }; + + namespace pixelgpudetails { + + template + TrackingRecHitAlpakaCollection PixelRecHitGPUKernel::makeHitsAsync( + SiPixelDigisCollection const& digis_d, + SiPixelClustersCollection const& clusters_d, + BeamSpotPOD const* bs_d, + pixelCPEforDevice::ParamsOnDeviceT const* cpeParams, + Queue queue) const { + using namespace pixelRecHits; + auto nHits = clusters_d.nClusters(); + auto offsetBPIX2 = clusters_d.offsetBPIX2(); + + TrackingRecHitAlpakaCollection hits_d(nHits, offsetBPIX2, clusters_d->clusModuleStart(), queue); + + int activeModulesWithDigis = digis_d.nModules(); + + // protect from empty events + if (activeModulesWithDigis) { + int threadsPerBlock = 128; + int blocks = activeModulesWithDigis; + const auto workDiv1D = cms::alpakatools::make_workdiv(blocks, threadsPerBlock); + +#ifdef GPU_DEBUG + std::cout << "launching getHits kernel on " << alpaka::core::demangled << " with " << blocks << " blocks" + << std::endl; +#endif + alpaka::exec(queue, + workDiv1D, + getHits{}, + cpeParams, + bs_d, + digis_d.view(), + digis_d.nDigis(), + clusters_d.view(), + hits_d.view()); +#ifdef GPU_DEBUG + alpaka::wait(queue); +#endif + + // assuming full warp of threads is better than a smaller number... 
+ if (nHits) { + const auto workDiv1D = cms::alpakatools::make_workdiv(1, 32); + alpaka::exec(queue, + workDiv1D, + setHitsLayerStart{}, + clusters_d->clusModuleStart(), + cpeParams, + hits_d.view().hitsLayerStart().data()); + constexpr auto nLayers = TrackerTraits::numberOfLayers; + cms::alpakatools::fillManyFromVector(&(hits_d.view().phiBinner()), + nLayers, + hits_d.view().iphi(), + hits_d.view().hitsLayerStart().data(), + nHits, + (uint32_t)256, + queue); + +#ifdef GPU_DEBUG + alpaka::wait(queue); +#endif + } + } + +#ifdef GPU_DEBUG + alpaka::wait(queue); + std::cout << "PixelRecHitGPUKernel -> DONE!" << std::endl; +#endif + + return hits_d; + } + + template class PixelRecHitGPUKernel; + template class PixelRecHitGPUKernel; + template class PixelRecHitGPUKernel; + + } // namespace pixelgpudetails +} // namespace ALPAKA_ACCELERATOR_NAMESPACE diff --git a/RecoLocalTracker/SiPixelRecHits/plugins/alpaka/PixelRecHitGPUKernel.h b/RecoLocalTracker/SiPixelRecHits/plugins/alpaka/PixelRecHitGPUKernel.h new file mode 100644 index 0000000000000..f7f93151a9824 --- /dev/null +++ b/RecoLocalTracker/SiPixelRecHits/plugins/alpaka/PixelRecHitGPUKernel.h @@ -0,0 +1,45 @@ +#ifndef RecoLocalTracker_SiPixelRecHits_PixelRecHitGPUKernel_h +#define RecoLocalTracker_SiPixelRecHits_PixelRecHitGPUKernel_h + +#include + +#include + +#include "DataFormats/BeamSpot/interface/BeamSpotPOD.h" +#include "DataFormats/SiPixelClusterSoA/interface/alpaka/SiPixelClustersCollection.h" +#include "DataFormats/SiPixelClusterSoA/interface/SiPixelClustersDevice.h" +#include "DataFormats/SiPixelDigiSoA/interface/SiPixelDigisDevice.h" +#include "DataFormats/SiPixelDigiSoA/interface/alpaka/SiPixelDigisCollection.h" +#include "DataFormats/TrackingRecHitSoA/interface/TrackingRecHitSoADevice.h" +#include "DataFormats/TrackingRecHitSoA/interface/alpaka/TrackingRecHitSoACollection.h" +#include "HeterogeneousCore/AlpakaInterface/interface/config.h" +#include 
"Geometry/CommonTopologies/interface/SimplePixelTopology.h" +#include "RecoLocalTracker/SiPixelRecHits/interface/pixelCPEforDevice.h" + +namespace ALPAKA_ACCELERATOR_NAMESPACE { + namespace pixelgpudetails { + using namespace cms::alpakatools; + + template + class PixelRecHitGPUKernel { + public: + PixelRecHitGPUKernel() = default; + ~PixelRecHitGPUKernel() = default; + + PixelRecHitGPUKernel(const PixelRecHitGPUKernel&) = delete; + PixelRecHitGPUKernel(PixelRecHitGPUKernel&&) = delete; + PixelRecHitGPUKernel& operator=(const PixelRecHitGPUKernel&) = delete; + PixelRecHitGPUKernel& operator=(PixelRecHitGPUKernel&&) = delete; + + using ParamsOnDevice = pixelCPEforDevice::ParamsOnDeviceT; + + TrackingRecHitAlpakaCollection makeHitsAsync(SiPixelDigisCollection const& digis_d, + SiPixelClustersCollection const& clusters_d, + BeamSpotPOD const* bs_d, + ParamsOnDevice const* cpeParams, + Queue queue) const; + }; + } // namespace pixelgpudetails +} // namespace ALPAKA_ACCELERATOR_NAMESPACE + +#endif // RecoLocalTracker_SiPixelRecHits_PixelRecHitGPUKernel_h diff --git a/RecoLocalTracker/SiPixelRecHits/plugins/alpaka/PixelRecHits.h b/RecoLocalTracker/SiPixelRecHits/plugins/alpaka/PixelRecHits.h new file mode 100644 index 0000000000000..03cc38365e02a --- /dev/null +++ b/RecoLocalTracker/SiPixelRecHits/plugins/alpaka/PixelRecHits.h @@ -0,0 +1,241 @@ +#ifndef RecoLocalTracker_SiPixelRecHits_alpaka_PixelRecHits_h +#define RecoLocalTracker_SiPixelRecHits_alpaka_PixelRecHits_h + +#include +#include +#include +#include + +#include + +#include "DataFormats/BeamSpot/interface/BeamSpotPOD.h" +#include "DataFormats/Math/interface/approx_atan2.h" +#include "DataFormats/SiPixelClusterSoA/interface/ClusteringConstants.h" +#include "DataFormats/SiPixelDigiSoA/interface/SiPixelDigisDevice.h" +#include "DataFormats/SiPixelDigiSoA/interface/alpaka/SiPixelDigisCollection.h" +#include "DataFormats/TrackingRecHitSoA/interface/TrackingRecHitsLayout.h" +#include 
"Geometry/CommonTopologies/interface/SimplePixelTopology.h" +#include "HeterogeneousCore/AlpakaInterface/interface/config.h" +#include "HeterogeneousCore/AlpakaInterface/interface/workdivision.h" +#include "RecoLocalTracker/SiPixelRecHits/interface/pixelCPEforDevice.h" + +//#define GPU_DEBUG 1 +namespace ALPAKA_ACCELERATOR_NAMESPACE { + namespace pixelRecHits { + + template + class getHits { + public: + template >> + ALPAKA_FN_ACC void operator()(const TAcc& acc, + pixelCPEforDevice::ParamsOnDeviceT const* __restrict__ cpeParams, + BeamSpotPOD const* __restrict__ bs, + SiPixelDigisSoAv2ConstView digis, + uint32_t numElements, + SiPixelClustersSoAConstView clusters, + TrackingRecHitAlpakaSoAView hits) const { + // FIXME + // the compiler seems NOT to optimize loads from views (even in a simple test case) + // The whole gimnastic here of copying or not is a pure heuristic exercise that seems to produce the fastest code with the above signature + // not using views (passing a gazzilion of array pointers) seems to produce the fastest code (but it is harder to mantain) + + ALPAKA_ASSERT_OFFLOAD(cpeParams); + + const uint32_t blockIdx(alpaka::getIdx(acc)[0u]); + const uint32_t threadIdxLocal(alpaka::getIdx(acc)[0u]); + + // copy average geometry corrected by beamspot . FIXME (move it somewhere else???) 
+ if (0 == blockIdx) { + auto& agc = hits.averageGeometry(); + auto const& ag = cpeParams->averageGeometry(); + auto nLadders = TrackerTraits::numberOfLaddersInBarrel; + + cms::alpakatools::for_each_element_in_block_strided(acc, nLadders, [&](uint32_t il) { + agc.ladderZ[il] = ag.ladderZ[il] - bs->z; + agc.ladderX[il] = ag.ladderX[il] - bs->x; + agc.ladderY[il] = ag.ladderY[il] - bs->y; + agc.ladderR[il] = sqrt(agc.ladderX[il] * agc.ladderX[il] + agc.ladderY[il] * agc.ladderY[il]); + agc.ladderMinZ[il] = ag.ladderMinZ[il] - bs->z; + agc.ladderMaxZ[il] = ag.ladderMaxZ[il] - bs->z; + }); + + if (0 == threadIdxLocal) { + agc.endCapZ[0] = ag.endCapZ[0] - bs->z; + agc.endCapZ[1] = ag.endCapZ[1] - bs->z; + } + } + + // to be moved in common namespace... + using pixelClustering::invalidModuleId; + constexpr int32_t MaxHitsInIter = pixelCPEforDevice::MaxHitsInIter; + + using ClusParams = pixelCPEforDevice::ClusParams; + + // as usual one block per module + auto& clusParams = alpaka::declareSharedVar(acc); + + auto me = clusters[blockIdx].moduleId(); + int nclus = clusters[me].clusInModule(); + + if (0 == nclus) + return; +#ifdef GPU_DEBUG + if (threadIdxLocal == 0) { + auto k = clusters[1 + blockIdx].moduleStart(); + while (digis[k].moduleId() == invalidModuleId) + ++k; + ALPAKA_ASSERT_OFFLOAD(digis[k].moduleId() == me); + } + + if (me % 100 == 1) + if (threadIdxLocal == 0) + printf( + "hitbuilder: %d clusters in module %d. 
will write at %d\n", nclus, me, clusters[me].clusModuleStart()); +#endif + + for (int startClus = 0, endClus = nclus; startClus < endClus; startClus += MaxHitsInIter) { + auto first = clusters[1 + blockIdx].moduleStart(); + + int nClusInIter = alpaka::math::min(acc, MaxHitsInIter, endClus - startClus); + int lastClus = startClus + nClusInIter; + assert(nClusInIter <= nclus); + assert(nClusInIter > 0); + assert(lastClus <= nclus); + + assert(nclus > MaxHitsInIter || (0 == startClus && nClusInIter == nclus && lastClus == nclus)); + + // init + cms::alpakatools::for_each_element_in_block_strided(acc, nClusInIter, [&](uint32_t ic) { + clusParams.minRow[ic] = std::numeric_limits::max(); + clusParams.maxRow[ic] = 0; + clusParams.minCol[ic] = std::numeric_limits::max(); + clusParams.maxCol[ic] = 0; + clusParams.charge[ic] = 0; + clusParams.q_f_X[ic] = 0; + clusParams.q_l_X[ic] = 0; + clusParams.q_f_Y[ic] = 0; + clusParams.q_l_Y[ic] = 0; + }); + + alpaka::syncBlockThreads(acc); + + // one thread per "digi" + const uint32_t blockDimension(alpaka::getWorkDiv(acc)[0u]); + const auto& [firstElementIdxNoStride, endElementIdxNoStride] = + cms::alpakatools::element_index_range_in_block(acc, first); + uint32_t rowsColsFirstElementIdx = firstElementIdxNoStride; + uint32_t rowsColsEndElementIdx = endElementIdxNoStride; + for (uint32_t i = rowsColsFirstElementIdx; i < numElements; ++i) { + if (not cms::alpakatools::next_valid_element_index_strided( + i, rowsColsFirstElementIdx, rowsColsEndElementIdx, blockDimension, numElements)) + break; + auto id = digis[i].moduleId(); + if (id == invalidModuleId) + continue; // not valid + if (id != me) + break; // end of module + auto cl = digis[i].clus(); + if (cl < startClus || cl >= lastClus) + continue; + cl -= startClus; + ALPAKA_ASSERT_OFFLOAD(cl >= 0); + ALPAKA_ASSERT_OFFLOAD(cl < MaxHitsInIter); + auto x = digis[i].xx(); + auto y = digis[i].yy(); + alpaka::atomicMin(acc, &clusParams.minRow[cl], (uint32_t)x, alpaka::hierarchy::Threads{}); 
+ alpaka::atomicMax(acc, &clusParams.maxRow[cl], (uint32_t)x, alpaka::hierarchy::Threads{}); + alpaka::atomicMin(acc, &clusParams.minCol[cl], (uint32_t)y, alpaka::hierarchy::Threads{}); + alpaka::atomicMax(acc, &clusParams.maxCol[cl], (uint32_t)y, alpaka::hierarchy::Threads{}); + } + + alpaka::syncBlockThreads(acc); + + auto pixmx = cpeParams->detParams(me).pixmx; + uint32_t chargeFirstElementIdx = firstElementIdxNoStride; + uint32_t chargeEndElementIdx = endElementIdxNoStride; + for (uint32_t i = chargeFirstElementIdx; i < numElements; ++i) { + if (not cms::alpakatools::next_valid_element_index_strided( + i, chargeFirstElementIdx, chargeEndElementIdx, blockDimension, numElements)) + break; + auto id = digis[i].moduleId(); + if (id == invalidModuleId) + continue; // not valid + if (id != me) + break; // end of module + auto cl = digis[i].clus(); + if (cl < startClus || cl >= lastClus) + continue; + cl -= startClus; + ALPAKA_ASSERT_OFFLOAD(cl >= 0); + ALPAKA_ASSERT_OFFLOAD(cl < MaxHitsInIter); + auto x = digis[i].xx(); + auto y = digis[i].yy(); + auto ch = digis[i].adc(); + alpaka::atomicAdd(acc, &clusParams.charge[cl], (int32_t)ch, alpaka::hierarchy::Threads{}); + ch = alpaka::math::min(acc, ch, pixmx); + if (clusParams.minRow[cl] == x) + alpaka::atomicAdd(acc, &clusParams.q_f_X[cl], (int32_t)ch, alpaka::hierarchy::Threads{}); + if (clusParams.maxRow[cl] == x) + alpaka::atomicAdd(acc, &clusParams.q_l_X[cl], (int32_t)ch, alpaka::hierarchy::Threads{}); + if (clusParams.minCol[cl] == y) + alpaka::atomicAdd(acc, &clusParams.q_f_Y[cl], (int32_t)ch, alpaka::hierarchy::Threads{}); + if (clusParams.maxCol[cl] == y) + alpaka::atomicAdd(acc, &clusParams.q_l_Y[cl], (int32_t)ch, alpaka::hierarchy::Threads{}); + } + + alpaka::syncBlockThreads(acc); + + // next one cluster per thread... 
+ first = clusters[me].clusModuleStart() + startClus; + cms::alpakatools::for_each_element_in_block_strided(acc, nClusInIter, [&](uint32_t ic) { + auto h = first + ic; // output index in global memory + + assert(h < (uint32_t)hits.metadata().size()); + assert(h < clusters[me + 1].clusModuleStart()); + + pixelCPEforDevice::position( + cpeParams->commonParams(), cpeParams->detParams(me), clusParams, ic); + + pixelCPEforDevice::errorFromDB( + cpeParams->commonParams(), cpeParams->detParams(me), clusParams, ic); + + // store it + hits[h].chargeAndStatus().charge = clusParams.charge[ic]; + hits[h].chargeAndStatus().status = clusParams.status[ic]; + hits[h].detectorIndex() = me; + + float xl, yl; + hits[h].xLocal() = xl = clusParams.xpos[ic]; + hits[h].yLocal() = yl = clusParams.ypos[ic]; + + hits[h].clusterSizeX() = clusParams.xsize[ic]; + hits[h].clusterSizeY() = clusParams.ysize[ic]; + + hits[h].xerrLocal() = clusParams.xerr[ic] * clusParams.xerr[ic] + cpeParams->detParams(me).apeXX; + hits[h].yerrLocal() = clusParams.yerr[ic] * clusParams.yerr[ic] + cpeParams->detParams(me).apeYY; + + // keep it local for computations + float xg, yg, zg; + // to global and compute phi... + cpeParams->detParams(me).frame.toGlobal(xl, yl, xg, yg, zg); + // here correct for the beamspot... 
+ xg -= bs->x; + yg -= bs->y; + zg -= bs->z; + + hits[h].xGlobal() = xg; + hits[h].yGlobal() = yg; + hits[h].zGlobal() = zg; + + hits[h].rGlobal() = alpaka::math::sqrt(acc, xg * xg + yg * yg); + hits[h].iphi() = unsafe_atan2s<7>(yg, xg); + }); + alpaka::syncBlockThreads(acc); + } // end loop on batches + } + }; + + } // namespace pixelRecHits +} // namespace ALPAKA_ACCELERATOR_NAMESPACE + +#endif // RecoLocalTracker_SiPixelRecHits_plugins_alpaka_PixelRecHits_h diff --git a/RecoLocalTracker/SiPixelRecHits/plugins/alpaka/SiPixelRecHitAlpaka.cc b/RecoLocalTracker/SiPixelRecHits/plugins/alpaka/SiPixelRecHitAlpaka.cc new file mode 100644 index 0000000000000..a463fad652d24 --- /dev/null +++ b/RecoLocalTracker/SiPixelRecHits/plugins/alpaka/SiPixelRecHitAlpaka.cc @@ -0,0 +1,102 @@ +#include "DataFormats/BeamSpot/interface/BeamSpotPOD.h" +#include "DataFormats/BeamSpot/interface/alpaka/BeamSpotDeviceProduct.h" +#include "DataFormats/SiPixelClusterSoA/interface/SiPixelClustersDevice.h" +#include "DataFormats/SiPixelClusterSoA/interface/alpaka/SiPixelClustersCollection.h" +#include "DataFormats/SiPixelDigiSoA/interface/SiPixelDigisDevice.h" +#include "DataFormats/SiPixelDigiSoA/interface/alpaka/SiPixelDigisCollection.h" +#include "DataFormats/TrackingRecHitSoA/interface/TrackingRecHitSoADevice.h" +#include "DataFormats/TrackingRecHitSoA/interface/alpaka/TrackingRecHitSoACollection.h" +#include "FWCore/ParameterSet/interface/ConfigurationDescriptions.h" +#include "FWCore/ParameterSet/interface/ParameterSet.h" +#include "FWCore/ParameterSet/interface/ParameterSetDescription.h" +#include "FWCore/Utilities/interface/InputTag.h" +#include "Geometry/CommonTopologies/interface/SimplePixelTopology.h" +#include "Geometry/Records/interface/TrackerDigiGeometryRecord.h" +#include "Geometry/TrackerGeometryBuilder/interface/TrackerGeometry.h" +#include "HeterogeneousCore/AlpakaCore/interface/alpaka/Event.h" +#include "HeterogeneousCore/AlpakaCore/interface/alpaka/EventSetup.h" +#include 
"HeterogeneousCore/AlpakaCore/interface/alpaka/global/EDProducer.h" +#include "HeterogeneousCore/AlpakaInterface/interface/config.h" +#include "HeterogeneousCore/AlpakaInterface/interface/memory.h" + +#include "FWCore/ParameterSet/interface/ConfigurationDescriptions.h" +#include "FWCore/ParameterSet/interface/ParameterSet.h" +#include "FWCore/ParameterSet/interface/ParameterSetDescription.h" +#include "FWCore/Utilities/interface/InputTag.h" +#include "Geometry/Records/interface/TrackerDigiGeometryRecord.h" +#include "Geometry/TrackerGeometryBuilder/interface/TrackerGeometry.h" +#include "RecoLocalTracker/Records/interface/PixelCPEFastParamsRecord.h" + +#include "RecoLocalTracker/SiPixelRecHits/interface/PixelCPEBase.h" +#include "RecoLocalTracker/SiPixelRecHits/interface/pixelCPEforDevice.h" +#include "RecoLocalTracker/SiPixelRecHits/interface/alpaka/PixelCPEFastParamsCollection.h" + +#include "PixelRecHitGPUKernel.h" + +namespace ALPAKA_ACCELERATOR_NAMESPACE { + template + class SiPixelRecHitAlpaka : public global::EDProducer<> { + public: + explicit SiPixelRecHitAlpaka(const edm::ParameterSet& iConfig); + ~SiPixelRecHitAlpaka() override = default; + + static void fillDescriptions(edm::ConfigurationDescriptions& descriptions); + + using ParamsOnGPU = pixelCPEforDevice::ParamsOnDeviceT; + + private: + void produce(edm::StreamID streamID, device::Event& iEvent, const device::EventSetup& iSetup) const override; + + const device::ESGetToken, PixelCPEFastParamsRecord> cpeToken_; + const device::EDGetToken tBeamSpot; + const device::EDGetToken tokenClusters_; + const device::EDGetToken tokenDigi_; + const device::EDPutToken> tokenHit_; + + const pixelgpudetails::PixelRecHitGPUKernel gpuAlgo_; + }; + + template + SiPixelRecHitAlpaka::SiPixelRecHitAlpaka(const edm::ParameterSet& iConfig) + : cpeToken_(esConsumes(edm::ESInputTag("", iConfig.getParameter("CPE")))), + tBeamSpot(consumes(iConfig.getParameter("beamSpot"))), + 
tokenClusters_(consumes(iConfig.getParameter("src"))), + tokenDigi_(consumes(iConfig.getParameter("src"))), + tokenHit_(produces()) {} + + template + void SiPixelRecHitAlpaka::fillDescriptions(edm::ConfigurationDescriptions& descriptions) { + edm::ParameterSetDescription desc; + + desc.add("beamSpot", edm::InputTag("offlineBeamSpotDevice")); + desc.add("src", edm::InputTag("siPixelClustersPreSplittingAlpaka")); + + std::string cpe = "PixelCPEFastParams"; + cpe += TrackerTraits::nameModifier; + desc.add("CPE", cpe); + + descriptions.addWithDefaultLabel(desc); + } + + template + void SiPixelRecHitAlpaka::produce(edm::StreamID streamID, + device::Event& iEvent, + const device::EventSetup& es) const { + auto& fcpe = es.getData(cpeToken_); + + auto const& clusters = iEvent.get(tokenClusters_); + + auto const& digis = iEvent.get(tokenDigi_); + + auto const& bs = iEvent.get(tBeamSpot); + + iEvent.emplace(tokenHit_, + gpuAlgo_.makeHitsAsync(digis, clusters, bs.data(), fcpe.const_buffer().data(), iEvent.queue())); + } + using SiPixelRecHitAlpakaPhase1 = SiPixelRecHitAlpaka; + using SiPixelRecHitAlpakaPhase2 = SiPixelRecHitAlpaka; +} // namespace ALPAKA_ACCELERATOR_NAMESPACE + +#include "HeterogeneousCore/AlpakaCore/interface/alpaka/MakerMacros.h" +DEFINE_FWK_ALPAKA_MODULE(SiPixelRecHitAlpakaPhase1); +DEFINE_FWK_ALPAKA_MODULE(SiPixelRecHitAlpakaPhase2); diff --git a/RecoLocalTracker/SiPixelRecHits/python/PixelCPEESProducers_cff.py b/RecoLocalTracker/SiPixelRecHits/python/PixelCPEESProducers_cff.py index 686b0afc335c4..52efaece5e4df 100644 --- a/RecoLocalTracker/SiPixelRecHits/python/PixelCPEESProducers_cff.py +++ b/RecoLocalTracker/SiPixelRecHits/python/PixelCPEESProducers_cff.py @@ -1,4 +1,5 @@ import FWCore.ParameterSet.Config as cms +from Configuration.ProcessModifiers.alpaka_cff import alpaka # # Load all Pixel Cluster Position Estimator ESProducers @@ -18,3 +19,10 @@ # from CalibTracker.SiPixelESProducers.SiPixelTemplateDBObjectESProducer_cfi import * from 
CalibTracker.SiPixelESProducers.SiPixel2DTemplateDBObjectESProducer_cfi import * + +def _addProcessCPEsAlpaka(process): + process.load("RecoLocalTracker.SiPixelRecHits.pixelCPEFastParamsESProducerAlpakaPhase1_cfi") + process.load("RecoLocalTracker.SiPixelRecHits.pixelCPEFastParamsESProducerAlpakaPhase2_cfi") + +modifyConfigurationForAlpakaCPEs_ = alpaka.makeProcessModifier(_addProcessCPEsAlpaka) + diff --git a/RecoLocalTracker/SiPixelRecHits/python/SiPixelRecHits_cfi.py b/RecoLocalTracker/SiPixelRecHits/python/SiPixelRecHits_cfi.py index f45b41861995d..745477f44bdd1 100644 --- a/RecoLocalTracker/SiPixelRecHits/python/SiPixelRecHits_cfi.py +++ b/RecoLocalTracker/SiPixelRecHits/python/SiPixelRecHits_cfi.py @@ -1,6 +1,7 @@ import FWCore.ParameterSet.Config as cms from HeterogeneousCore.CUDACore.SwitchProducerCUDA import SwitchProducerCUDA from Configuration.ProcessModifiers.gpu_cff import gpu +from Configuration.ProcessModifiers.alpaka_cff import alpaka # legacy pixel rechit producer siPixelRecHits = cms.EDProducer("SiPixelRecHitConverter", @@ -112,9 +113,6 @@ ) ) - -#(gpu & pixelNtupletFit & phase2_tracker).toReplaceWith(siPixelRecHitsPreSplitting , cuda = _siPixelRecHitFromCUDAPhase2.clone()) - (gpu & pixelNtupletFit).toReplaceWith(siPixelRecHitsPreSplittingTask, cms.Task( # reconstruct the pixel rechits on the gpu or on the cpu # (normally only one of the two is run because only one is consumed from later stages) @@ -125,3 +123,58 @@ # producing and converting on cpu (if needed) siPixelRecHitsPreSplittingSoA )) + +###################################################################### + +### Alpaka Pixel Hits Reco +from RecoLocalTracker.SiPixelRecHits.siPixelRecHitAlpakaPhase1_cfi import siPixelRecHitAlpakaPhase1 as _siPixelRecHitAlpakaPhase1 +from RecoLocalTracker.SiPixelRecHits.siPixelRecHitAlpakaPhase2_cfi import siPixelRecHitAlpakaPhase2 as _siPixelRecHitAlpakaPhase2 + +# Hit SoA producer on Device +siPixelRecHitsPreSplittingAlpaka = 
_siPixelRecHitAlpakaPhase1.clone( + src = "siPixelClustersPreSplittingAlpaka" +) +phase2_tracker.toReplaceWith(siPixelRecHitsPreSplittingAlpaka,_siPixelRecHitAlpakaPhase2.clone( + src = "siPixelClustersPreSplittingAlpaka" +)) + +from RecoLocalTracker.SiPixelRecHits.siPixelRecHitFromSoAAlpakaPhase1_cfi import siPixelRecHitFromSoAAlpakaPhase1 as _siPixelRecHitFromSoAAlpakaPhase1 +from RecoLocalTracker.SiPixelRecHits.siPixelRecHitFromSoAAlpakaPhase2_cfi import siPixelRecHitFromSoAAlpakaPhase2 as _siPixelRecHitFromSoAAlpakaPhase2 + +(alpaka & ~phase2_tracker).toModify(siPixelRecHitsPreSplitting, + cpu = _siPixelRecHitFromSoAAlpakaPhase1.clone( + pixelRecHitSrc = cms.InputTag('siPixelRecHitsPreSplittingAlpaka'), + src = cms.InputTag('siPixelClustersPreSplitting')) +) + +(alpaka & phase2_tracker).toModify(siPixelRecHitsPreSplitting, + cpu = _siPixelRecHitFromSoAAlpakaPhase2.clone( + pixelRecHitSrc = cms.InputTag('siPixelRecHitsPreSplittingAlpaka'), + src = cms.InputTag('siPixelClustersPreSplitting')) +) + + +alpaka.toReplaceWith(siPixelRecHitsPreSplittingTask, cms.Task( + # Reconstruct the pixel hits on the device + siPixelRecHitsPreSplittingAlpaka, + # Convert hit soa on host to legacy formats + siPixelRecHitsPreSplitting)) + + +### Alpaka Device vs Host validation + +from Configuration.ProcessModifiers.alpakaValidationPixel_cff import alpakaValidationPixel + +# Hit SoA producer on serial backend +siPixelRecHitsPreSplittingAlpakaSerial = siPixelRecHitsPreSplittingAlpaka.clone( + src = "siPixelClustersPreSplittingAlpakaSerial", + alpaka = dict( backend = 'serial_sync' ) +) + +alpakaValidationPixel.toReplaceWith(siPixelRecHitsPreSplittingTask, cms.Task( + # Reconstruct and convert the pixel hit with alpaka on device + siPixelRecHitsPreSplittingTask.copy(), + # SoA serial counterpart + siPixelRecHitsPreSplittingAlpakaSerial)) + + diff --git a/RecoLocalTracker/SiPixelRecHits/src/ES_PixelCPEFastParams.cc b/RecoLocalTracker/SiPixelRecHits/src/ES_PixelCPEFastParams.cc new file 
mode 100644 index 0000000000000..804f817bdb6e0 --- /dev/null +++ b/RecoLocalTracker/SiPixelRecHits/src/ES_PixelCPEFastParams.cc @@ -0,0 +1,9 @@ +#include "RecoLocalTracker/SiPixelRecHits/interface/PixelCPEFastParamsHost.h" +#include "FWCore/Utilities/interface/typelookup.h" +#include "Geometry/CommonTopologies/interface/SimplePixelTopology.h" + +using PixelCPEFastParamsHostPhase1 = PixelCPEFastParamsHost; +using PixelCPEFastParamsHostPhase2 = PixelCPEFastParamsHost; + +TYPELOOKUP_DATA_REG(PixelCPEFastParamsHostPhase1); +TYPELOOKUP_DATA_REG(PixelCPEFastParamsHostPhase2); diff --git a/RecoLocalTracker/SiPixelRecHits/src/PixelCPEFastParams.cc b/RecoLocalTracker/SiPixelRecHits/src/PixelCPEFastParams.cc new file mode 100644 index 0000000000000..d98c84e5860f4 --- /dev/null +++ b/RecoLocalTracker/SiPixelRecHits/src/PixelCPEFastParams.cc @@ -0,0 +1,9 @@ +#include "FWCore/Utilities/interface/typelookup.h" +#include "RecoLocalTracker/SiPixelRecHits/interface/PixelCPEFastParamsDevice.h" +#include "Geometry/CommonTopologies/interface/SimplePixelTopology.h" + +using PixelCPEFastParamsPhase1 = PixelCPEFastParamsDevice; +using PixelCPEFastParamsPhase2 = PixelCPEFastParamsDevice; + +TYPELOOKUP_DATA_REG(PixelCPEFastParamsPhase1); +TYPELOOKUP_DATA_REG(PixelCPEFastParamsPhase2); \ No newline at end of file diff --git a/RecoLocalTracker/SiPixelRecHits/src/PixelCPEFastParamsHost.cc b/RecoLocalTracker/SiPixelRecHits/src/PixelCPEFastParamsHost.cc new file mode 100644 index 0000000000000..fb44fe4e0b34f --- /dev/null +++ b/RecoLocalTracker/SiPixelRecHits/src/PixelCPEFastParamsHost.cc @@ -0,0 +1,480 @@ +#include +#include "HeterogeneousCore/AlpakaInterface/interface/CopyToDevice.h" +#include "HeterogeneousCore/AlpakaInterface/interface/config.h" +#include "HeterogeneousCore/AlpakaInterface/interface/memory.h" +#include "DataFormats/SiPixelClusterSoA/interface/ClusteringConstants.h" +#include "DataFormats/GeometrySurface/interface/SOARotation.h" +#include 
"Geometry/CommonTopologies/interface/SimplePixelTopology.h" +#include "DataFormats/TrackingRecHitSoA/interface/SiPixelHitStatus.h" +#include "RecoLocalTracker/SiPixelRecHits/interface/PixelCPEFastParamsHost.h" +#include "CondFormats/SiPixelTransient/interface/SiPixelGenError.h" + +// namespace pixelCPEforDevice { + +//----------------------------------------------------------------------------- +//! The constructor. +//----------------------------------------------------------------------------- +template +PixelCPEFastParamsHost::PixelCPEFastParamsHost(edm::ParameterSet const& conf, + const MagneticField* mag, + const TrackerGeometry& geom, + const TrackerTopology& ttopo, + const SiPixelLorentzAngle* lorentzAngle, + const SiPixelGenErrorDBObject* genErrorDBObject, + const SiPixelLorentzAngle* lorentzAngleWidth) + : PixelCPEGenericBase(conf, mag, geom, ttopo, lorentzAngle, genErrorDBObject, lorentzAngleWidth), + buffer_(cms::alpakatools::make_host_buffer>()) { + // Use errors from templates or from GenError + if (useErrorsFromTemplates_) { + if (!SiPixelGenError::pushfile(*genErrorDBObject_, this->thePixelGenError_)) + throw cms::Exception("InvalidCalibrationLoaded") + << "ERROR: GenErrors not filled correctly. Check the sqlite file. Using SiPixelTemplateDBObject version " + << (*genErrorDBObject_).version(); + } + + fillParamsForDevice(); +} + + +template +void PixelCPEFastParamsHost::fillParamsForDevice() { + + // this code executes only once per job, computation inefficiency is not an issue + // many code blocks are repeated: better keep the computation local and self-consistent as blocks may in future move around, be deleted ... 
+ // It is valid only for Phase1 and the version of GenError in DB used in late 2018 and in 2021 + + buffer_->commonParams().theThicknessB = m_DetParams.front().theThickness; + buffer_->commonParams().theThicknessE = m_DetParams.back().theThickness; + buffer_->commonParams().thePitchX = m_DetParams[0].thePitchX; + buffer_->commonParams().thePitchY = m_DetParams[0].thePitchY; + + buffer_->commonParams().numberOfLaddersInBarrel = TrackerTraits::numberOfLaddersInBarrel; + + LogDebug("PixelCPEFastParamsHost") << "pitch & thickness " << buffer_->commonParams().thePitchX << ' ' << buffer_->commonParams().thePitchY + << " " << buffer_->commonParams().theThicknessB << ' ' << buffer_->commonParams().theThicknessE; + + // zero average geometry + memset(&buffer_->averageGeometry(), 0, sizeof(pixelTopology::AverageGeometryT)); + + uint32_t oldLayer = 0; + uint32_t oldLadder = 0; + float rl = 0; + float zl = 0; + float miz = 500, mxz = 0; + float pl = 0; + int nl = 0; + + assert(m_DetParams.size()<=TrackerTraits::numberOfModules); + for (auto i = 0U; i < m_DetParams.size(); ++i) { + auto& p = m_DetParams[i]; + auto& g = buffer_->detParams(i); + + g.nRowsRoc = p.theDet->specificTopology().rowsperroc(); + g.nColsRoc = p.theDet->specificTopology().colsperroc(); + g.nRows = p.theDet->specificTopology().rocsX() * g.nRowsRoc; + g.nCols = p.theDet->specificTopology().rocsY() * g.nColsRoc; + + g.numPixsInModule = g.nRows * g.nCols; + + assert(p.theDet->index() == int(i)); + assert(buffer_->commonParams().thePitchY == p.thePitchY); + assert(buffer_->commonParams().thePitchX == p.thePitchX); + + g.isBarrel = GeomDetEnumerators::isBarrel(p.thePart); + g.isPosZ = p.theDet->surface().position().z() > 0; + g.layer = ttopo_.layer(p.theDet->geographicalId()); + g.index = i; // better be! + g.rawId = p.theDet->geographicalId(); + auto thickness = g.isBarrel ? 
buffer_->commonParams().theThicknessB : buffer_->commonParams().theThicknessE; + assert(thickness == p.theThickness); + + auto ladder = ttopo_.pxbLadder(p.theDet->geographicalId()); + if (oldLayer != g.layer) { + oldLayer = g.layer; + LogDebug("PixelCPEFastParamsHost") << "new layer at " << i << (g.isBarrel ? " B " : (g.isPosZ ? " E+ " : " E- ")) + << g.layer << " starting at " << g.rawId << '\n' + << "old layer had " << nl << " ladders"; + nl = 0; + } + if (oldLadder != ladder) { + oldLadder = ladder; + LogDebug("PixelCPEFastParamsHost") << "new ladder at " << i << (g.isBarrel ? " B " : (g.isPosZ ? " E+ " : " E- ")) + << ladder << " starting at " << g.rawId << '\n' + << "old ladder ave z,r,p mz " << zl / 8.f << " " << rl / 8.f << " " << pl / 8.f << ' ' + << miz << ' ' << mxz; + rl = 0; + zl = 0; + pl = 0; + miz = 500; + mxz = 0; + nl++; + } + + g.shiftX = 0.5f * p.lorentzShiftInCmX; + g.shiftY = 0.5f * p.lorentzShiftInCmY; + g.chargeWidthX = p.lorentzShiftInCmX * p.widthLAFractionX; + g.chargeWidthY = p.lorentzShiftInCmY * p.widthLAFractionY; + + g.x0 = p.theOrigin.x(); + g.y0 = p.theOrigin.y(); + g.z0 = p.theOrigin.z(); + + auto vv = p.theDet->surface().position(); + auto rr = pixelCPEforDevice::Rotation(p.theDet->surface().rotation()); + g.frame = pixelCPEforDevice::Frame(vv.x(), vv.y(), vv.z(), rr); + + zl += vv.z(); + miz = std::min(miz, std::abs(vv.z())); + mxz = std::max(mxz, std::abs(vv.z())); + rl += vv.perp(); + pl += vv.phi(); // (not obvious) + + // errors ..... + ClusterParamGeneric cp; + + cp.with_track_angle = false; + + auto lape = p.theDet->localAlignmentError(); + if (lape.invalid()) + lape = LocalError(); // zero.... 
+ + g.apeXX = lape.xx(); + g.apeYY = lape.yy(); + + auto toMicron = [&](float x) { return std::min(511, int(x * 1.e4f + 0.5f)); }; + + // average angle + auto gvx = p.theOrigin.x() + 40.f * buffer_->commonParams().thePitchX; + auto gvy = p.theOrigin.y(); + auto gvz = 1.f / p.theOrigin.z(); + //--- Note that the normalization is not required as only the ratio used + + { + // calculate angles (fed into errorFromTemplates) + cp.cotalpha = gvx * gvz; + cp.cotbeta = gvy * gvz; + + errorFromTemplates(p, cp, 20000.); + } + +#ifdef EDM_ML_DEBUG + auto m = 10000.f; + for (float qclus = 15000; qclus < 35000; qclus += 15000) { + errorFromTemplates(p, cp, qclus); + LogDebug("PixelCPEFastParamsHost") << i << ' ' << qclus << ' ' << cp.pixmx << ' ' << m * cp.sigmax << ' ' << m * cp.sx1 + << ' ' << m * cp.sx2 << ' ' << m * cp.sigmay << ' ' << m * cp.sy1 << ' ' << m * cp.sy2; + } + LogDebug("PixelCPEFastParamsHost") << i << ' ' << m * std::sqrt(lape.xx()) << ' ' << m * std::sqrt(lape.yy()); +#endif // EDM_ML_DEBUG + + g.pixmx = std::max(0, cp.pixmx); + g.sx2 = toMicron(cp.sx2); + g.sy1 = std::max(21, toMicron(cp.sy1)); // for some angles sy1 is very small + g.sy2 = std::max(55, toMicron(cp.sy2)); // sometimes sy2 is smaller than others (due to angle?) 
+ + //sample xerr as function of position + // moduleOffsetX is the definition of TrackerTraits::xOffset, + // needs to be calculated because for Phase2 the modules are not uniform + float moduleOffsetX = -(0.5f * float(g.nRows) + TrackerTraits::bigPixXCorrection); + auto const xoff = moduleOffsetX * buffer_->commonParams().thePitchX; + + for (int ix = 0; ix < CPEFastParametrisation::kNumErrorBins; ++ix) { + auto x = xoff * (1.f - (0.5f + float(ix)) / 8.f); + auto gvx = p.theOrigin.x() - x; + auto gvy = p.theOrigin.y(); + auto gvz = 1.f / p.theOrigin.z(); + cp.cotbeta = gvy * gvz; + cp.cotalpha = gvx * gvz; + errorFromTemplates(p, cp, 20000.f); + g.sigmax[ix] = toMicron(cp.sigmax); + g.sigmax1[ix] = toMicron(cp.sx1); + LogDebug("PixelCPEFastParamsHost") << "sigmax vs x " << i << ' ' << x << ' ' << cp.cotalpha << ' ' << int(g.sigmax[ix]) + << ' ' << int(g.sigmax1[ix]) << ' ' << 10000.f * cp.sigmay << std::endl; + } +#ifdef EDM_ML_DEBUG + // sample yerr as function of position + // moduleOffsetY is the definition of TrackerTraits::yOffset (removed) + float moduleOffsetY = 0.5f * float(g.nCols) + TrackerTraits::bigPixYCorrection; + auto const yoff = -moduleOffsetY * buffer_->commonParams().thePitchY; + + for (int ix = 0; ix < CPEFastParametrisation::kNumErrorBins; ++ix) { + auto y = yoff * (1.f - (0.5f + float(ix)) / 8.f); + auto gvx = p.theOrigin.x() + 40.f * buffer_->commonParams().thePitchY; + auto gvy = p.theOrigin.y() - y; + auto gvz = 1.f / p.theOrigin.z(); + cp.cotbeta = gvy * gvz; + cp.cotalpha = gvx * gvz; + errorFromTemplates(p, cp, 20000.f); + LogDebug("PixelCPEFastParamsHost") << "sigmay vs y " << i << ' ' << y << ' ' << cp.cotbeta << ' ' << 10000.f * cp.sigmay + << std::endl; + } +#endif // EDM_ML_DEBUG + + // calculate angles (repeated) + cp.cotalpha = gvx * gvz; + cp.cotbeta = gvy * gvz; + auto aveCB = cp.cotbeta; + + // sample x by charge + int qbin = CPEFastParametrisation::kGenErrorQBins; // low charge + int k = 0; + for (int qclus = 1000; qclus < 
200000; qclus += 1000) { + errorFromTemplates(p, cp, qclus); + if (cp.qBin_ == qbin) + continue; + qbin = cp.qBin_; + g.xfact[k] = cp.sigmax; + g.yfact[k] = cp.sigmay; + g.minCh[k++] = qclus; +#ifdef EDM_ML_DEBUG + LogDebug("PixelCPEFastParamsHost") << i << ' ' << g.rawId << ' ' << cp.cotalpha << ' ' << qclus << ' ' << cp.qBin_ << ' ' + << cp.pixmx << ' ' << m * cp.sigmax << ' ' << m * cp.sx1 << ' ' << m * cp.sx2 << ' ' + << m * cp.sigmay << ' ' << m * cp.sy1 << ' ' << m * cp.sy2 << std::endl; +#endif // EDM_ML_DEBUG + } + + assert(k <= CPEFastParametrisation::kGenErrorQBins); + + // fill the rest (sometimes bin 4 is missing) + for (int kk = k; kk < CPEFastParametrisation::kGenErrorQBins; ++kk) { + g.xfact[kk] = g.xfact[k - 1]; + g.yfact[kk] = g.yfact[k - 1]; + g.minCh[kk] = g.minCh[k - 1]; + } + auto detx = 1.f / g.xfact[0]; + auto dety = 1.f / g.yfact[0]; + for (int kk = 0; kk < CPEFastParametrisation::kGenErrorQBins; ++kk) { + g.xfact[kk] *= detx; + g.yfact[kk] *= dety; + } + // sample y in "angle" (estimated from cluster size) + float ys = 8.f - 4.f; // apparent bias of half pixel (see plot) + // plot: https://indico.cern.ch/event/934821/contributions/3974619/attachments/2091853/3515041/DigilessReco.pdf page 25 + // sample yerr as function of "size" + for (int iy = 0; iy < CPEFastParametrisation::kNumErrorBins; ++iy) { + ys += 1.f; // first bin 0 is for size 9 (and size is in fixed point 2^3) + if (CPEFastParametrisation::kNumErrorBins - 1 == iy) + ys += 8.f; // last bin for "overflow" + // cp.cotalpha = ys*(buffer_->commonParams().thePitchX/(8.f*thickness)); // use this to print sampling in "x" (and comment the line below) + cp.cotbeta = std::copysign(ys * (buffer_->commonParams().thePitchY / (8.f * thickness)), aveCB); + errorFromTemplates(p, cp, 20000.f); + g.sigmay[iy] = toMicron(cp.sigmay); + LogDebug("PixelCPEFastParamsHost") << "sigmax/sigmay " << i << ' ' << (ys + 4.f) / 8.f << ' ' << cp.cotalpha << '/' + << cp.cotbeta << ' ' << 10000.f * cp.sigmax << 
'/' << int(g.sigmay[iy]) << std::endl; + } + } // loop over det + + constexpr int numberOfModulesInLadder = TrackerTraits::numberOfModulesInLadder; + constexpr int numberOfLaddersInBarrel = TrackerTraits::numberOfLaddersInBarrel; + constexpr int numberOfModulesInBarrel = TrackerTraits::numberOfModulesInBarrel; + + constexpr float ladderFactor = 1.f / float(numberOfModulesInLadder); + + constexpr int firstEndcapPos = TrackerTraits::firstEndcapPos; + constexpr int firstEndcapNeg = TrackerTraits::firstEndcapNeg; + + // compute ladder barycenter (only in global z) for the barrel + // + auto& aveGeom = buffer_->averageGeometry(); + int il = 0; + for (int im = 0, nm = numberOfModulesInBarrel; im < nm; ++im) { + auto const& g = buffer_->detParams(im); + il = im / numberOfModulesInLadder; + assert(il < int(numberOfLaddersInBarrel)); + auto z = g.frame.z(); + aveGeom.ladderZ[il] += ladderFactor * z; + aveGeom.ladderMinZ[il] = std::min(aveGeom.ladderMinZ[il], z); + aveGeom.ladderMaxZ[il] = std::max(aveGeom.ladderMaxZ[il], z); + aveGeom.ladderX[il] += ladderFactor * g.frame.x(); + aveGeom.ladderY[il] += ladderFactor * g.frame.y(); + aveGeom.ladderR[il] += ladderFactor * sqrt(g.frame.x() * g.frame.x() + g.frame.y() * g.frame.y()); + } + assert(il + 1 == int(numberOfLaddersInBarrel)); + // add half_module and tolerance + constexpr float moduleLength = TrackerTraits::moduleLength; + constexpr float module_tolerance = 0.2f; + for (int il = 0, nl = numberOfLaddersInBarrel; il < nl; ++il) { + aveGeom.ladderMinZ[il] -= (0.5f * moduleLength - module_tolerance); + aveGeom.ladderMaxZ[il] += (0.5f * moduleLength - module_tolerance); + } + + // compute "max z" for first layer in endcap (should we restrict to the outermost ring?) 
+ for (auto im = TrackerTraits::layerStart[firstEndcapPos]; im < TrackerTraits::layerStart[firstEndcapPos + 1]; ++im) { + auto const& g = buffer_->detParams(im); + aveGeom.endCapZ[0] = std::max(aveGeom.endCapZ[0], g.frame.z()); + } + for (auto im = TrackerTraits::layerStart[firstEndcapNeg]; im < TrackerTraits::layerStart[firstEndcapNeg + 1]; ++im) { + auto const& g = buffer_->detParams(im); + aveGeom.endCapZ[1] = std::min(aveGeom.endCapZ[1], g.frame.z()); + } + // correct for outer ring being closer + aveGeom.endCapZ[0] -= TrackerTraits::endcapCorrection; + aveGeom.endCapZ[1] += TrackerTraits::endcapCorrection; +#ifdef EDM_ML_DEBUG + for (int jl = 0, nl = numberOfLaddersInBarrel; jl < nl; ++jl) { + LogDebug("PixelCPEFastParamsHost") << jl << ':' << aveGeom.ladderR[jl] << '/' + << std::sqrt(aveGeom.ladderX[jl] * aveGeom.ladderX[jl] + + aveGeom.ladderY[jl] * aveGeom.ladderY[jl]) + << ',' << aveGeom.ladderZ[jl] << ',' << aveGeom.ladderMinZ[jl] << ',' + << aveGeom.ladderMaxZ[jl] << '\n'; + } + LogDebug("PixelCPEFastParamsHost") << aveGeom.endCapZ[0] << ' ' << aveGeom.endCapZ[1]; +#endif // EDM_ML_DEBUG + + // fill Layer and ladders geometry + memset(&buffer_->layerGeometry(), 0, sizeof(pixelCPEforDevice::LayerGeometryT)); + memcpy(buffer_->layerGeometry().layerStart, + TrackerTraits::layerStart, + sizeof(pixelCPEforDevice::LayerGeometryT::layerStart)); + memcpy(buffer_->layerGeometry().layer, pixelTopology::layer.data(), pixelTopology::layer.size()); + buffer_->layerGeometry().maxModuleStride = pixelTopology::maxModuleStride; +} + +template +void PixelCPEFastParamsHost::errorFromTemplates(DetParam const& theDetParam, + ClusterParamGeneric& theClusterParam, + float qclus) const { + float locBz = theDetParam.bz; + float locBx = theDetParam.bx; + LogDebug("PixelCPEFastParamsHost") << "PixelCPEFastParamsHost::localPosition(...) 
//-----------------------------------------------------------------------------
//! Hit position in the local frame (in cm).  Unlike other CPEs, this
//! one converts everything from the measurement frame (in channel numbers)
//! into the local frame (in centimeters).
//!
//! As a side effect the x/y errors are stored in theClusterParam.sigmax and
//! theClusterParam.sigmay; localError() reads them back from there.
//!
//! NOTE(review): in this copy of the file the template parameter list after
//! `template`, the class template arguments and the static_cast target type
//! appear to have been stripped -- restore them from the upstream file.
//-----------------------------------------------------------------------------
template
LocalPoint PixelCPEFastParamsHost::localPosition(DetParam const& theDetParam,
                                                 ClusterParam& theClusterParamBase) const {
  ClusterParamGeneric& theClusterParam = static_cast(theClusterParamBase);

  // Either take the charge bin from the templates or fall back to bin 0.
  if (useErrorsFromTemplates_) {
    errorFromTemplates(theDetParam, theClusterParam, theClusterParam.theCluster->charge());
  } else {
    theClusterParam.qBin_ = 0;
  }

  // Edge charges are passed to collect_edge_charges() as output arguments
  // (presumably filled by reference -- its definition is not in this file section).
  int q_f_X;  //!< Q of the first  pixel  in X
  int q_l_X;  //!< Q of the last   pixel  in X
  int q_f_Y;  //!< Q of the first  pixel  in Y
  int q_l_Y;  //!< Q of the last   pixel  in Y
  collect_edge_charges(theClusterParam, q_f_X, q_l_X, q_f_Y, q_l_Y, useErrorsFromTemplates_ && truncatePixelCharge_);

  // Do as the GPU code does: pack this single cluster into slot 0 of a
  // ClusParams structure and run the device-side helpers on it.
  pixelCPEforDevice::ClusParams cp;

  cp.minRow[0] = theClusterParam.theCluster->minPixelRow();
  cp.maxRow[0] = theClusterParam.theCluster->maxPixelRow();
  cp.minCol[0] = theClusterParam.theCluster->minPixelCol();
  cp.maxCol[0] = theClusterParam.theCluster->maxPixelCol();

  cp.q_f_X[0] = q_f_X;
  cp.q_l_X[0] = q_l_X;
  cp.q_f_Y[0] = q_f_Y;
  cp.q_l_Y[0] = q_l_Y;

  cp.charge[0] = theClusterParam.theCluster->charge();

  // Per-detector parameters are looked up by the detector index.
  auto ind = theDetParam.theDet->index();
  pixelCPEforDevice::position(buffer_->commonParams(), buffer_->detParams(ind), cp, 0);
  // The position is read back from slot 0 after the call.
  auto xPos = cp.xpos[0];
  auto yPos = cp.ypos[0];

  // set the error  (mind ape....)
  pixelCPEforDevice::errorFromDB(buffer_->commonParams(), buffer_->detParams(ind), cp, 0);
  theClusterParam.sigmax = cp.xerr[0];
  theClusterParam.sigmay = cp.yerr[0];

  LogDebug("PixelCPEFastParamsHost") << " in PixelCPEFastParamsHost:localPosition - pos = " << xPos << " " << yPos << " size "
                                     << cp.maxRow[0] - cp.minRow[0] << ' ' << cp.maxCol[0] - cp.minCol[0];

  //--- Now put the two together
  LocalPoint pos_in_local(xPos, yPos);
  return pos_in_local;
}
"RecoLocalTracker/SiPixelRecHits/interface/alpaka/PixelCPEFastParamsCollection.h" +#include "HeterogeneousCore/AlpakaCore/interface/alpaka/typelookup.h" + +TYPELOOKUP_ALPAKA_DATA_REG(PixelCPEFastParamsPhase1); +TYPELOOKUP_ALPAKA_DATA_REG(PixelCPEFastParamsPhase2); diff --git a/RecoTauTag/HLTProducers/BuildFile.xml b/RecoTauTag/HLTProducers/BuildFile.xml index 6f4aa24552400..272cca42bf2f8 100644 --- a/RecoTauTag/HLTProducers/BuildFile.xml +++ b/RecoTauTag/HLTProducers/BuildFile.xml @@ -1,3 +1,6 @@ + + + @@ -13,9 +16,9 @@ - - - + + + diff --git a/RecoTauTag/HLTProducers/src/L2TauTagNNProducerAlpaka.cc b/RecoTauTag/HLTProducers/src/L2TauTagNNProducerAlpaka.cc new file mode 100644 index 0000000000000..749a88fd603db --- /dev/null +++ b/RecoTauTag/HLTProducers/src/L2TauTagNNProducerAlpaka.cc @@ -0,0 +1,822 @@ +/* + * \class L2TauTagProducer + * + * L2Tau identification using Convolutional NN. + * + * \author Valeria D'Amante, Università di Siena and INFN Pisa + * Konstantin Androsov, EPFL and ETHZ +*/ +#include +#include +#include +#include "FWCore/Framework/interface/stream/EDProducer.h" +#include "FWCore/Framework/interface/ESHandle.h" +#include "FWCore/Framework/interface/Event.h" +#include "FWCore/Framework/interface/EventSetup.h" +#include "FWCore/Framework/interface/Frameworkfwd.h" +#include "FWCore/MessageLogger/interface/MessageLogger.h" +#include "DataFormats/Math/interface/deltaR.h" +#include "DataFormats/Common/interface/Handle.h" +#include "FWCore/Utilities/interface/InputTag.h" +#include "FWCore/Utilities/interface/isFinite.h" +#include "FWCore/ParameterSet/interface/ParameterSet.h" +#include "FWCore/ParameterSet/interface/ConfigurationDescriptions.h" +#include "PhysicsTools/TensorFlow/interface/TensorFlow.h" +#include "Geometry/CaloGeometry/interface/CaloCellGeometry.h" +#include "Geometry/CaloGeometry/interface/CaloGeometry.h" +#include "Geometry/CaloTopology/interface/HcalTopology.h" +#include "Geometry/Records/interface/CaloGeometryRecord.h" +#include 
// Layout of the L2 tau-tagging network input: a nCellPhi x nCellEta grid of
// cells per tau candidate, each cell carrying nVars input variables.
namespace L2TauTagNNv1 {
  constexpr int nCellEta = 5;    // number of grid cells along eta
  constexpr int nCellPhi = 5;    // number of grid cells along phi
  constexpr int nVars = 31;      // number of input variables per cell
  constexpr float dR_max = 0.5;  // half-width of the grid around the tau axis

  // Index of every input variable along the last tensor dimension.
  // The enumerator values are contiguous and start at 0.
  enum class NNInputs {
    nVertices = 0,
    l1Tau_pt,
    l1Tau_eta,
    l1Tau_hwIso,
    EcalEnergySum,
    EcalSize,
    EcalEnergyStdDev,
    EcalDeltaEta,
    EcalDeltaPhi,
    EcalChi2,
    EcalEnergySumForPositiveChi2,
    EcalSizeForPositiveChi2,
    HcalEnergySum,
    HcalSize,
    HcalEnergyStdDev,
    HcalDeltaEta,
    HcalDeltaPhi,
    HcalChi2,
    HcalEnergySumForPositiveChi2,
    HcalSizeForPositiveChi2,
    PatatrackPtSum,
    PatatrackSize,
    PatatrackSizeWithVertex,
    PatatrackPtSumWithVertex,
    PatatrackChargeSum,
    PatatrackDeltaEta,
    PatatrackDeltaPhi,
    PatatrackChi2OverNdof,
    PatatrackNdof,
    PatatrackDxy,
    PatatrackDz
  };

  // The normalisation vector is filled by iterating varNameMap and is later
  // indexed with the integer value of NNInputs, so the number of enumerators
  // must stay in sync with nVars.
  static_assert(static_cast<int>(NNInputs::PatatrackDz) + 1 == nVars,
                "nVars must match the number of NNInputs enumerators");

  // Human-readable name of every input variable; the names are used as keys
  // of the normalisation dictionary and in diagnostic messages.  std::map
  // iterates in key order, i.e. in the order of the NNInputs enumerators.
  const std::map<NNInputs, std::string> varNameMap = {
      {NNInputs::nVertices, "nVertices"},
      {NNInputs::l1Tau_pt, "l1Tau_pt"},
      {NNInputs::l1Tau_eta, "l1Tau_eta"},
      {NNInputs::l1Tau_hwIso, "l1Tau_hwIso"},
      {NNInputs::EcalEnergySum, "EcalEnergySum"},
      {NNInputs::EcalSize, "EcalSize"},
      {NNInputs::EcalEnergyStdDev, "EcalEnergyStdDev"},
      {NNInputs::EcalDeltaEta, "EcalDeltaEta"},
      {NNInputs::EcalDeltaPhi, "EcalDeltaPhi"},
      {NNInputs::EcalChi2, "EcalChi2"},
      {NNInputs::EcalEnergySumForPositiveChi2, "EcalEnergySumForPositiveChi2"},
      {NNInputs::EcalSizeForPositiveChi2, "EcalSizeForPositiveChi2"},
      {NNInputs::HcalEnergySum, "HcalEnergySum"},
      {NNInputs::HcalSize, "HcalSize"},
      {NNInputs::HcalEnergyStdDev, "HcalEnergyStdDev"},
      {NNInputs::HcalDeltaEta, "HcalDeltaEta"},
      {NNInputs::HcalDeltaPhi, "HcalDeltaPhi"},
      {NNInputs::HcalChi2, "HcalChi2"},
      {NNInputs::HcalEnergySumForPositiveChi2, "HcalEnergySumForPositiveChi2"},
      {NNInputs::HcalSizeForPositiveChi2, "HcalSizeForPositiveChi2"},
      {NNInputs::PatatrackPtSum, "PatatrackPtSum"},
      {NNInputs::PatatrackSize, "PatatrackSize"},
      {NNInputs::PatatrackSizeWithVertex, "PatatrackSizeWithVertex"},
      {NNInputs::PatatrackPtSumWithVertex, "PatatrackPtSumWithVertex"},
      {NNInputs::PatatrackChargeSum, "PatatrackChargeSum"},
      {NNInputs::PatatrackDeltaEta, "PatatrackDeltaEta"},
      {NNInputs::PatatrackDeltaPhi, "PatatrackDeltaPhi"},
      {NNInputs::PatatrackChi2OverNdof, "PatatrackChi2OverNdof"},
      {NNInputs::PatatrackNdof, "PatatrackNdof"},
      {NNInputs::PatatrackDxy, "PatatrackDxy"},
      {NNInputs::PatatrackDz, "PatatrackDz"}};
}  // namespace L2TauTagNNv1
tensorflow::Tensor& cellGridMatrix, int tau_idx, int phi_idx, int eta_idx, L2TauTagNNv1::NNInputs NNInput_idx) { + return cellGridMatrix.tensor()(tau_idx, phi_idx, eta_idx, static_cast(NNInput_idx)); + } +} // namespace +struct normDictElement { + float mean; + float std; + float min; + float max; +}; + +struct L2TauNNProducerAlpakaCacheData { + L2TauNNProducerAlpakaCacheData() : graphDef(nullptr), session(nullptr) {} + tensorflow::GraphDef* graphDef; + tensorflow::Session* session; + std::vector normVec; +}; + +class L2TauNNProducerAlpaka : public edm::stream::EDProducer> { +public: + using TrackSoAHost = pixelTrack::TrackSoAHostPhase1; + + struct caloRecHitCollections { + const HBHERecHitCollection* hbhe; + const HORecHitCollection* ho; + const EcalRecHitCollection* eb; + const EcalRecHitCollection* ee; + const CaloGeometry* geometry; + }; + + struct InputDescTau { + std::string CollectionName; + edm::EDGetTokenT inputToken_; + }; + + static constexpr float dR2_max = L2TauTagNNv1::dR_max * L2TauTagNNv1::dR_max; + static constexpr float dEta_width = 2 * L2TauTagNNv1::dR_max / static_cast(L2TauTagNNv1::nCellEta); + static constexpr float dPhi_width = 2 * L2TauTagNNv1::dR_max / static_cast(L2TauTagNNv1::nCellPhi); + + explicit L2TauNNProducerAlpaka(const edm::ParameterSet&, const L2TauNNProducerAlpakaCacheData*); + static void fillDescriptions(edm::ConfigurationDescriptions&); + static std::unique_ptr initializeGlobalCache(const edm::ParameterSet&); + static void globalEndJob(L2TauNNProducerAlpakaCacheData*); + +private: + void checknan(tensorflow::Tensor& tensor, int debugLevel); + void standardizeTensor(tensorflow::Tensor& tensor); + std::vector getTauScore(const tensorflow::Tensor& cellGridMatrix); + void produce(edm::Event& event, const edm::EventSetup& eventsetup) override; + void fillL1TauVars(tensorflow::Tensor& cellGridMatrix, const std::vector& allTaus); + void fillCaloRecHits(tensorflow::Tensor& cellGridMatrix, + const std::vector& allTaus, + const 
caloRecHitCollections& caloRecHits); + void fillPatatracks(tensorflow::Tensor& cellGridMatrix, + const std::vector& allTaus, + const TrackSoAHost& patatracks_tsoa, + const ZVertexHost& patavtx_soa, + const reco::BeamSpot& beamspot, + const MagneticField* magfi); + void selectGoodTracksAndVertices(const ZVertexHost& patavtx_soa, + const TrackSoAHost& patatracks_tsoa, + std::vector& trkGood, + std::vector& vtxGood); + std::pair impactParameter(int it, + const TrackSoAHost& patatracks_tsoa, + float patatrackPhi, + const reco::BeamSpot& beamspot, + const MagneticField* magfi); + template + std::tuple getEtaPhiIndices(const VPos& position, const LVec& tau_p4); + template + std::tuple getEtaPhiIndices(float eta, float phi, const LVec& tau_p4); + +private: + const int debugLevel_; + const edm::EDGetTokenT tauTriggerToken_; + std::vector L1TauDesc_; + const edm::EDGetTokenT hbheToken_; + const edm::EDGetTokenT hoToken_; + const edm::EDGetTokenT ebToken_; + const edm::EDGetTokenT eeToken_; + const edm::ESGetToken geometryToken_; + const edm::ESGetToken bFieldToken_; + const edm::EDGetTokenT pataVerticesToken_; + const edm::EDGetTokenT pataTracksToken_; + const edm::EDGetTokenT beamSpotToken_; + const unsigned int maxVtx_; + const float fractionSumPt2_; + const float minSumPt2_; + const float trackPtMin_; + const float trackPtMax_; + const float trackChi2Max_; + std::string inputTensorName_; + std::string outputTensorName_; + const L2TauNNProducerAlpakaCacheData* L2cacheData_; +}; + +std::unique_ptr L2TauNNProducerAlpaka::initializeGlobalCache( + const edm::ParameterSet& cfg) { + std::unique_ptr cacheData = std::make_unique(); + cacheData->normVec.reserve(L2TauTagNNv1::nVars); + + auto const graphPath = edm::FileInPath(cfg.getParameter("graphPath")).fullPath(); + + cacheData->graphDef = tensorflow::loadGraphDef(graphPath); + cacheData->session = tensorflow::createSession(cacheData->graphDef); + + tensorflow::setLogging("2"); + + boost::property_tree::ptree loadPtreeRoot; + 
auto const normalizationDict = edm::FileInPath(cfg.getParameter("normalizationDict")).fullPath(); + boost::property_tree::read_json(normalizationDict, loadPtreeRoot); + for (const auto& [key, val] : L2TauTagNNv1::varNameMap) { + boost::property_tree::ptree var = loadPtreeRoot.get_child(val); + normDictElement current_element; + current_element.mean = var.get_child("mean").get_value(); + current_element.std = var.get_child("std").get_value(); + current_element.min = var.get_child("min").get_value(); + current_element.max = var.get_child("max").get_value(); + cacheData->normVec.push_back(current_element); + } + return cacheData; +} +void L2TauNNProducerAlpaka::globalEndJob(L2TauNNProducerAlpakaCacheData* cacheData) { + if (cacheData->graphDef != nullptr) { + delete cacheData->graphDef; + } + tensorflow::closeSession(cacheData->session); +} +void L2TauNNProducerAlpaka::fillDescriptions(edm::ConfigurationDescriptions& descriptions) { + edm::ParameterSetDescription desc; + desc.add("debugLevel", 0)->setComment("set debug level for printing out info"); + edm::ParameterSetDescription l1TausPset; + l1TausPset.add("L1CollectionName", "DoubleTau")->setComment("Name of collections"); + l1TausPset.add("L1TauTrigger", edm::InputTag("hltL1sDoubleTauBigOR")) + ->setComment("Which trigger should the L1 Taus collection pass"); + edm::ParameterSet l1TausPSetDefault; + l1TausPSetDefault.addParameter("L1CollectionName", "DoubleTau"); + l1TausPSetDefault.addParameter("L1TauTrigger", edm::InputTag("hltL1sDoubleTauBigOR")); + desc.addVPSet("L1Taus", l1TausPset, {l1TausPSetDefault}); + desc.add("hbheInput", edm::InputTag("hltHbhereco"))->setComment("HBHE recHit collection"); + desc.add("hoInput", edm::InputTag("hltHoreco"))->setComment("HO recHit Collection"); + desc.add("ebInput", edm::InputTag("hltEcalRecHit:EcalRecHitsEB"))->setComment("EB recHit Collection"); + desc.add("eeInput", edm::InputTag("hltEcalRecHit:EcalRecHitsEE"))->setComment("EE recHit Collection"); + 
// Stream-instance constructor.
//
// Reads the configuration, registers the consumed event/event-setup products,
// takes the names of the network input and output tensors from the cached
// graph, and declares one output product per entry of the "L1Taus" VPSet
// (labelled with that entry's "L1CollectionName").
//
// Throws cms::Exception("InvalidCacheData") when the cached graph is missing.
//
// NOTE(review): in this copy of the file the template arguments of
// getParameter / consumes / produces and of the local std::vector appear to
// have been stripped -- restore them from the upstream file.
L2TauNNProducerAlpaka::L2TauNNProducerAlpaka(const edm::ParameterSet& cfg,
                                             const L2TauNNProducerAlpakaCacheData* cacheData)
    : debugLevel_(cfg.getParameter("debugLevel")),
      hbheToken_(consumes(cfg.getParameter("hbheInput"))),
      hoToken_(consumes(cfg.getParameter("hoInput"))),
      ebToken_(consumes(cfg.getParameter("ebInput"))),
      eeToken_(consumes(cfg.getParameter("eeInput"))),
      geometryToken_(esConsumes()),
      bFieldToken_(esConsumes()),
      pataVerticesToken_(consumes(cfg.getParameter("pataVertices"))),
      pataTracksToken_(consumes(cfg.getParameter("pataTracks"))),
      beamSpotToken_(consumes(cfg.getParameter("BeamSpot"))),
      maxVtx_(cfg.getParameter("maxVtx")),
      fractionSumPt2_(cfg.getParameter("fractionSumPt2")),
      minSumPt2_(cfg.getParameter("minSumPt2")),
      trackPtMin_(cfg.getParameter("track_pt_min")),
      trackPtMax_(cfg.getParameter("track_pt_max")),
      trackChi2Max_(cfg.getParameter("track_chi2_max")) {
  if (cacheData->graphDef == nullptr) {
    throw cms::Exception("InvalidCacheData") << "Invalid Cache Data.";
  }
  // The first graph node is taken as the input tensor and the last one as the output tensor.
  // NOTE(review): node(0) presumes the graph has at least one node -- confirm
  // that an empty graph cannot reach this point.
  inputTensorName_ = cacheData->graphDef->node(0).name();
  outputTensorName_ = cacheData->graphDef->node(cacheData->graphDef->node_size() - 1).name();
  L2cacheData_ = cacheData;
  // One (collection name, trigger-filter token) pair per configured L1 tau input.
  std::vector L1TauCollections = cfg.getParameter>("L1Taus");
  L1TauDesc_.reserve(L1TauCollections.size());
  for (const auto& l1TauInput : L1TauCollections) {
    InputDescTau toInsert;
    toInsert.CollectionName = l1TauInput.getParameter("L1CollectionName");
    toInsert.inputToken_ =
        consumes(l1TauInput.getParameter("L1TauTrigger"));
    L1TauDesc_.push_back(toInsert);
  }
  // Declare one output product per L1 tau input, using the collection name as instance label.
  for (const auto& desc : L1TauDesc_)
    produces>(desc.CollectionName);
}
cell \n"; + if (var_idx + 1 < tensor_shape.at(3)) + edm::LogWarning("InputVar") << L2TauTagNNv1::varNameMap.at(static_cast(var_idx + 1)) + << "\t = " << getCell(static_cast(var_idx + 1)); + if (var_idx + 2 < tensor_shape.at(3)) + edm::LogWarning("InputVar") << L2TauTagNNv1::varNameMap.at(static_cast(var_idx + 2)) + << "\t = " << getCell(static_cast(var_idx + 2)); + if (var_idx + 3 < tensor_shape.at(3)) + edm::LogWarning("InputVar") << L2TauTagNNv1::varNameMap.at(static_cast(var_idx + 3)) + << "\t = " << getCell(static_cast(var_idx + 3)); + if (var_idx + 4 < tensor_shape.at(3)) + edm::LogWarning("InputVar") << L2TauTagNNv1::varNameMap.at(static_cast(var_idx + 4)) + << "\t = " << getCell(static_cast(var_idx + 4)); + } + } + } + } + } + } +} + +void L2TauNNProducerAlpaka::standardizeTensor(tensorflow::Tensor& tensor) { + using NNInputs = L2TauTagNNv1::NNInputs; + std::vector tensor_shape(tensor.shape().dims()); + for (int d = 0; d < tensor.shape().dims(); d++) { + tensor_shape.at(d) = tensor.shape().dim_size(d); + } + if (tensor_shape.size() != 4) { + throw cms::Exception("InvalidTensor") << "Tensor shape does not have 4 dimensions!"; + } + for (int tau_idx = 0; tau_idx < tensor_shape.at(0); tau_idx++) { + for (int phi_idx = 0; phi_idx < tensor_shape.at(1); phi_idx++) { + for (int eta_idx = 0; eta_idx < tensor_shape.at(2); eta_idx++) { + for (int var_idx = 0; var_idx < tensor_shape.at(3); var_idx++) { + auto getCell = [&](NNInputs input) -> float& { + return getCellImpl(tensor, tau_idx, phi_idx, eta_idx, input); + }; + float mean = L2cacheData_->normVec.at(var_idx).mean; + float std = L2cacheData_->normVec.at(var_idx).std; + float min = L2cacheData_->normVec.at(var_idx).min; + float max = L2cacheData_->normVec.at(var_idx).max; + float nonstd_var = getCell(static_cast(var_idx)); + float std_var = static_cast((nonstd_var - mean) / std); + if (std_var > max) { + std_var = static_cast(max); + } else if (std_var < min) { + std_var = static_cast(min); + } + 
getCell(static_cast(var_idx)) = std_var; + } + } + } + } +} + +void L2TauNNProducerAlpaka::fillL1TauVars(tensorflow::Tensor& cellGridMatrix, const std::vector& allTaus) { + using NNInputs = L2TauTagNNv1::NNInputs; + + const int nTaus = allTaus.size(); + for (int tau_idx = 0; tau_idx < nTaus; tau_idx++) { + for (int eta_idx = 0; eta_idx < L2TauTagNNv1::nCellEta; eta_idx++) { + for (int phi_idx = 0; phi_idx < L2TauTagNNv1::nCellPhi; phi_idx++) { + auto getCell = [&](NNInputs input) -> float& { + return getCellImpl(cellGridMatrix, tau_idx, phi_idx, eta_idx, input); + }; + getCell(NNInputs::l1Tau_pt) = allTaus[tau_idx]->pt(); + getCell(NNInputs::l1Tau_eta) = allTaus[tau_idx]->eta(); + getCell(NNInputs::l1Tau_hwIso) = allTaus[tau_idx]->hwIso(); + } + } + } +} + +template +std::tuple L2TauNNProducerAlpaka::getEtaPhiIndices(float eta, float phi, const LVec& tau_p4) { + const float deta = eta - tau_p4.eta(); + const float dphi = reco::deltaPhi(phi, tau_p4.phi()); + const int eta_idx = static_cast(floor((deta + L2TauTagNNv1::dR_max) / dEta_width)); + const int phi_idx = static_cast(floor((dphi + L2TauTagNNv1::dR_max) / dPhi_width)); + return std::make_tuple(deta, dphi, eta_idx, phi_idx); +} + +template +std::tuple L2TauNNProducerAlpaka::getEtaPhiIndices(const VPos& position, const LVec& tau_p4) { + return getEtaPhiIndices(position.eta(), position.phi(), tau_p4); +} + +void L2TauNNProducerAlpaka::fillCaloRecHits(tensorflow::Tensor& cellGridMatrix, + const std::vector& allTaus, + const caloRecHitCollections& caloRecHits) { + using NNInputs = L2TauTagNNv1::NNInputs; + + const int nTaus = allTaus.size(); + float deta, dphi; + int eta_idx = 0; + int phi_idx = 0; + int tau_idx = 0; + + auto getCell = [&](NNInputs input) -> float& { + return getCellImpl(cellGridMatrix, tau_idx, phi_idx, eta_idx, input); + }; + for (tau_idx = 0; tau_idx < nTaus; tau_idx++) { + // calorechit_EE + for (const auto& caloRecHit_ee : *caloRecHits.ee) { + if (caloRecHit_ee.energy() <= 0) + continue; + 
const auto& position = caloRecHits.geometry->getGeometry(caloRecHit_ee.id())->getPosition(); + const float eeCalEn = caloRecHit_ee.energy(); + const float eeCalChi2 = caloRecHit_ee.chi2(); + if (reco::deltaR2(position, allTaus[tau_idx]->polarP4()) < dR2_max) { + std::tie(deta, dphi, eta_idx, phi_idx) = getEtaPhiIndices(position, allTaus[tau_idx]->polarP4()); + getCell(NNInputs::EcalEnergySum) += eeCalEn; + getCell(NNInputs::EcalSize) += 1.; + getCell(NNInputs::EcalEnergyStdDev) += eeCalEn * eeCalEn; + getCell(NNInputs::EcalDeltaEta) += deta * eeCalEn; + getCell(NNInputs::EcalDeltaPhi) += dphi * eeCalEn; + if (eeCalChi2 >= 0) { + getCell(NNInputs::EcalChi2) += eeCalChi2 * eeCalEn; + getCell(NNInputs::EcalEnergySumForPositiveChi2) += eeCalEn; + getCell(NNInputs::EcalSizeForPositiveChi2) += 1.; + } + } + } + + // calorechit_EB + for (const auto& caloRecHit_eb : *caloRecHits.eb) { + if (caloRecHit_eb.energy() <= 0) + continue; + const auto& position = caloRecHits.geometry->getGeometry(caloRecHit_eb.id())->getPosition(); + const float ebCalEn = caloRecHit_eb.energy(); + const float ebCalChi2 = caloRecHit_eb.chi2(); + if (reco::deltaR2(position, allTaus[tau_idx]->polarP4()) < dR2_max) { + std::tie(deta, dphi, eta_idx, phi_idx) = getEtaPhiIndices(position, allTaus[tau_idx]->polarP4()); + getCell(NNInputs::EcalEnergySum) += ebCalEn; + getCell(NNInputs::EcalSize) += 1.; + getCell(NNInputs::EcalEnergyStdDev) += ebCalEn * ebCalEn; + getCell(NNInputs::EcalDeltaEta) += deta * ebCalEn; + getCell(NNInputs::EcalDeltaPhi) += dphi * ebCalEn; + if (ebCalChi2 >= 0) { + getCell(NNInputs::EcalChi2) += ebCalChi2 * ebCalEn; + getCell(NNInputs::EcalEnergySumForPositiveChi2) += ebCalEn; + getCell(NNInputs::EcalSizeForPositiveChi2) += 1.; + } + } + } + + // calorechit_HBHE + for (const auto& caloRecHit_hbhe : *caloRecHits.hbhe) { + if (caloRecHit_hbhe.energy() <= 0) + continue; + const auto& position = caloRecHits.geometry->getGeometry(caloRecHit_hbhe.id())->getPosition(); + const float 
hbheCalEn = caloRecHit_hbhe.energy(); + const float hbheCalChi2 = caloRecHit_hbhe.chi2(); + if (reco::deltaR2(position, allTaus[tau_idx]->polarP4()) < dR2_max) { + std::tie(deta, dphi, eta_idx, phi_idx) = getEtaPhiIndices(position, allTaus[tau_idx]->polarP4()); + getCell(NNInputs::HcalEnergySum) += hbheCalEn; + getCell(NNInputs::HcalEnergyStdDev) += hbheCalEn * hbheCalEn; + getCell(NNInputs::HcalSize) += 1.; + getCell(NNInputs::HcalDeltaEta) += deta * hbheCalEn; + getCell(NNInputs::HcalDeltaPhi) += dphi * hbheCalEn; + if (hbheCalChi2 >= 0) { + getCell(NNInputs::HcalChi2) += hbheCalChi2 * hbheCalEn; + getCell(NNInputs::HcalEnergySumForPositiveChi2) += hbheCalEn; + getCell(NNInputs::HcalSizeForPositiveChi2) += 1.; + } + } + } + + // calorechit_HO + for (const auto& caloRecHit_ho : *caloRecHits.ho) { + if (caloRecHit_ho.energy() <= 0) + continue; + const auto& position = caloRecHits.geometry->getGeometry(caloRecHit_ho.id())->getPosition(); + const float hoCalEn = caloRecHit_ho.energy(); + if (reco::deltaR2(position, allTaus[tau_idx]->polarP4()) < dR2_max) { + std::tie(deta, dphi, eta_idx, phi_idx) = getEtaPhiIndices(position, allTaus[tau_idx]->polarP4()); + getCell(NNInputs::HcalEnergySum) += hoCalEn; + getCell(NNInputs::HcalEnergyStdDev) += hoCalEn * hoCalEn; + getCell(NNInputs::HcalSize) += 1.; + getCell(NNInputs::HcalDeltaEta) += deta * hoCalEn; + getCell(NNInputs::HcalDeltaPhi) += dphi * hoCalEn; + } + } + + // normalize to sum and define stdDev + for (eta_idx = 0; eta_idx < L2TauTagNNv1::nCellEta; eta_idx++) { + for (phi_idx = 0; phi_idx < L2TauTagNNv1::nCellPhi; phi_idx++) { + /* normalize eCal vars*/ + if (getCell(NNInputs::EcalEnergySum) > 0.) { + getCell(NNInputs::EcalDeltaEta) /= getCell(NNInputs::EcalEnergySum); + getCell(NNInputs::EcalDeltaPhi) /= getCell(NNInputs::EcalEnergySum); + } + if (getCell(NNInputs::EcalEnergySumForPositiveChi2) > 0.) 
{ + getCell(NNInputs::EcalChi2) /= getCell(NNInputs::EcalEnergySumForPositiveChi2); + } + if (getCell(NNInputs::EcalSize) > 1.) { + // (stdDev - (enSum*enSum)/size) / (size-1) + getCell(NNInputs::EcalEnergyStdDev) = + (getCell(NNInputs::EcalEnergyStdDev) - + (getCell(NNInputs::EcalEnergySum) * getCell(NNInputs::EcalEnergySum)) / getCell(NNInputs::EcalSize)) / + (getCell(NNInputs::EcalSize) - 1); + } else { + getCell(NNInputs::EcalEnergyStdDev) = 0.; + } + /* normalize hCal Vars */ + if (getCell(NNInputs::HcalEnergySum) > 0.) { + getCell(NNInputs::HcalDeltaEta) /= getCell(NNInputs::HcalEnergySum); + getCell(NNInputs::HcalDeltaPhi) /= getCell(NNInputs::HcalEnergySum); + } + if (getCell(NNInputs::HcalEnergySumForPositiveChi2) > 0.) { + getCell(NNInputs::HcalChi2) /= getCell(NNInputs::HcalEnergySumForPositiveChi2); + } + if (getCell(NNInputs::HcalSize) > 1.) { + // (stdDev - (enSum*enSum)/size) / (size-1) + getCell(NNInputs::HcalEnergyStdDev) = + (getCell(NNInputs::HcalEnergyStdDev) - + (getCell(NNInputs::HcalEnergySum) * getCell(NNInputs::HcalEnergySum)) / getCell(NNInputs::HcalSize)) / + (getCell(NNInputs::HcalSize) - 1); + } else { + getCell(NNInputs::HcalEnergyStdDev) = 0.; + } + } + } + } +} + +void L2TauNNProducerAlpaka::selectGoodTracksAndVertices(const ZVertexHost& patavtx_soa, + const TrackSoAHost& patatracks_tsoa, + std::vector& trkGood, + std::vector& vtxGood) { + using patatrackHelpers = TracksUtilities; + const auto maxTracks = patatracks_tsoa.view().metadata().size(); + const int nv = patavtx_soa.view().nvFinal(); + trkGood.clear(); + trkGood.reserve(maxTracks); + vtxGood.clear(); + vtxGood.reserve(nv); + auto const* quality = patatracks_tsoa.view().quality(); + + // No need to sort either as the algorithms is just using the max (not even the location, just the max value of pt2sum). 
+ std::vector pTSquaredSum(nv, 0); + std::vector nTrkAssociated(nv, 0); + + for (int32_t trk_idx = 0; trk_idx < maxTracks; ++trk_idx) { + auto nHits = patatrackHelpers::nHits(patatracks_tsoa.view(), trk_idx); + if (nHits == 0) { + break; + } + int vtx_ass_to_track = patavtx_soa.view()[trk_idx].idv(); + if (vtx_ass_to_track >= 0 && vtx_ass_to_track < nv) { + auto patatrackPt = patatracks_tsoa.view()[trk_idx].pt(); + ++nTrkAssociated[vtx_ass_to_track]; + if (patatrackPt >= trackPtMin_ && patatracks_tsoa.const_view()[trk_idx].chi2() <= trackChi2Max_) { + patatrackPt = std::min(patatrackPt, trackPtMax_); + pTSquaredSum[vtx_ass_to_track] += patatrackPt * patatrackPt; + } + } + if (nHits > 0 and quality[trk_idx] >= pixelTrack::Quality::loose) { + trkGood.push_back(trk_idx); + } + } + if (nv > 0) { + const auto minFOM_fromFrac = (*std::max_element(pTSquaredSum.begin(), pTSquaredSum.end())) * fractionSumPt2_; + for (int j = nv - 1; j >= 0 && vtxGood.size() < maxVtx_; --j) { + auto vtx_idx = patavtx_soa.view()[j].sortInd(); + assert(vtx_idx < nv); + if (nTrkAssociated[vtx_idx] >= 2 && pTSquaredSum[vtx_idx] >= minFOM_fromFrac && + pTSquaredSum[vtx_idx] > minSumPt2_) { + vtxGood.push_back(vtx_idx); + } + } + } +} + +std::pair L2TauNNProducerAlpaka::impactParameter(int it, + const TrackSoAHost& patatracks_tsoa, + float patatrackPhi, + const reco::BeamSpot& beamspot, + const MagneticField* magfi) { + /* dxy and dz */ + riemannFit::Vector5d ipar, opar; + riemannFit::Matrix5d icov, ocov; + TracksUtilities::copyToDense(patatracks_tsoa.view(), ipar, icov, it); + riemannFit::transformToPerigeePlane(ipar, icov, opar, ocov); + LocalTrajectoryParameters lpar(opar(0), opar(1), opar(2), opar(3), opar(4), 1.); + float sp = std::sin(patatrackPhi); + float cp = std::cos(patatrackPhi); + Surface::RotationType Rotation(sp, -cp, 0, 0, 0, -1.f, cp, sp, 0); + GlobalPoint BeamSpotPoint(beamspot.x0(), beamspot.y0(), beamspot.z0()); + Plane impPointPlane(BeamSpotPoint, Rotation); + 
GlobalTrajectoryParameters gp( + impPointPlane.toGlobal(lpar.position()), impPointPlane.toGlobal(lpar.momentum()), lpar.charge(), magfi); + GlobalPoint vv = gp.position(); + math::XYZPoint pos(vv.x(), vv.y(), vv.z()); + GlobalVector pp = gp.momentum(); + math::XYZVector mom(pp.x(), pp.y(), pp.z()); + auto lambda = M_PI_2 - pp.theta(); + auto phi = pp.phi(); + float patatrackDxy = -vv.x() * std::sin(phi) + vv.y() * std::cos(phi); + float patatrackDz = + (vv.z() * std::cos(lambda) - (vv.x() * std::cos(phi) + vv.y() * std::sin(phi)) * std::sin(lambda)) / + std::cos(lambda); + return std::make_pair(patatrackDxy, patatrackDz); +} + +void L2TauNNProducerAlpaka::fillPatatracks(tensorflow::Tensor& cellGridMatrix, + const std::vector& allTaus, + const TrackSoAHost& patatracks_tsoa, + const ZVertexHost& patavtx_soa, + const reco::BeamSpot& beamspot, + const MagneticField* magfi) { + using NNInputs = L2TauTagNNv1::NNInputs; + using patatrackHelpers = TracksUtilities; + float deta, dphi; + int eta_idx = 0; + int phi_idx = 0; + int tau_idx = 0; + + auto getCell = [&](NNInputs input) -> float& { + return getCellImpl(cellGridMatrix, tau_idx, phi_idx, eta_idx, input); + }; + + std::vector trkGood; + std::vector vtxGood; + + selectGoodTracksAndVertices(patavtx_soa, patatracks_tsoa, trkGood, vtxGood); + + const int nTaus = allTaus.size(); + for (tau_idx = 0; tau_idx < nTaus; tau_idx++) { + const float tauEta = allTaus[tau_idx]->eta(); + const float tauPhi = allTaus[tau_idx]->phi(); + + for (const auto it : trkGood) { + const float patatrackPt = patatracks_tsoa.const_view()[it].pt(); + if (patatrackPt <= 0) + continue; + const float patatrackPhi = patatrackHelpers::phi(patatracks_tsoa.const_view(), it); + const float patatrackEta = patatracks_tsoa.const_view()[it].eta(); + const float patatrackCharge = patatrackHelpers::charge(patatracks_tsoa.const_view(), it); + const float patatrackChi2OverNdof = patatracks_tsoa.view()[it].chi2(); + const auto nHits = 
patatrackHelpers::nHits(patatracks_tsoa.const_view(), it); + if (nHits <= 0) + continue; + const int patatrackNdof = 2 * std::min(6, nHits) - 5; + + const int vtx_idx_assTrk = patavtx_soa.view()[it].idv(); + if (reco::deltaR2(patatrackEta, patatrackPhi, tauEta, tauPhi) < dR2_max) { + std::tie(deta, dphi, eta_idx, phi_idx) = + getEtaPhiIndices(patatrackEta, patatrackPhi, allTaus[tau_idx]->polarP4()); + getCell(NNInputs::PatatrackPtSum) += patatrackPt; + getCell(NNInputs::PatatrackSize) += 1.; + getCell(NNInputs::PatatrackChargeSum) += patatrackCharge; + getCell(NNInputs::PatatrackDeltaEta) += deta * patatrackPt; + getCell(NNInputs::PatatrackDeltaPhi) += dphi * patatrackPt; + getCell(NNInputs::PatatrackChi2OverNdof) += patatrackChi2OverNdof * patatrackPt; + getCell(NNInputs::PatatrackNdof) += patatrackNdof * patatrackPt; + std::pair impactParameters = impactParameter(it, patatracks_tsoa, patatrackPhi, beamspot, magfi); + getCell(NNInputs::PatatrackDxy) += impactParameters.first * patatrackPt; + getCell(NNInputs::PatatrackDz) += impactParameters.second * patatrackPt; + if ((std::find(vtxGood.begin(), vtxGood.end(), vtx_idx_assTrk) != vtxGood.end())) { + getCell(NNInputs::PatatrackPtSumWithVertex) += patatrackPt; + getCell(NNInputs::PatatrackSizeWithVertex) += 1.; + } + } + } + + // normalize to sum and define stdDev + for (eta_idx = 0; eta_idx < L2TauTagNNv1::nCellEta; eta_idx++) { + for (phi_idx = 0; phi_idx < L2TauTagNNv1::nCellPhi; phi_idx++) { + getCell(NNInputs::nVertices) = vtxGood.size(); + if (getCell(NNInputs::PatatrackPtSum) > 0.) 
{ + getCell(NNInputs::PatatrackDeltaEta) /= getCell(NNInputs::PatatrackPtSum); + getCell(NNInputs::PatatrackDeltaPhi) /= getCell(NNInputs::PatatrackPtSum); + getCell(NNInputs::PatatrackChi2OverNdof) /= getCell(NNInputs::PatatrackPtSum); + getCell(NNInputs::PatatrackNdof) /= getCell(NNInputs::PatatrackPtSum); + getCell(NNInputs::PatatrackDxy) /= getCell(NNInputs::PatatrackPtSum); + getCell(NNInputs::PatatrackDz) /= getCell(NNInputs::PatatrackPtSum); + } + } + } + } +} + +std::vector L2TauNNProducerAlpaka::getTauScore(const tensorflow::Tensor& cellGridMatrix) { + std::vector pred_tensor; + tensorflow::run(L2cacheData_->session, {{inputTensorName_, cellGridMatrix}}, {outputTensorName_}, &pred_tensor); + const int nTau = cellGridMatrix.shape().dim_size(0); + std::vector pred_vector(nTau); + for (int tau_idx = 0; tau_idx < nTau; ++tau_idx) { + pred_vector[tau_idx] = pred_tensor[0].matrix()(tau_idx, 0); + } + + return pred_vector; +} + +void L2TauNNProducerAlpaka::produce(edm::Event& event, const edm::EventSetup& eventsetup) { + std::vector> TauCollectionMap(L1TauDesc_.size()); + l1t::TauVectorRef allTaus; + + for (size_t inp_idx = 0; inp_idx < L1TauDesc_.size(); inp_idx++) { + l1t::TauVectorRef l1Taus; + auto const& l1TriggeredTaus = event.get(L1TauDesc_[inp_idx].inputToken_); + l1TriggeredTaus.getObjects(trigger::TriggerL1Tau, l1Taus); + TauCollectionMap.at(inp_idx).resize(l1Taus.size()); + + for (size_t l1_idx = 0; l1_idx < l1Taus.size(); l1_idx++) { + size_t tau_idx; + const auto iter = std::find(allTaus.begin(), allTaus.end(), l1Taus[l1_idx]); + if (iter != allTaus.end()) { + tau_idx = std::distance(allTaus.begin(), iter); + } else { + allTaus.push_back(l1Taus[l1_idx]); + tau_idx = allTaus.size() - 1; + } + TauCollectionMap.at(inp_idx).at(l1_idx) = tau_idx; + } + } + const auto ebCal = event.getHandle(ebToken_); + const auto eeCal = event.getHandle(eeToken_); + const auto hbhe = event.getHandle(hbheToken_); + const auto ho = event.getHandle(hoToken_); + auto const& 
patatracks_SoA = event.get(pataTracksToken_); + auto const& vertices_SoA = event.get(pataVerticesToken_); + const auto bsHandle = event.getHandle(beamSpotToken_); + + auto const fieldESH = eventsetup.getHandle(bFieldToken_); + auto const geometry = eventsetup.getHandle(geometryToken_); + + caloRecHitCollections caloRecHits; + caloRecHits.hbhe = &*hbhe; + caloRecHits.ho = &*ho; + caloRecHits.eb = &*ebCal; + caloRecHits.ee = &*eeCal; + caloRecHits.geometry = &*geometry; + + const int nTaus = allTaus.size(); + tensorflow::Tensor cellGridMatrix(tensorflow::DT_FLOAT, + {nTaus, L2TauTagNNv1::nCellEta, L2TauTagNNv1::nCellPhi, L2TauTagNNv1::nVars}); + const int n_inputs = nTaus * L2TauTagNNv1::nCellEta * L2TauTagNNv1::nCellPhi * L2TauTagNNv1::nVars; + for (int input_idx = 0; input_idx < n_inputs; ++input_idx) { + cellGridMatrix.flat()(input_idx) = 0; + } + fillL1TauVars(cellGridMatrix, allTaus); + + fillCaloRecHits(cellGridMatrix, allTaus, caloRecHits); + + fillPatatracks(cellGridMatrix, allTaus, patatracks_SoA, vertices_SoA, *bsHandle, fieldESH.product()); + + standardizeTensor(cellGridMatrix); + + if (debugLevel_ > 0) { + checknan(cellGridMatrix, debugLevel_); + } + + std::vector tau_score = getTauScore(cellGridMatrix); + + for (size_t inp_idx = 0; inp_idx < L1TauDesc_.size(); inp_idx++) { + const size_t nTau = TauCollectionMap[inp_idx].size(); + auto tau_tags = std::make_unique>(nTau); + for (size_t tau_pos = 0; tau_pos < nTau; ++tau_pos) { + const auto tau_idx = TauCollectionMap[inp_idx][tau_pos]; + if (debugLevel_ > 0) { + edm::LogInfo("DebugInfo") << event.id().event() << " \t " << (allTaus[tau_idx])->pt() << " \t " + << tau_score.at(tau_idx) << std::endl; + } + (*tau_tags)[tau_pos] = tau_score.at(tau_idx); + } + event.put(std::move(tau_tags), L1TauDesc_[inp_idx].CollectionName); + } +} +//define this as a plug-in +#include "FWCore/Framework/interface/MakerMacros.h" +DEFINE_FWK_MODULE(L2TauNNProducerAlpaka); diff --git 
a/RecoTracker/Configuration/python/RecoPixelVertexing_cff.py b/RecoTracker/Configuration/python/RecoPixelVertexing_cff.py index c08a0987d3f59..e97dd1009620e 100644 --- a/RecoTracker/Configuration/python/RecoPixelVertexing_cff.py +++ b/RecoTracker/Configuration/python/RecoPixelVertexing_cff.py @@ -98,9 +98,47 @@ pixelVerticesTask.copy() )) +### Alpaka + +## Alpaka Vertices + +from RecoTracker.PixelVertexFinding.pixelVertexProducerAlpakaPhase1_cfi import pixelVertexProducerAlpakaPhase1 as _pixelVerticesAlpakaPhase1 +from RecoTracker.PixelVertexFinding.pixelVertexProducerAlpakaPhase2_cfi import pixelVertexProducerAlpakaPhase2 as _pixelVerticesAlpakaPhase2 +pixelVerticesAlpaka = _pixelVerticesAlpakaPhase1.clone() +phase2_tracker.toReplaceWith(pixelVerticesAlpaka,_pixelVerticesAlpakaPhase2.clone()) + +from RecoTracker.PixelVertexFinding.pixelVertexFromSoAAlpaka_cfi import pixelVertexFromSoAAlpaka as _pixelVertexFromSoAAlpaka +alpaka.toReplaceWith(pixelVertices, _pixelVertexFromSoAAlpaka.clone()) + +alpaka.toReplaceWith(pixelVerticesTask, cms.Task( + # Build the pixel vertices in SoA format on Device + pixelVerticesAlpaka, + # Convert the pixel vertices from Portable SoA (on Host) to legacy format + pixelVertices +)) + +### Alpaka Device vs Host validation + +from Configuration.ProcessModifiers.alpakaValidationPixel_cff import alpakaValidationPixel + +# Hit SoA producer on serial backend +pixelVerticesAlpakaSerial = pixelVerticesAlpaka.clone( + pixelTrackSrc = 'pixelTracksAlpakaSerial', + alpaka = dict( backend = 'serial_sync' ) +) + +alpakaValidationPixel.toReplaceWith(pixelVerticesTask, cms.Task( + # Reconstruct and convert the pixel tracks with alpaka on device + pixelVerticesTask.copy(), + # SoA serial counterpart + pixelVerticesAlpakaSerial)) + + # Tasks and Sequences recopixelvertexingTask = cms.Task( pixelTracksTask, pixelVerticesTask ) recopixelvertexing = cms.Sequence(recopixelvertexingTask) + + diff --git 
a/RecoTracker/Configuration/python/customizePixelOnlyForProfiling.py b/RecoTracker/Configuration/python/customizePixelOnlyForProfiling.py index 3d121a8736f8e..85131be421d6d 100644 --- a/RecoTracker/Configuration/python/customizePixelOnlyForProfiling.py +++ b/RecoTracker/Configuration/python/customizePixelOnlyForProfiling.py @@ -3,10 +3,12 @@ # Customise the Pixel-only reconstruction to run on GPU # # Run the unpacker, clustering, ntuplets, track fit and vertex reconstruction on GPU. +# CUDA and Alpaka co-living here for the moment + def customizePixelOnlyForProfilingGPUOnly(process): process.consumer = cms.EDAnalyzer("GenericConsumer", - eventProducts = cms.untracked.vstring('pixelTracksCUDA', 'pixelVerticesCUDA') + eventProducts = cms.untracked.vstring('pixelTracksCUDA', 'pixelVerticesCUDA','pixelTracksAlpaka','pixelVerticesAlpaka') ) process.consume_step = cms.EndPath(process.consumer) @@ -23,6 +25,7 @@ def customizePixelOnlyForProfilingGPUOnly(process): # # The same customisation can be also used on the SoA CPU workflow, running up to the # tracks and vertices on the CPU in SoA format, without conversion to legacy format. +# TODO: does exist a module that forces the copy to host for Alpaka? def customizePixelOnlyForProfilingGPUWithHostCopy(process): #? 
process.siPixelRecHitSoAFromLegacy.convertToLegacy = False @@ -56,4 +59,4 @@ def customizePixelOnlyForProfiling(process): process.schedule = cms.Schedule(process.raw2digi_step, process.reconstruction_step, process.consume_step) - return process + return process \ No newline at end of file diff --git a/RecoTracker/PixelSeeding/BuildFile.xml b/RecoTracker/PixelSeeding/BuildFile.xml index 7bc10578b4448..e3875d0fcfdb3 100644 --- a/RecoTracker/PixelSeeding/BuildFile.xml +++ b/RecoTracker/PixelSeeding/BuildFile.xml @@ -16,6 +16,7 @@ + diff --git a/RecoTracker/PixelSeeding/plugins/BuildFile.xml b/RecoTracker/PixelSeeding/plugins/BuildFile.xml index 82b80e1c55b66..2e23e15f9f7c2 100644 --- a/RecoTracker/PixelSeeding/plugins/BuildFile.xml +++ b/RecoTracker/PixelSeeding/plugins/BuildFile.xml @@ -1,21 +1,36 @@ - - - + + + + - + + + + + + + + + + + + + + + diff --git a/RecoTracker/PixelSeeding/plugins/alpaka/BrokenLineFit.dev.cc b/RecoTracker/PixelSeeding/plugins/alpaka/BrokenLineFit.dev.cc new file mode 100644 index 0000000000000..9ba9c3d289f50 --- /dev/null +++ b/RecoTracker/PixelSeeding/plugins/alpaka/BrokenLineFit.dev.cc @@ -0,0 +1,412 @@ +// +// Author: Felice Pantaleo, CERN +// + +//#define BROKENLINE_DEBUG +//#define BL_DUMP_HITS +#include +#include + +#include "DataFormats/TrackingRecHitSoA/interface/TrackingRecHitsLayout.h" +#include "HeterogeneousCore/AlpakaInterface/interface/traits.h" +#include "HeterogeneousCore/AlpakaInterface/interface/config.h" +#include "RecoLocalTracker/SiPixelRecHits/interface/pixelCPEforDevice.h" +#include "RecoTracker/PixelTrackFitting/interface/alpaka/BrokenLine.h" + +#include "HelixFit.h" + +template +using Tuples = typename TrackSoA::HitContainer; +template +using OutputSoAView = TrackSoAView; +template +using TupleMultiplicity = caStructures::TupleMultiplicityT; + +// #define BL_DUMP_HITS + +namespace ALPAKA_ACCELERATOR_NAMESPACE { + template + class kernel_BLFastFit { + public: + template >> + ALPAKA_FN_ACC void operator()(TAcc const 
&acc, + Tuples const *__restrict__ foundNtuplets, + TupleMultiplicity const *__restrict__ tupleMultiplicity, + TrackingRecHitAlpakaSoAConstView hh, + pixelCPEforDevice::ParamsOnDeviceT const *__restrict__ cpeParams, + typename TrackerTraits::tindex_type *__restrict__ ptkids, + double *__restrict__ phits, + float *__restrict__ phits_ge, + double *__restrict__ pfast_fit, + uint32_t nHitsL, + uint32_t nHitsH, + int32_t offset) const { + constexpr uint32_t hitsInFit = N; + constexpr auto invalidTkId = std::numeric_limits::max(); + + ALPAKA_ASSERT_OFFLOAD(hitsInFit <= nHitsL); + ALPAKA_ASSERT_OFFLOAD(nHitsL <= nHitsH); + ALPAKA_ASSERT_OFFLOAD(phits); + ALPAKA_ASSERT_OFFLOAD(pfast_fit); + ALPAKA_ASSERT_OFFLOAD(foundNtuplets); + ALPAKA_ASSERT_OFFLOAD(tupleMultiplicity); + + // look in bin for this hit multiplicity + int totTK = tupleMultiplicity->end(nHitsH) - tupleMultiplicity->begin(nHitsL); + ALPAKA_ASSERT_OFFLOAD(totTK <= int(tupleMultiplicity->size())); + ALPAKA_ASSERT_OFFLOAD(totTK >= 0); + +#ifdef BROKENLINE_DEBUG + const uint32_t threadIdx(alpaka::getIdx(acc)[0u]); + if (0 == threadIdx) { + printf("%d total Ntuple\n", tupleMultiplicity->size()); + printf("%d Ntuple of size %d/%d for %d hits to fit\n", totTK, nHitsL, nHitsH, hitsInFit); + } +#endif + const auto nt = riemannFit::maxNumberOfConcurrentFits; + for (auto local_idx : cms::alpakatools::elements_with_stride(acc, nt)) { + auto tuple_idx = local_idx + offset; + if ((int)tuple_idx >= totTK) { + ptkids[local_idx] = invalidTkId; + break; + } + // get it from the ntuple container (one to one to helix) + auto tkid = *(tupleMultiplicity->begin(nHitsL) + tuple_idx); + ALPAKA_ASSERT_OFFLOAD(tkid < foundNtuplets->nbins()); + + ptkids[local_idx] = tkid; + + auto nHits = foundNtuplets->size(tkid); + + ALPAKA_ASSERT_OFFLOAD(nHits >= nHitsL); + ALPAKA_ASSERT_OFFLOAD(nHits <= nHitsH); + + riemannFit::Map3xNd hits(phits + local_idx); + riemannFit::Map4d fast_fit(pfast_fit + local_idx); + riemannFit::Map6xNf 
hits_ge(phits_ge + local_idx); + +#ifdef BL_DUMP_HITS + auto &&done = alpaka::declareSharedVar(acc); + done = 0; + alpaka::syncBlockThreads(acc); + bool dump = + (foundNtuplets->size(tkid) == 5 && 0 == alpaka::atomicAdd(acc, &done, 1, alpaka::hierarchy::Blocks{})); +#endif + + // Prepare data structure + auto const *hitId = foundNtuplets->begin(tkid); + + // #define YERR_FROM_DC +#ifdef YERR_FROM_DC + // try to compute more precise error in y + auto dx = hh[hitId[hitsInFit - 1]].xGlobal() - hh[hitId[0]].xGlobal(); + auto dy = hh[hitId[hitsInFit - 1]].yGlobal() - hh[hitId[0]].yGlobal(); + auto dz = hh[hitId[hitsInFit - 1]].zGlobal() - hh[hitId[0]].zGlobal(); + float ux, uy, uz; +#endif + + float incr = std::max(1.f, float(nHits) / float(hitsInFit)); + float n = 0; + for (uint32_t i = 0; i < hitsInFit; ++i) { + int j = int(n + 0.5f); // round + if (hitsInFit - 1 == i) + j = nHits - 1; // force last hit to ensure max lever arm. + ALPAKA_ASSERT_OFFLOAD(j < int(nHits)); + n += incr; + auto hit = hitId[j]; + float ge[6]; + +#ifdef YERR_FROM_DC + auto const &dp = cpeParams->detParams(hh.detectorIndex(hit)); + auto status = hh[hit].chargeAndStatus().status; + int qbin = CPEFastParametrisation::kGenErrorQBins - 1 - status.qBin; + ALPAKA_ASSERT_OFFLOAD(qbin >= 0 && qbin < 5); + bool nok = (status.isBigY | status.isOneY); + // compute cotanbeta and use it to recompute error + dp.frame.rotation().multiply(dx, dy, dz, ux, uy, uz); + auto cb = std::abs(uy / uz); + int bin = + int(cb * (float(phase1PixelTopology::pixelThickess) / float(phase1PixelTopology::pixelPitchY)) * 8.f) - 4; + int low_value = 0; + int high_value = CPEFastParametrisation::kNumErrorBins - 1; + // return estimated bin value truncated to [0, 15] + bin = std::clamp(bin, low_value, high_value); + float yerr = dp.sigmay[bin] * 1.e-4f; // toCM + yerr *= dp.yfact[qbin]; // inflate + yerr *= yerr; + yerr += dp.apeYY; + yerr = nok ? 
hh[hit].yerrLocal() : yerr; + dp.frame.toGlobal(hh[hit].xerrLocal(), 0, yerr, ge); +#else + cpeParams->detParams(hh[hit].detectorIndex()).frame.toGlobal(hh[hit].xerrLocal(), 0, hh[hit].yerrLocal(), ge); +#endif + +#ifdef BL_DUMP_HITS + bool dump = foundNtuplets->size(tkid) == 5; + if (dump) { + printf("Track id %d %d Hit %d on %d\nGlobal: hits.col(%d) << %f,%f,%f\n", + local_idx, + tkid, + hit, + hh[hit].detectorIndex(), + i, + hh[hit].xGlobal(), + hh[hit].yGlobal(), + hh[hit].zGlobal()); + printf("Error: hits_ge.col(%d) << %e,%e,%e,%e,%e,%e\n", i, ge[0], ge[1], ge[2], ge[3], ge[4], ge[5]); + } +#endif + + hits.col(i) << hh[hit].xGlobal(), hh[hit].yGlobal(), hh[hit].zGlobal(); + hits_ge.col(i) << ge[0], ge[1], ge[2], ge[3], ge[4], ge[5]; + } + brokenline::fastFit(acc, hits, fast_fit); + + // no NaN here.... + ALPAKA_ASSERT_OFFLOAD(fast_fit(0) == fast_fit(0)); + ALPAKA_ASSERT_OFFLOAD(fast_fit(1) == fast_fit(1)); + ALPAKA_ASSERT_OFFLOAD(fast_fit(2) == fast_fit(2)); + ALPAKA_ASSERT_OFFLOAD(fast_fit(3) == fast_fit(3)); + } + } + }; + + template + struct kernel_BLFit { + public: + template >> + ALPAKA_FN_ACC void operator()(TAcc const &acc, + TupleMultiplicity const *__restrict__ tupleMultiplicity, + double bField, + OutputSoAView results_view, + typename TrackerTraits::tindex_type const *__restrict__ ptkids, + double *__restrict__ phits, + float *__restrict__ phits_ge, + double *__restrict__ pfast_fit) const { + ALPAKA_ASSERT_OFFLOAD(results_view.pt()); + ALPAKA_ASSERT_OFFLOAD(results_view.eta()); + ALPAKA_ASSERT_OFFLOAD(results_view.chi2()); + ALPAKA_ASSERT_OFFLOAD(pfast_fit); + constexpr auto invalidTkId = std::numeric_limits::max(); + + // same as above... 
+ // look in bin for this hit multiplicity + const auto nt = riemannFit::maxNumberOfConcurrentFits; + for (auto local_idx : cms::alpakatools::elements_with_stride(acc, nt)) { + if (invalidTkId == ptkids[local_idx]) + break; + auto tkid = ptkids[local_idx]; + + ALPAKA_ASSERT_OFFLOAD(tkid < TrackerTraits::maxNumberOfTuples); + + riemannFit::Map3xNd hits(phits + local_idx); + riemannFit::Map4d fast_fit(pfast_fit + local_idx); + riemannFit::Map6xNf hits_ge(phits_ge + local_idx); + + brokenline::PreparedBrokenLineData data; + + brokenline::karimaki_circle_fit circle; + riemannFit::LineFit line; + + brokenline::prepareBrokenLineData(acc, hits, fast_fit, bField, data); + brokenline::lineFit(acc, hits_ge, fast_fit, bField, data, line); + brokenline::circleFit(acc, hits, hits_ge, fast_fit, bField, data, circle); + + TracksUtilities::copyFromCircle( + results_view, circle.par, circle.cov, line.par, line.cov, 1.f / float(bField), tkid); + results_view[tkid].pt() = float(bField) / float(std::abs(circle.par(2))); + results_view[tkid].eta() = alpaka::math::asinh(acc, line.par(0)); + results_view[tkid].chi2() = (circle.chi2 + line.chi2) / (2 * N - 5); + +#ifdef BROKENLINE_DEBUG + if (!(circle.chi2 >= 0) || !(line.chi2 >= 0)) + printf("kernelBLFit failed! 
%f/%f\n", circle.chi2, line.chi2); + printf("kernelBLFit size %d for %d hits circle.par(0,1,2): %d %f,%f,%f\n", + N, + N, + tkid, + circle.par(0), + circle.par(1), + circle.par(2)); + printf("kernelBLHits line.par(0,1): %d %f,%f\n", tkid, line.par(0), line.par(1)); + printf("kernelBLHits chi2 cov %f/%f %e,%e,%e,%e,%e\n", + circle.chi2, + line.chi2, + circle.cov(0, 0), + circle.cov(1, 1), + circle.cov(2, 2), + line.cov(0, 0), + line.cov(1, 1)); +#endif + } + } + }; + + template + void HelixFit::launchBrokenLineKernels( + const TrackingRecHitAlpakaSoAConstView &hv, + pixelCPEforDevice::ParamsOnDeviceT const *cpeParams, + uint32_t hitsInFit, + uint32_t maxNumberOfTuples, + Queue &queue) { + ALPAKA_ASSERT_OFFLOAD(tuples_); + + uint32_t blockSize = 64; + uint32_t numberOfBlocks = cms::alpakatools::divide_up_by(maxNumberOfConcurrentFits_, blockSize); + const WorkDiv1D workDivTriplets = cms::alpakatools::make_workdiv(numberOfBlocks, blockSize); + const WorkDiv1D workDivQuadsPenta = cms::alpakatools::make_workdiv(numberOfBlocks / 4, blockSize); + + // Fit internals + auto tkidDevice = + cms::alpakatools::make_device_buffer(queue, maxNumberOfConcurrentFits_); + auto hitsDevice = cms::alpakatools::make_device_buffer( + queue, maxNumberOfConcurrentFits_ * sizeof(riemannFit::Matrix3xNd<6>) / sizeof(double)); + auto hits_geDevice = cms::alpakatools::make_device_buffer( + queue, maxNumberOfConcurrentFits_ * sizeof(riemannFit::Matrix6xNf<6>) / sizeof(float)); + auto fast_fit_resultsDevice = cms::alpakatools::make_device_buffer( + queue, maxNumberOfConcurrentFits_ * sizeof(riemannFit::Vector4d) / sizeof(double)); + + for (uint32_t offset = 0; offset < maxNumberOfTuples; offset += maxNumberOfConcurrentFits_) { + // fit triplets + + alpaka::exec(queue, + workDivTriplets, + kernel_BLFastFit<3, TrackerTraits>{}, + tuples_, + tupleMultiplicity_, + hv, + cpeParams, + tkidDevice.data(), + hitsDevice.data(), + hits_geDevice.data(), + fast_fit_resultsDevice.data(), + 3, + 3, + offset); + + 
alpaka::exec(queue, + workDivTriplets, + kernel_BLFit<3, TrackerTraits>{}, + tupleMultiplicity_, + bField_, + outputSoa_, + tkidDevice.data(), + hitsDevice.data(), + hits_geDevice.data(), + fast_fit_resultsDevice.data()); + + if (fitNas4_) { + // fit all as 4 + riemannFit::rolling_fits<4, TrackerTraits::maxHitsOnTrack, 1>([this, + &hv, + &cpeParams, + &tkidDevice, + &hitsDevice, + &hits_geDevice, + &fast_fit_resultsDevice, + &offset, + &queue, + &workDivQuadsPenta](auto i) { + alpaka::exec(queue, + workDivQuadsPenta, + kernel_BLFastFit<4, TrackerTraits>{}, + tuples_, + tupleMultiplicity_, + hv, + cpeParams, + tkidDevice.data(), + hitsDevice.data(), + hits_geDevice.data(), + fast_fit_resultsDevice.data(), + 4, + 4, + offset); + + alpaka::exec(queue, + workDivQuadsPenta, + kernel_BLFit<4, TrackerTraits>{}, + tupleMultiplicity_, + bField_, + outputSoa_, + tkidDevice.data(), + hitsDevice.data(), + hits_geDevice.data(), + fast_fit_resultsDevice.data()); + }); + + } else { + riemannFit::rolling_fits<4, TrackerTraits::maxHitsOnTrackForFullFit, 1>([this, + &hv, + &cpeParams, + &tkidDevice, + &hitsDevice, + &hits_geDevice, + &fast_fit_resultsDevice, + &offset, + &queue, + &workDivQuadsPenta](auto i) { + alpaka::exec(queue, + workDivQuadsPenta, + kernel_BLFastFit{}, + tuples_, + tupleMultiplicity_, + hv, + cpeParams, + tkidDevice.data(), + hitsDevice.data(), + hits_geDevice.data(), + fast_fit_resultsDevice.data(), + i, + i, + offset); + + alpaka::exec(queue, + workDivQuadsPenta, + kernel_BLFit{}, + tupleMultiplicity_, + bField_, + outputSoa_, + tkidDevice.data(), + hitsDevice.data(), + hits_geDevice.data(), + fast_fit_resultsDevice.data()); + }); + + static_assert(TrackerTraits::maxHitsOnTrackForFullFit < TrackerTraits::maxHitsOnTrack); + + //Fit all the rest using the maximum from previous call + alpaka::exec(queue, + workDivQuadsPenta, + kernel_BLFastFit{}, + tuples_, + tupleMultiplicity_, + hv, + cpeParams, + tkidDevice.data(), + hitsDevice.data(), + hits_geDevice.data(), 
+ fast_fit_resultsDevice.data(), + TrackerTraits::maxHitsOnTrackForFullFit, + TrackerTraits::maxHitsOnTrack - 1, + offset); + + alpaka::exec(queue, + workDivQuadsPenta, + kernel_BLFit{}, + tupleMultiplicity_, + bField_, + outputSoa_, + tkidDevice.data(), + hitsDevice.data(), + hits_geDevice.data(), + fast_fit_resultsDevice.data()); + } + + } // loop on concurrent fits + } + + template class HelixFit; + template class HelixFit; + template class HelixFit; + +} // namespace ALPAKA_ACCELERATOR_NAMESPACE diff --git a/RecoTracker/PixelSeeding/plugins/alpaka/CACell.h b/RecoTracker/PixelSeeding/plugins/alpaka/CACell.h new file mode 100644 index 0000000000000..1ae4bb661f377 --- /dev/null +++ b/RecoTracker/PixelSeeding/plugins/alpaka/CACell.h @@ -0,0 +1,399 @@ +#ifndef RecoPixelVertexing_PixelTriplets_CACellT_h +#define RecoPixelVertexing_PixelTriplets_CACellT_h + +// +// Author: Felice Pantaleo, CERN +// + +// #define ONLY_TRIPLETS_IN_HOLE + +#include + +#include "DataFormats/TrackingRecHitSoA/interface/TrackingRecHitsLayout.h" +#include "HeterogeneousCore/AlpakaInterface/interface/VecArray.h" +#include "HeterogeneousCore/AlpakaInterface/interface/SimpleVector.h" +#include "RecoTracker/PixelSeeding/interface/CircleEq.h" +#include "DataFormats/TrackSoA/interface/TrackDefinitions.h" +#include "DataFormats/TrackSoA/interface/TrackLayout.h" +#include "Geometry/CommonTopologies/interface/SimplePixelTopology.h" +#include "CAStructures.h" + +namespace ALPAKA_ACCELERATOR_NAMESPACE { + template + class CACellT { + public: + using PtrAsInt = unsigned long long; + + static constexpr auto maxCellsPerHit = TrackerTraits::maxCellsPerHit; + using OuterHitOfCellContainer = caStructures::OuterHitOfCellContainerT; + using OuterHitOfCell = caStructures::OuterHitOfCellT; + using CellNeighbors = caStructures::CellNeighborsT; + using CellTracks = caStructures::CellTracksT; + using CellNeighborsVector = caStructures::CellNeighborsVectorT; + using CellTracksVector = 
caStructures::CellTracksVectorT; + + using HitsConstView = TrackingRecHitAlpakaSoAConstView; + using hindex_type = typename TrackerTraits::hindex_type; + using tindex_type = typename TrackerTraits::tindex_type; + static constexpr auto invalidHitId = std::numeric_limits::max(); + + using TmpTuple = cms::alpakatools::VecArray; + + using HitContainer = typename TrackSoA::HitContainer; + using Quality = ::pixelTrack::Quality; + static constexpr auto bad = ::pixelTrack::Quality::bad; + + enum class StatusBit : uint16_t { kUsed = 1, kInTrack = 2, kKilled = 1 << 15 }; + + CACellT() = default; + + ALPAKA_FN_ACC ALPAKA_FN_INLINE void init(CellNeighborsVector& cellNeighbors, + CellTracksVector& cellTracks, + const HitsConstView& hh, + int layerPairId, + hindex_type innerHitId, + hindex_type outerHitId) { + theInnerHitId = innerHitId; + theOuterHitId = outerHitId; + theLayerPairId_ = layerPairId; + theStatus_ = 0; + theFishboneId = invalidHitId; + + // optimization that depends on access pattern + theInnerZ = hh[innerHitId].zGlobal(); + theInnerR = hh[innerHitId].rGlobal(); + + // link to default empty + theOuterNeighbors = &cellNeighbors[0]; + theTracks = &cellTracks[0]; + assert(outerNeighbors().empty()); + assert(tracks().empty()); + } + + template + ALPAKA_FN_ACC ALPAKA_FN_INLINE __attribute__((always_inline)) int addOuterNeighbor( + const TAcc& acc, typename TrackerTraits::cindex_type t, CellNeighborsVector& cellNeighbors) { + // use smart cache + if (outerNeighbors().empty()) { + auto i = cellNeighbors.extend(acc); // maybe wasted.... + if (i > 0) { + cellNeighbors[i].reset(); + alpaka::mem_fence(acc, alpaka::memory_scope::Grid{}); +#ifdef ALPAKA_ACC_CPU_B_SEQ_T_SEQ_ENABLED + theOuterNeighbors = &cellNeighbors[i]; +#else + auto zero = (PtrAsInt)(&cellNeighbors[0]); + alpaka::atomicCas(acc, + (PtrAsInt*)(&theOuterNeighbors), + zero, + (PtrAsInt)(&cellNeighbors[i]), + alpaka::hierarchy::Blocks{}); // if fails we cannot give "i" back... 
+#endif + } else + return -1; + } + alpaka::mem_fence(acc, alpaka::memory_scope::Grid{}); + return outerNeighbors().push_back(acc, t); + } + + template + ALPAKA_FN_ACC ALPAKA_FN_INLINE __attribute__((always_inline)) int addTrack(TAcc const& acc, + tindex_type t, + CellTracksVector& cellTracks) { + if (tracks().empty()) { + auto i = cellTracks.extend(acc); // maybe wasted.... + if (i > 0) { + cellTracks[i].reset(); + alpaka::mem_fence(acc, alpaka::memory_scope::Grid{}); +#ifdef ALPAKA_ACC_CPU_B_SEQ_T_SEQ_ENABLED + theTracks = &cellTracks[i]; +#else + auto zero = (PtrAsInt)(&cellTracks[0]); + alpaka::atomicCas(acc, + (PtrAsInt*)(&theTracks), + zero, + (PtrAsInt)(&cellTracks[i]), + alpaka::hierarchy::Blocks{}); // if fails we cannot give "i" back... + +#endif + } else + return -1; + } + alpaka::mem_fence(acc, alpaka::memory_scope::Grid{}); + return tracks().push_back(acc, t); + } + + ALPAKA_FN_ACC ALPAKA_FN_INLINE CellTracks& tracks() { return *theTracks; } + ALPAKA_FN_ACC ALPAKA_FN_INLINE CellTracks const& tracks() const { return *theTracks; } + ALPAKA_FN_ACC ALPAKA_FN_INLINE CellNeighbors& outerNeighbors() { return *theOuterNeighbors; } + ALPAKA_FN_ACC ALPAKA_FN_INLINE CellNeighbors const& outerNeighbors() const { return *theOuterNeighbors; } + ALPAKA_FN_ACC ALPAKA_FN_INLINE float inner_x(const HitsConstView& hh) const { return hh[theInnerHitId].xGlobal(); } + ALPAKA_FN_ACC ALPAKA_FN_INLINE float outer_x(const HitsConstView& hh) const { return hh[theOuterHitId].xGlobal(); } + ALPAKA_FN_ACC ALPAKA_FN_INLINE float inner_y(const HitsConstView& hh) const { return hh[theInnerHitId].yGlobal(); } + ALPAKA_FN_ACC ALPAKA_FN_INLINE float outer_y(const HitsConstView& hh) const { return hh[theOuterHitId].yGlobal(); } + ALPAKA_FN_ACC ALPAKA_FN_INLINE float inner_z(const HitsConstView& hh) const { return theInnerZ; } + ALPAKA_FN_ACC ALPAKA_FN_INLINE float outer_z(const HitsConstView& hh) const { return hh[theOuterHitId].zGlobal(); } + ALPAKA_FN_ACC ALPAKA_FN_INLINE float 
inner_r(const HitsConstView& hh) const { return theInnerR; } + ALPAKA_FN_ACC ALPAKA_FN_INLINE float outer_r(const HitsConstView& hh) const { return hh[theOuterHitId].rGlobal(); } + + ALPAKA_FN_ACC ALPAKA_FN_INLINE auto inner_iphi(const HitsConstView& hh) const { return hh[theInnerHitId].iphi(); } + ALPAKA_FN_ACC ALPAKA_FN_INLINE auto outer_iphi(const HitsConstView& hh) const { return hh[theOuterHitId].iphi(); } + + ALPAKA_FN_ACC ALPAKA_FN_INLINE float inner_detIndex(const HitsConstView& hh) const { + return hh[theInnerHitId].detectorIndex(); + } + ALPAKA_FN_ACC ALPAKA_FN_INLINE float outer_detIndex(const HitsConstView& hh) const { + return hh[theOuterHitId].detectorIndex(); + } + + constexpr unsigned int inner_hit_id() const { return theInnerHitId; } + constexpr unsigned int outer_hit_id() const { return theOuterHitId; } + + ALPAKA_FN_ACC void print_cell() const { + printf("printing cell: on layerPair: %d, innerHitId: %d, outerHitId: %d \n", + theLayerPairId_, + theInnerHitId, + theOuterHitId); + } + + ALPAKA_FN_ACC bool check_alignment(const HitsConstView& hh, + CACellT const& otherCell, + const float ptmin, + const float hardCurvCut, + const float caThetaCutBarrel, + const float caThetaCutForward, + const float dcaCutInnerTriplet, + const float dcaCutOuterTriplet) const { + // detIndex of the layerStart for the Phase1 Pixel Detector: + // [BPX1, BPX2, BPX3, BPX4, FP1, FP2, FP3, FN1, FN2, FN3, LAST_VALID] + // [ 0, 96, 320, 672, 1184, 1296, 1408, 1520, 1632, 1744, 1856] + auto ri = inner_r(hh); + auto zi = inner_z(hh); + + auto ro = outer_r(hh); + auto zo = outer_z(hh); + + auto r1 = otherCell.inner_r(hh); + auto z1 = otherCell.inner_z(hh); + auto isBarrel = otherCell.outer_detIndex(hh) < TrackerTraits::last_barrel_detIndex; + // TODO tune CA cuts below (theta and dca) + bool aligned = + areAlignedRZ(r1, + z1, + ri, + zi, + ro, + zo, + ptmin, + isBarrel ? 
caThetaCutBarrel : caThetaCutForward); + return (aligned && dcaCut(hh, + otherCell, + otherCell.inner_detIndex(hh) < TrackerTraits::last_bpix1_detIndex ? dcaCutInnerTriplet + : dcaCutOuterTriplet, + hardCurvCut)); + } + + ALPAKA_FN_ACC ALPAKA_FN_INLINE __attribute__((always_inline)) static bool areAlignedRZ( + float r1, float z1, float ri, float zi, float ro, float zo, const float ptmin, const float thetaCut) { + float radius_diff = std::abs(r1 - ro); + float distance_13_squared = radius_diff * radius_diff + (z1 - zo) * (z1 - zo); + + float pMin = ptmin * std::sqrt(distance_13_squared); // this needs to be divided by + // radius_diff later + + float tan_12_13_half_mul_distance_13_squared = fabs(z1 * (ri - ro) + zi * (ro - r1) + zo * (r1 - ri)); + return tan_12_13_half_mul_distance_13_squared * pMin <= thetaCut * distance_13_squared * radius_diff; + } + + ALPAKA_FN_ACC ALPAKA_FN_INLINE bool dcaCut(const HitsConstView& hh, + CACellT const& otherCell, + const float region_origin_radius_plus_tolerance, + const float maxCurv) const { + auto x1 = otherCell.inner_x(hh); + auto y1 = otherCell.inner_y(hh); + + auto x2 = inner_x(hh); + auto y2 = inner_y(hh); + + auto x3 = outer_x(hh); + auto y3 = outer_y(hh); + + CircleEq eq(x1, y1, x2, y2, x3, y3); + + if (eq.curvature() > maxCurv) + return false; + + return std::abs(eq.dca0()) < region_origin_radius_plus_tolerance * std::abs(eq.curvature()); + } + + ALPAKA_FN_ACC ALPAKA_FN_INLINE __attribute__((always_inline)) static bool dcaCutH( + float x1, + float y1, + float x2, + float y2, + float x3, + float y3, + const float region_origin_radius_plus_tolerance, + const float maxCurv) { + CircleEq eq(x1, y1, x2, y2, x3, y3); + + if (eq.curvature() > maxCurv) + return false; + + return std::abs(eq.dca0()) < region_origin_radius_plus_tolerance * std::abs(eq.curvature()); + } + + ALPAKA_FN_ACC ALPAKA_FN_INLINE bool hole0(const HitsConstView& hh, CACellT const& innerCell) const { + using namespace phase1PixelTopology; + + int p = 
innerCell.inner_iphi(hh); + if (p < 0) + p += std::numeric_limits::max(); + p = (max_ladder_bpx0 * p) / std::numeric_limits::max(); + p %= max_ladder_bpx0; + auto il = first_ladder_bpx0 + p; + auto r0 = hh.averageGeometry().ladderR[il]; + auto ri = innerCell.inner_r(hh); + auto zi = innerCell.inner_z(hh); + auto ro = outer_r(hh); + auto zo = outer_z(hh); + auto z0 = zi + (r0 - ri) * (zo - zi) / (ro - ri); + auto z_in_ladder = std::abs(z0 - hh.averageGeometry().ladderZ[il]); + auto z_in_module = z_in_ladder - module_length_bpx0 * int(z_in_ladder / module_length_bpx0); + auto gap = z_in_module < module_tolerance_bpx0 || z_in_module > (module_length_bpx0 - module_tolerance_bpx0); + return gap; + } + + ALPAKA_FN_ACC ALPAKA_FN_INLINE bool hole4(const HitsConstView& hh, CACellT const& innerCell) const { + using namespace phase1PixelTopology; + + int p = outer_iphi(hh); + if (p < 0) + p += std::numeric_limits::max(); + p = (max_ladder_bpx4 * p) / std::numeric_limits::max(); + p %= max_ladder_bpx4; + auto il = first_ladder_bpx4 + p; + auto r4 = hh.averageGeometry().ladderR[il]; + auto ri = innerCell.inner_r(hh); + auto zi = innerCell.inner_z(hh); + auto ro = outer_r(hh); + auto zo = outer_z(hh); + auto z4 = zo + (r4 - ro) * (zo - zi) / (ro - ri); + auto z_in_ladder = std::abs(z4 - hh.averageGeometry().ladderZ[il]); + auto z_in_module = z_in_ladder - module_length_bpx4 * int(z_in_ladder / module_length_bpx4); + auto gap = z_in_module < module_tolerance_bpx4 || z_in_module > (module_length_bpx4 - module_tolerance_bpx4); + auto holeP = z4 > hh.averageGeometry().ladderMaxZ[il] && z4 < hh.averageGeometry().endCapZ[0]; + auto holeN = z4 < hh.averageGeometry().ladderMinZ[il] && z4 > hh.averageGeometry().endCapZ[1]; + return gap || holeP || holeN; + } + + // trying to free the track building process from hardcoded layers, leaving + // the visit of the graph based on the neighborhood connections between cells. 
+    // Depth-first extension of the n-tuplet candidate accumulated in tmpNtuplet.
+    //
+    // The index of this cell within cells is pushed onto tmpNtuplet and the search recurses,
+    // with DEPTH - 1, into every outer neighbour that has not been killed. A cell without any
+    // live outer neighbour closes the candidate: if the chain holds at least
+    // minHitsPerNtuplet - 1 cells, the inner hit of every cell, at most maxFB fishbone hits and
+    // the outer hit of this cell are stored in foundNtuplets through bulkFill; on success the
+    // new tuple index is attached to every cell of the chain and quality[index] is set to bad.
+    // The cell removes itself from tmpNtuplet before returning.
+    //
+    // DEPTH bounds the recursion at compile time: reaching DEPTH <= 0 only reports an error.
+    // startAt0 is read only when ONLY_TRIPLETS_IN_HOLE is defined, to choose between the
+    // hole4 and hole0 checks applied to short candidates.
+    template <int DEPTH, typename TAcc>
+    ALPAKA_FN_ACC ALPAKA_FN_INLINE void find_ntuplets(TAcc const& acc,
+                                                      const HitsConstView& hh,
+                                                      CACellT* __restrict__ cells,
+                                                      CellTracksVector& cellTracks,
+                                                      HitContainer& foundNtuplets,
+                                                      cms::alpakatools::AtomicPairCounter& apc,
+                                                      Quality* __restrict__ quality,
+                                                      TmpTuple& tmpNtuplet,
+                                                      const unsigned int minHitsPerNtuplet,
+                                                      bool startAt0) const {
+      // the building process for a track ends if:
+      // it has no right neighbor
+      // it has no compatible neighbor
+      // the ntuplets is then saved if the number of hits it contains is greater
+      // than a threshold
+
+      if constexpr (DEPTH <= 0) {
+        printf("ERROR: CACellT::find_ntuplets reached full depth!\n");
+        ALPAKA_ASSERT_OFFLOAD(false);
+      } else {
+        auto doubletId = this - cells;
+        tmpNtuplet.push_back_unsafe(doubletId);
+        ALPAKA_ASSERT_OFFLOAD(tmpNtuplet.size() <= int(TrackerTraits::maxHitsOnTrack - 3));
+
+        bool last = true;
+        for (unsigned int otherCell : outerNeighbors()) {
+          if (cells[otherCell].isKilled())
+            continue;  // killed by earlyFishbone
+          last = false;
+          cells[otherCell].template find_ntuplets<DEPTH - 1>(
+              acc, hh, cells, cellTracks, foundNtuplets, apc, quality, tmpNtuplet, minHitsPerNtuplet, startAt0);
+        }
+        if (last) {  // if long enough save...
+          if ((unsigned int)(tmpNtuplet.size()) >= minHitsPerNtuplet - 1) {
+#ifdef ONLY_TRIPLETS_IN_HOLE
+            // triplets accepted only pointing to the hole
+            if (tmpNtuplet.size() >= 3 || (startAt0 && hole4(hh, cells[tmpNtuplet[0]])) ||
+                ((!startAt0) && hole0(hh, cells[tmpNtuplet[0]])))
+#endif
+            {
+              hindex_type hits[TrackerTraits::maxDepth + 2];
+              auto nh = 0U;
+              constexpr int maxFB = 2;  // for the time being let's limit this
+              int nfb = 0;
+              for (auto c : tmpNtuplet) {
+                hits[nh++] = cells[c].theInnerHitId;
+                if (nfb < maxFB && cells[c].hasFishbone()) {
+                  ++nfb;
+                  hits[nh++] = cells[c].theFishboneId;  // Fishbone hit is always outer than inner hit
+                }
+              }
+              // same assertion macro as the other checks in this device function
+              ALPAKA_ASSERT_OFFLOAD(nh < TrackerTraits::maxHitsOnTrack);
+              hits[nh] = theOuterHitId;
+              auto it = foundNtuplets.bulkFill(acc, apc, hits, nh + 1);
+              if (it >= 0) {  // if negative is overflow....
+                for (auto c : tmpNtuplet)
+                  cells[c].addTrack(acc, it, cellTracks);
+                quality[it] = bad;  // initialize to bad
+              }
+            }
+          }
+        }
+        tmpNtuplet.pop_back();
+        ALPAKA_ASSERT_OFFLOAD(tmpNtuplet.size() < int(TrackerTraits::maxHitsOnTrack - 1));
+      }
+    }
+
+    // Cell status management
+    ALPAKA_FN_ACC ALPAKA_FN_INLINE void kill() { theStatus_ |= uint16_t(StatusBit::kKilled); }
+    ALPAKA_FN_ACC ALPAKA_FN_INLINE bool isKilled() const { return theStatus_ & uint16_t(StatusBit::kKilled); }
+
+    ALPAKA_FN_ACC ALPAKA_FN_INLINE int16_t layerPairId() const { return theLayerPairId_; }
+
+    ALPAKA_FN_ACC ALPAKA_FN_INLINE bool unused() const { return 0 == (uint16_t(StatusBit::kUsed) & theStatus_); }
+    ALPAKA_FN_ACC ALPAKA_FN_INLINE void setStatusBits(StatusBit mask) { theStatus_ |= uint16_t(mask); }
+
+    template <typename TAcc>
+    ALPAKA_FN_ACC ALPAKA_FN_INLINE void setFishbone(TAcc const& acc, hindex_type id, float z, const HitsConstView& hh) {
+      // make it deterministic: use the farther apart (in z)
+      auto old = theFishboneId;
+      while (old !=
+             alpaka::atomicCas(
+                 acc,
+                 &theFishboneId,
+                 old,
+                 (invalidHitId == old || std::abs(z - theInnerZ) > std::abs(hh[old].zGlobal() - theInnerZ)) ?
id : old, + alpaka::hierarchy::Blocks{})) + old = theFishboneId; + } + ALPAKA_FN_ACC ALPAKA_FN_INLINE auto fishboneId() const { return theFishboneId; } + ALPAKA_FN_ACC ALPAKA_FN_INLINE bool hasFishbone() const { return theFishboneId != invalidHitId; } + + private: + CellNeighbors* theOuterNeighbors; + CellTracks* theTracks; + + int16_t theLayerPairId_; + uint16_t theStatus_; // tbd + + float theInnerZ; + float theInnerR; + hindex_type theInnerHitId; + hindex_type theOuterHitId; + hindex_type theFishboneId; + }; +} // namespace ALPAKA_ACCELERATOR_NAMESPACE +#endif // RecoPixelVertexing_PixelTriplets_plugins_CACellT_h diff --git a/RecoTracker/PixelSeeding/plugins/alpaka/CAFishbone.h b/RecoTracker/PixelSeeding/plugins/alpaka/CAFishbone.h new file mode 100644 index 0000000000000..343e0cf9ad005 --- /dev/null +++ b/RecoTracker/PixelSeeding/plugins/alpaka/CAFishbone.h @@ -0,0 +1,148 @@ +#ifndef RecoPixelVertexing_PixelTriplets_alpaka_CAFishbone_h +#define RecoPixelVertexing_PixelTriplets_alpaka_CAFishbone_h + +#include +#include +#include +#include +#include + +#include +#include "HeterogeneousCore/AlpakaInterface/interface/workdivision.h" +#include "HeterogeneousCore/AlpakaInterface/interface/traits.h" +#include "HeterogeneousCore/AlpakaInterface/interface/VecArray.h" +#include "DataFormats/Math/interface/approx_atan2.h" + +#include "CACell.h" +#include "CAStructures.h" + +namespace ALPAKA_ACCELERATOR_NAMESPACE { + namespace caPixelDoublets { + + template + using CellNeighbors = caStructures::CellNeighborsT; + template + using CellTracks = caStructures::CellTracksT; + template + using CellNeighborsVector = caStructures::CellNeighborsVectorT; + template + using CellTracksVector = caStructures::CellTracksVectorT; + template + using OuterHitOfCell = caStructures::OuterHitOfCellT; + template + using HitsConstView = typename CACellT::HitsConstView; + + template + class CAFishbone { + public: + template >> + ALPAKA_FN_ACC void operator()(TAcc const& acc, + HitsConstView hh, + 
CACellT* cells, + uint32_t const* __restrict__ nCells, + OuterHitOfCell const* isOuterHitOfCellWrap, + int32_t nHits, + bool checkTrack) const { + if (nHits <= isOuterHitOfCellWrap->offset) + return; + constexpr auto maxCellsPerHit = CACellT::maxCellsPerHit; + + auto const isOuterHitOfCell = isOuterHitOfCellWrap->container; + + // x runs faster... + + float x[maxCellsPerHit], y[maxCellsPerHit], z[maxCellsPerHit], n[maxCellsPerHit]; + uint16_t d[maxCellsPerHit]; + uint32_t cc[maxCellsPerHit]; + uint8_t l[maxCellsPerHit]; + const uint32_t dimIndexY = 0u; + const uint32_t dimIndexX = 1u; + const uint32_t blockDimensionX(alpaka::getWorkDiv(acc)[dimIndexX]); + const auto& [firstElementIdxNoStrideX, endElementIdxNoStrideX] = + cms::alpakatools::element_index_range_in_block(acc, 0u, dimIndexX); + + // Outermost loop on Y + const uint32_t gridDimensionY(alpaka::getWorkDiv(acc)[dimIndexY]); + const auto& [firstElementIdxNoStrideY, endElementIdxNoStrideY] = + cms::alpakatools::element_index_range_in_grid(acc, 0u, dimIndexY); + uint32_t firstElementIdxY = firstElementIdxNoStrideY; + uint32_t endElementIdxY = endElementIdxNoStrideY; + + for (uint32_t idy = firstElementIdxY, nt = nHits; idy < nt; ++idy) { + if (not cms::alpakatools::next_valid_element_index_strided( + idy, firstElementIdxY, endElementIdxY, gridDimensionY, nt)) + break; + + auto const& vc = isOuterHitOfCell[idy]; + auto s = vc.size(); + if (s < 2) + continue; + + auto const& c0 = cells[vc[0]]; + auto xo = c0.outer_x(hh); + auto yo = c0.outer_y(hh); + auto zo = c0.outer_z(hh); + auto sg = 0; + for (int32_t ic = 0; ic < s; ++ic) { + auto& ci = cells[vc[ic]]; + if (ci.unused()) + continue; // for triplets equivalent to next + if (checkTrack && ci.tracks().empty()) + continue; + cc[sg] = vc[ic]; + d[sg] = ci.inner_detIndex(hh); + l[sg] = ci.layerPairId(); + x[sg] = ci.inner_x(hh) - xo; + y[sg] = ci.inner_y(hh) - yo; + z[sg] = ci.inner_z(hh) - zo; + n[sg] = x[sg] * x[sg] + y[sg] * y[sg] + z[sg] * z[sg]; + ++sg; + } + 
if (sg < 2) + continue; + // here we parallelize in X + uint32_t firstElementIdxX = firstElementIdxNoStrideX; + uint32_t endElementIdxX = endElementIdxNoStrideX; + for (uint32_t ic = firstElementIdxX; (int)ic < sg - 1; ++ic) { + if (not cms::alpakatools::next_valid_element_index_strided( + ic, firstElementIdxX, endElementIdxX, blockDimensionX, sg - 1)) + break; + + auto& ci = cells[cc[ic]]; + for (auto jc = ic + 1; (int)jc < sg; ++jc) { + auto& cj = cells[cc[jc]]; + // must be different detectors (in the same layer) + // if (d[ic]==d[jc]) continue; + // || l[ic]!=l[jc]) continue; + auto cos12 = x[ic] * x[jc] + y[ic] * y[jc] + z[ic] * z[jc]; + + if (d[ic] != d[jc] && cos12 * cos12 >= 0.99999f * (n[ic] * n[jc])) { + // alligned: kill farthest (prefer consecutive layers) + // if same layer prefer farthest (longer level arm) and make space for intermediate hit + bool sameLayer = l[ic] == l[jc]; + if (n[ic] > n[jc]) { + if (sameLayer) { + cj.kill(); // closest + ci.setFishbone(acc, cj.inner_hit_id(), cj.inner_z(hh), hh); + } else { + ci.kill(); // farthest + // break; // removed to improve reproducibility. keep it for reference and tests + } + } else { + if (!sameLayer) { + cj.kill(); // farthest + } else { + ci.kill(); // closest + cj.setFishbone(acc, ci.inner_hit_id(), ci.inner_z(hh), hh); + // break; // removed to improve reproducibility. 
keep it for reference and tests + } + } + } + } //cj + } // ci + } // hits + } + }; + } // namespace caPixelDoublets +} // namespace ALPAKA_ACCELERATOR_NAMESPACE +#endif // RecoPixelVertexing_PixelTriplets_alpaka_CAFishbone_h diff --git a/RecoTracker/PixelSeeding/plugins/alpaka/CAHitNtuplet.cc b/RecoTracker/PixelSeeding/plugins/alpaka/CAHitNtuplet.cc new file mode 100644 index 0000000000000..b878f2d19f0ce --- /dev/null +++ b/RecoTracker/PixelSeeding/plugins/alpaka/CAHitNtuplet.cc @@ -0,0 +1,98 @@ +#include + +#include "DataFormats/TrackSoA/interface/TrackSoAHost.h" +#include "DataFormats/TrackSoA/interface/alpaka/TrackSoACollection.h" +#include "DataFormats/TrackSoA/interface/TrackSoADevice.h" +#include "DataFormats/TrackingRecHitSoA/interface/alpaka/TrackingRecHitSoACollection.h" +#include "FWCore/Framework/interface/ConsumesCollector.h" +#include "FWCore/Framework/interface/Frameworkfwd.h" +#include "FWCore/ParameterSet/interface/ConfigurationDescriptions.h" +#include "FWCore/ParameterSet/interface/ParameterSet.h" +#include "FWCore/ParameterSet/interface/ParameterSetDescription.h" +#include "FWCore/Utilities/interface/ESGetToken.h" +#include "FWCore/Utilities/interface/InputTag.h" +#include "FWCore/Utilities/interface/RunningAverage.h" +#include "HeterogeneousCore/AlpakaCore/interface/alpaka/EDGetToken.h" +#include "HeterogeneousCore/AlpakaCore/interface/alpaka/EDPutToken.h" +#include "HeterogeneousCore/AlpakaCore/interface/alpaka/Event.h" +#include "HeterogeneousCore/AlpakaCore/interface/alpaka/EventSetup.h" +#include "HeterogeneousCore/AlpakaCore/interface/alpaka/stream/EDProducer.h" +#include "HeterogeneousCore/AlpakaInterface/interface/config.h" +#include "MagneticField/Records/interface/IdealMagneticFieldRecord.h" +#include "RecoTracker/TkMSParametrization/interface/PixelRecoUtilities.h" +#include "RecoLocalTracker/Records/interface/PixelCPEFastParamsRecord.h" +#include "RecoLocalTracker/SiPixelRecHits/interface/alpaka/PixelCPEFastParamsCollection.h" + 
+#include "CAHitNtupletGenerator.h" + +namespace ALPAKA_ACCELERATOR_NAMESPACE { + template + class CAHitNtupletAlpaka : public stream::EDProducer<> { + using HitsConstView = TrackingRecHitAlpakaSoAConstView; + using HitsOnDevice = TrackingRecHitAlpakaCollection; + using HitsOnHost = TrackingRecHitHost; + + using TkSoAHost = TrackSoAHost; + using TkSoADevice = TrackSoACollection; + + using Algo = CAHitNtupletGenerator; + + public: + explicit CAHitNtupletAlpaka(const edm::ParameterSet& iConfig); + ~CAHitNtupletAlpaka() override = default; + void produce(device::Event& iEvent, const device::EventSetup& es) override; + static void fillDescriptions(edm::ConfigurationDescriptions& descriptions); + + private: + + const edm::ESGetToken tokenField_; + const device::ESGetToken, PixelCPEFastParamsRecord> cpeToken_; + const device::EDGetToken tokenHit_; + const device::EDPutToken tokenTrack_; + + Algo deviceAlgo_; + }; + + template + CAHitNtupletAlpaka::CAHitNtupletAlpaka(const edm::ParameterSet& iConfig) + : tokenField_(esConsumes()), + cpeToken_(esConsumes(edm::ESInputTag("", iConfig.getParameter("CPE")))), + tokenHit_(consumes(iConfig.getParameter("pixelRecHitSrc"))), + tokenTrack_(produces()), + deviceAlgo_(iConfig) { + + } + + template + void CAHitNtupletAlpaka::fillDescriptions(edm::ConfigurationDescriptions& descriptions) { + edm::ParameterSetDescription desc; + + desc.add("pixelRecHitSrc", edm::InputTag("siPixelRecHitsPreSplittingAlpaka")); + + std::string cpe = "PixelCPEFastParams"; + cpe += TrackerTraits::nameModifier; + desc.add("CPE", cpe); + + Algo::fillPSetDescription(desc); + descriptions.addWithDefaultLabel(desc); + } + + template + void CAHitNtupletAlpaka::produce(device::Event& iEvent, const device::EventSetup& es) { + auto bf = 1. 
/ es.getData(tokenField_).inverseBzAtOriginInGeV(); + + auto& fcpe = es.getData(cpeToken_); + + auto const& hits = iEvent.get(tokenHit_); + + iEvent.emplace(tokenTrack_, deviceAlgo_.makeTuplesAsync(hits, fcpe.const_buffer().data(), bf, iEvent.queue())); + } + + using CAHitNtupletAlpakaPhase1 = CAHitNtupletAlpaka; + using CAHitNtupletAlpakaPhase2 = CAHitNtupletAlpaka; +} // namespace ALPAKA_ACCELERATOR_NAMESPACE + +#include "HeterogeneousCore/AlpakaCore/interface/alpaka/MakerMacros.h" + +DEFINE_FWK_ALPAKA_MODULE(CAHitNtupletAlpakaPhase1); +DEFINE_FWK_ALPAKA_MODULE(CAHitNtupletAlpakaPhase2); diff --git a/RecoTracker/PixelSeeding/plugins/alpaka/CAHitNtupletGenerator.cc b/RecoTracker/PixelSeeding/plugins/alpaka/CAHitNtupletGenerator.cc new file mode 100644 index 0000000000000..8054ecb358354 --- /dev/null +++ b/RecoTracker/PixelSeeding/plugins/alpaka/CAHitNtupletGenerator.cc @@ -0,0 +1,329 @@ +// +// Original Author: Felice Pantaleo, CERN +// + +// #define GPU_DEBUG +// #define DUMP_GPU_TK_TUPLES + +#include +#include +#include +#include + +#include "DataFormats/TrackSoA/interface/alpaka/TrackSoACollection.h" +#include "DataFormats/TrackSoA/interface/TrackSoADevice.h" +#include "DataFormats/TrackSoA/interface/TrackSoAHost.h" +#include "FWCore/Framework/interface/ConsumesCollector.h" +#include "FWCore/Framework/interface/Event.h" +#include "FWCore/MessageLogger/interface/MessageLogger.h" +#include "FWCore/ParameterSet/interface/ParameterSetDescription.h" +#include "FWCore/ServiceRegistry/interface/Service.h" + +#include "CAHitNtupletGenerator.h" +#include "CAHitNtupletGeneratorKernels.h" +#include "CAPixelDoublets.h" +#include "CAPixelDoubletsAlgos.h" + +namespace ALPAKA_ACCELERATOR_NAMESPACE { + namespace { + + using namespace caHitNtupletGenerator; + using namespace caPixelDoublets; + using namespace pixelTopology; + using namespace pixelTrack; + + template + T sqr(T x) { + return x * x; + } + + //Common Params + AlgoParams makeCommonParams(edm::ParameterSet const& 
cfg) { + return AlgoParams({cfg.getParameter("minHitsForSharingCut"), + cfg.getParameter("useRiemannFit"), + cfg.getParameter("fitNas4"), + cfg.getParameter("includeJumpingForwardDoublets"), + cfg.getParameter("earlyFishbone"), + cfg.getParameter("lateFishbone"), + cfg.getParameter("fillStatistics"), + cfg.getParameter("doSharedHitCut"), + cfg.getParameter("dupPassThrough"), + cfg.getParameter("useSimpleTripletCleaner")}); + } + + //This is needed to have the partial specialization for isPhase1Topology/isPhase2Topology + template + struct topologyCuts {}; + + template + struct topologyCuts> { + static constexpr CAParamsT makeCACuts(edm::ParameterSet const& cfg) { + return CAParamsT{{cfg.getParameter("maxNumberOfDoublets"), + cfg.getParameter("minHitsPerNtuplet"), + (float)cfg.getParameter("ptmin"), + (float)cfg.getParameter("CAThetaCutBarrel"), + (float)cfg.getParameter("CAThetaCutForward"), + (float)cfg.getParameter("hardCurvCut"), + (float)cfg.getParameter("dcaCutInnerTriplet"), + (float)cfg.getParameter("dcaCutOuterTriplet")}}; + }; + + static constexpr ::pixelTrack::QualityCutsT makeQualityCuts(edm::ParameterSet const& pset) { + auto coeff = pset.getParameter>("chi2Coeff"); + auto ptMax = pset.getParameter("chi2MaxPt"); + + coeff[1] = (coeff[1] - coeff[0]) / log2(ptMax); + return ::pixelTrack::QualityCutsT{// polynomial coefficients for the pT-dependent chi2 cut + {(float)coeff[0], (float)coeff[1], 0.f, 0.f}, + // max pT used to determine the chi2 cut + (float)ptMax, + // chi2 scale factor: 8 for broken line fit, ?? 
for Riemann fit + (float)pset.getParameter("chi2Scale"), + // regional cuts for triplets + {(float)pset.getParameter("tripletMaxTip"), + (float)pset.getParameter("tripletMinPt"), + (float)pset.getParameter("tripletMaxZip")}, + // regional cuts for quadruplets + {(float)pset.getParameter("quadrupletMaxTip"), + (float)pset.getParameter("quadrupletMinPt"), + (float)pset.getParameter("quadrupletMaxZip")}}; + } + }; + + template + struct topologyCuts> { + static constexpr CAParamsT makeCACuts(edm::ParameterSet const& cfg) { + return CAParamsT{{cfg.getParameter("maxNumberOfDoublets"), + cfg.getParameter("minHitsPerNtuplet"), + (float)cfg.getParameter("ptmin"), + (float)cfg.getParameter("CAThetaCutBarrel"), + (float)cfg.getParameter("CAThetaCutForward"), + (float)cfg.getParameter("hardCurvCut"), + (float)cfg.getParameter("dcaCutInnerTriplet"), + (float)cfg.getParameter("dcaCutOuterTriplet")}, + {(bool)cfg.getParameter("includeFarForwards")}}; + } + + static constexpr ::pixelTrack::QualityCutsT makeQualityCuts(edm::ParameterSet const& pset) { + return ::pixelTrack::QualityCutsT{ + static_cast(pset.getParameter("maxChi2")), + static_cast(pset.getParameter("minPt")), + static_cast(pset.getParameter("maxTip")), + static_cast(pset.getParameter("maxZip")), + }; + } + }; + + //Cell Cuts, as they are the cuts have the same logic for Phase2 and Phase1 + //keeping them separate would allow further differentiation in the future + //moving them to topologyCuts and using the same syntax + template + CellCutsT makeCellCuts(edm::ParameterSet const& cfg) { + return CellCutsT{cfg.getParameter("doClusterCut"), + cfg.getParameter("doZ0Cut"), + cfg.getParameter("doPtCut"), + cfg.getParameter("idealConditions"), + (float)cfg.getParameter("cellZ0Cut"), + (float)cfg.getParameter("cellPtCut"), + cfg.getParameter>("phiCuts")}; + } + + } // namespace + + using namespace std; + + template + CAHitNtupletGenerator::CAHitNtupletGenerator(const edm::ParameterSet& cfg) + : 
m_params(makeCommonParams(cfg), + makeCellCuts(cfg), + topologyCuts::makeQualityCuts(cfg.getParameterSet("trackQualityCuts")), + topologyCuts::makeCACuts(cfg)) { +#ifdef DUMP_GPU_TK_TUPLES + printf("TK: %s %s %s %s %s %s %s %s %s %s %s %s %s %s %s %s %s\n", + "tid", + "qual", + "nh", + "nl", + "charge", + "pt", + "eta", + "phi", + "tip", + "zip", + "chi2", + "h1", + "h2", + "h3", + "h4", + "h5", + "hn"); +#endif + } + + template + void CAHitNtupletGenerator::fillPSetDescription(edm::ParameterSetDescription& desc) { + fillDescriptionsCommon(desc); + edm::LogWarning("CAHitNtupletGenerator::fillPSetDescription") + << "Note: this fillPSetDescription is a dummy one. Most probably you are missing some parameters. \n" + "please implement your TrackerTraits descriptions in CAHitNtupletGenerator. \n"; + } + + template <> + void CAHitNtupletGenerator::fillPSetDescription(edm::ParameterSetDescription& desc) { + fillDescriptionsCommon(desc); + + desc.add("idealConditions", true); + desc.add("includeJumpingForwardDoublets", false); + desc.add("cellZ0Cut", 12.0); + desc.add("cellPtCut", 0.5); + + edm::ParameterSetDescription trackQualityCuts; + trackQualityCuts.add("chi2MaxPt", 10.)->setComment("max pT used to determine the pT-dependent chi2 cut"); + trackQualityCuts.add>("chi2Coeff", {0.9, 1.8})->setComment("chi2 at 1GeV and at ptMax above"); + trackQualityCuts.add("chi2Scale", 8.) + ->setComment( + "Factor to multiply the pT-dependent chi2 cut (currently: 8 for the broken line fit, ?? 
for the Riemann " + "fit)"); + trackQualityCuts.add("tripletMinPt", 0.5)->setComment("Min pT for triplets, in GeV"); + trackQualityCuts.add("tripletMaxTip", 0.3)->setComment("Max |Tip| for triplets, in cm"); + trackQualityCuts.add("tripletMaxZip", 12.)->setComment("Max |Zip| for triplets, in cm"); + trackQualityCuts.add("quadrupletMinPt", 0.3)->setComment("Min pT for quadruplets, in GeV"); + trackQualityCuts.add("quadrupletMaxTip", 0.5)->setComment("Max |Tip| for quadruplets, in cm"); + trackQualityCuts.add("quadrupletMaxZip", 12.)->setComment("Max |Zip| for quadruplets, in cm"); + desc.add("trackQualityCuts", trackQualityCuts) + ->setComment( + "Quality cuts based on the results of the track fit:\n - apply a pT-dependent chi2 cut;\n - apply " + "\"region " + "cuts\" based on the fit results (pT, Tip, Zip)."); + + desc.add>( + "phiCuts", std::vector(std::begin(phase1PixelTopology::phicuts), std::end(phase1PixelTopology::phicuts))) + ->setComment("Cuts in phi for cells"); + } + + template <> + void CAHitNtupletGenerator::fillPSetDescription(edm::ParameterSetDescription& desc) { + fillDescriptionsCommon(desc); + + desc.add("idealConditions", false); + desc.add("includeJumpingForwardDoublets", false); + desc.add("cellZ0Cut", 10.0); + desc.add("cellPtCut", 0.0); + + edm::ParameterSetDescription trackQualityCuts; + trackQualityCuts.add("chi2MaxPt", 10.)->setComment("max pT used to determine the pT-dependent chi2 cut"); + trackQualityCuts.add>("chi2Coeff", {0.9, 1.8})->setComment("chi2 at 1GeV and at ptMax above"); + trackQualityCuts.add("chi2Scale", 8.) + ->setComment( + "Factor to multiply the pT-dependent chi2 cut (currently: 8 for the broken line fit, ?? 
for the Riemann " + "fit)"); + trackQualityCuts.add("tripletMinPt", 0.0)->setComment("Min pT for triplets, in GeV"); + trackQualityCuts.add("tripletMaxTip", 0.1)->setComment("Max |Tip| for triplets, in cm"); + trackQualityCuts.add("tripletMaxZip", 6.)->setComment("Max |Zip| for triplets, in cm"); + trackQualityCuts.add("quadrupletMinPt", 0.0)->setComment("Min pT for quadruplets, in GeV"); + trackQualityCuts.add("quadrupletMaxTip", 0.5)->setComment("Max |Tip| for quadruplets, in cm"); + trackQualityCuts.add("quadrupletMaxZip", 6.)->setComment("Max |Zip| for quadruplets, in cm"); + + desc.add("trackQualityCuts", trackQualityCuts) + ->setComment( + "Quality cuts based on the results of the track fit:\n - apply a pT-dependent chi2 cut;\n - apply " + "\"region " + "cuts\" based on the fit results (pT, Tip, Zip)."); + + + desc.add>( + "phiCuts", std::vector(std::begin(phase1PixelTopology::phicuts), std::end(phase1PixelTopology::phicuts))) + ->setComment("Cuts in phi for cells"); + + } + + template <> + void CAHitNtupletGenerator::fillPSetDescription(edm::ParameterSetDescription& desc) { + fillDescriptionsCommon(desc); + + desc.add("idealConditions", false); + desc.add("includeFarForwards", true); + desc.add("includeJumpingForwardDoublets", true); + desc.add("cellZ0Cut", 7.5); + desc.add("cellPtCut", 0.85); + + edm::ParameterSetDescription trackQualityCuts; + trackQualityCuts.add("maxChi2", 5.)->setComment("Max normalized chi2"); + trackQualityCuts.add("minPt", 0.5)->setComment("Min pT in GeV"); + trackQualityCuts.add("maxTip", 0.3)->setComment("Max |Tip| in cm"); + trackQualityCuts.add("maxZip", 12.)->setComment("Max |Zip|, in cm"); + desc.add("trackQualityCuts", trackQualityCuts) + ->setComment( + "Quality cuts based on the results of the track fit:\n - apply cuts based on the fit results (pT, Tip, " + "Zip)."); + + desc.add>( + "phiCuts", std::vector(std::begin(phase2PixelTopology::phicuts), std::end(phase2PixelTopology::phicuts))) + ->setComment("Cuts in phi for 
cells"); + } + + template + void CAHitNtupletGenerator::fillDescriptionsCommon(edm::ParameterSetDescription& desc) { + // 87 cm/GeV = 1/(3.8T * 0.3) + // take less than radius given by the hardPtCut and reject everything below + // auto hardCurvCut = 1.f/(0.35 * 87.f); + desc.add("ptmin", 0.9f)->setComment("Cut on minimum pt"); + desc.add("CAThetaCutBarrel", 0.002f)->setComment("Cut on RZ alignement for Barrel"); + desc.add("CAThetaCutForward", 0.003f)->setComment("Cut on RZ alignment for Forward"); + desc.add("hardCurvCut", 1.f / (0.35 * 87.f))->setComment("Cut on minimum curvature, used in DCA ntuplet selection"); + desc.add("dcaCutInnerTriplet", 0.15f)->setComment("Cut on origin radius when the inner hit is on BPix1"); + desc.add("dcaCutOuterTriplet", 0.25f)->setComment("Cut on origin radius when the outer hit is on BPix1"); + desc.add("earlyFishbone", true); + desc.add("lateFishbone", false); + desc.add("fillStatistics", false); + desc.add("minHitsPerNtuplet", 4); + desc.add("maxNumberOfDoublets", TrackerTraits::maxNumberOfDoublets); + desc.add("minHitsForSharingCut", 10) + ->setComment("Maximum number of hits in a tuple to clean also if the shared hit is on bpx1"); + + desc.add("fitNas4", false)->setComment("fit only 4 hits out of N"); + desc.add("doClusterCut", true); + desc.add("doZ0Cut", true); + desc.add("doPtCut", true); + desc.add("useRiemannFit", false)->setComment("true for Riemann, false for BrokenLine"); + desc.add("doSharedHitCut", true)->setComment("Sharing hit nTuples cleaning"); + desc.add("dupPassThrough", false)->setComment("Do not reject duplicate"); + desc.add("useSimpleTripletCleaner", true)->setComment("use alternate implementation"); + } + + template + TrackSoACollection CAHitNtupletGenerator::makeTuplesAsync( + HitsOnDevice const& hits_d, ParamsOnDevice const* cpeParams, float bfield, Queue& queue) const { + using HelixFit = HelixFit; + using TrackSoA = TrackSoACollection; + using GPUKernels = CAHitNtupletGeneratorKernels; + + TrackSoA 
tracks(queue); + + GPUKernels kernels(m_params, hits_d.view().metadata().size(), queue); + + kernels.buildDoublets(hits_d.view(), queue); + kernels.launchKernels(hits_d.view(), tracks.view(), queue); + + HelixFit fitter(bfield, m_params.fitNas4_); + fitter.allocate(kernels.tupleMultiplicity(), tracks.view()); + if (m_params.useRiemannFit_) { + fitter.launchRiemannKernels( + hits_d.view(), cpeParams, hits_d.view().metadata().size(), TrackerTraits::maxNumberOfQuadruplets, queue); + } else { + fitter.launchBrokenLineKernels( + hits_d.view(), cpeParams, hits_d.view().metadata().size(), TrackerTraits::maxNumberOfQuadruplets, queue); + } + kernels.classifyTuples(hits_d.view(), tracks.view(), queue); +#ifdef GPU_DEBUG + alpaka::wait(queue); + std::cout << "finished building pixel tracks on GPU" << std::endl; +#endif + + return tracks; + } + + template class CAHitNtupletGenerator; + template class CAHitNtupletGenerator; + template class CAHitNtupletGenerator; +} // namespace ALPAKA_ACCELERATOR_NAMESPACE diff --git a/RecoTracker/PixelSeeding/plugins/alpaka/CAHitNtupletGenerator.h b/RecoTracker/PixelSeeding/plugins/alpaka/CAHitNtupletGenerator.h new file mode 100644 index 0000000000000..e13df91bccb35 --- /dev/null +++ b/RecoTracker/PixelSeeding/plugins/alpaka/CAHitNtupletGenerator.h @@ -0,0 +1,84 @@ +#ifndef RecoPixelVertexing_PixelTriplets_Alpaka_CAHitNtupletGenerator_h +#define RecoPixelVertexing_PixelTriplets_Alpaka_CAHitNtupletGenerator_h + +#include + +#include "DataFormats/SiPixelDetId/interface/PixelSubdetector.h" +#include "DataFormats/TrackSoA/interface/TrackDefinitions.h" +#include "DataFormats/TrackSoA/interface/TrackSoAHost.h" +#include "DataFormats/TrackSoA/interface/alpaka/TrackSoACollection.h" +#include "DataFormats/TrackSoA/interface/TrackSoADevice.h" +#include "DataFormats/TrackingRecHitSoA/interface/TrackingRecHitsLayout.h" +#include "DataFormats/TrackingRecHitSoA/interface/alpaka/TrackingRecHitSoACollection.h" +#include 
"FWCore/ParameterSet/interface/ParameterSet.h" +#include "RecoLocalTracker/SiPixelRecHits/interface/pixelCPEforDevice.h" + +#include "CAHitNtupletGeneratorKernels.h" +#include "CACell.h" +#include "HelixFit.h" + +namespace edm { + class ParameterSetDescription; +} // namespace edm + +namespace ALPAKA_ACCELERATOR_NAMESPACE { + + template + class CAHitNtupletGenerator { + public: + using HitsView = TrackingRecHitAlpakaSoAView; + using HitsConstView = TrackingRecHitAlpakaSoAConstView; + using HitsOnDevice = TrackingRecHitAlpakaCollection; + using HitsOnHost = TrackingRecHitHost; + using hindex_type = typename TrackingRecHitAlpakaSoA::hindex_type; + + using HitToTuple = caStructures::HitToTupleT; + using TupleMultiplicity = caStructures::TupleMultiplicityT; + using OuterHitOfCell = caStructures::OuterHitOfCellT; + + using CACell = CACellT; + using TkSoAHost = TrackSoAHost; + using TkSoADevice = TrackSoACollection; + using HitContainer = typename TrackSoA::HitContainer; + using Tuple = HitContainer; + + using CellNeighborsVector = caStructures::CellNeighborsVectorT; + using CellTracksVector = caStructures::CellTracksVectorT; + + using Quality = ::pixelTrack::Quality; + + using QualityCuts = ::pixelTrack::QualityCutsT; + using Params = caHitNtupletGenerator::ParamsT; + using Counters = caHitNtupletGenerator::Counters; + + using ParamsOnDevice = pixelCPEforDevice::ParamsOnDeviceT; + + public: + CAHitNtupletGenerator(const edm::ParameterSet& cfg); + + static void fillPSetDescription(edm::ParameterSetDescription& desc); + static void fillDescriptionsCommon(edm::ParameterSetDescription& desc); + + // TODO: Check if still needed + // void beginJob(); + // void endJob(); + + TkSoADevice makeTuplesAsync(HitsOnDevice const& hits_d, + ParamsOnDevice const* cpeParams, + float bfield, + Queue& queue) const; + + private: + void buildDoublets(const HitsConstView& hh, Queue& queue) const; + + void hitNtuplets(const HitsConstView& hh, const edm::EventSetup& es, bool useRiemannFit, 
Queue& queue); + + void launchKernels(const HitsConstView& hh, bool useRiemannFit, Queue& queue) const; + + Params m_params; + + }; + +} // namespace ALPAKA_ACCELERATOR_NAMESPACE + +#endif // RecoPixelVertexing_PixelTriplets_plugins_CAHitNtupletGenerator_h diff --git a/RecoTracker/PixelSeeding/plugins/alpaka/CAHitNtupletGeneratorKernels.dev.cc b/RecoTracker/PixelSeeding/plugins/alpaka/CAHitNtupletGeneratorKernels.dev.cc new file mode 100644 index 0000000000000..daca899740fe0 --- /dev/null +++ b/RecoTracker/PixelSeeding/plugins/alpaka/CAHitNtupletGeneratorKernels.dev.cc @@ -0,0 +1,480 @@ +#include +#include "HeterogeneousCore/AlpakaInterface/interface/devices.h" +#include "HeterogeneousCore/AlpakaInterface/interface/host.h" +#include "HeterogeneousCore/AlpakaInterface/interface/memory.h" +#include "HeterogeneousCore/AlpakaInterface/interface/config.h" +#include "HeterogeneousCore/AlpakaInterface/interface/workdivision.h" +#include "HeterogeneousCore/AlpakaInterface/interface/HistoContainer.h" +#include "CAHitNtupletGeneratorKernels.h" +#include "CAHitNtupletGeneratorKernelsImpl.h" +#ifdef DUMP_GPU_TK_TUPLES +#include +#endif + +// #define NTUPLE_DEBUG +// #define GPU_DEBUG + +namespace ALPAKA_ACCELERATOR_NAMESPACE { + + template + void CAHitNtupletGeneratorKernels::launchKernels(const HitsConstView &hh, + TkSoAView &tracks_view, + Queue &queue) { + using namespace caPixelDoublets; + using namespace caHitNtupletGeneratorKernels; + + // zero tuples + cms::alpakatools::launchZero(&(tracks_view.hitIndices()), queue); + + int32_t nhits = hh.metadata().size(); + +#ifdef NTUPLE_DEBUG + std::cout << "start tuple building. 
N hits " << nhits << std::endl; + if (nhits < 2) + std::cout << "too few hits " << nhits << std::endl; +#endif + + // + // applying conbinatoric cleaning such as fishbone at this stage is too expensive + // + + const auto nthTot = 64; + const auto stride = 4; + auto blockSize = nthTot / stride; + auto numberOfBlocks = cms::alpakatools::divide_up_by(3 * m_params.caParams_.maxNumberOfDoublets_ / 4, blockSize); + const auto rescale = numberOfBlocks / 65536; + blockSize *= (rescale + 1); + numberOfBlocks = cms::alpakatools::divide_up_by(3 * m_params.caParams_.maxNumberOfDoublets_ / 4, blockSize); + ALPAKA_ASSERT_OFFLOAD(numberOfBlocks < 65536); + ALPAKA_ASSERT_OFFLOAD(blockSize > 0 && 0 == blockSize % 16); + const Vec2D blks{numberOfBlocks, 1u}; + const Vec2D thrs{blockSize, stride}; + const auto kernelConnectWorkDiv = cms::alpakatools::make_workdiv(blks, thrs); + + alpaka::exec(queue, + kernelConnectWorkDiv, + kernel_connect{}, + this->device_hitTuple_apc_, + this->device_hitToTuple_apc_, // needed only to be reset, ready for next kernel + hh, + this->device_theCells_.data(), + this->device_nCells_.data(), + this->device_theCellNeighbors_.data(), + this->isOuterHitOfCell_.data(), + this->m_params.caParams_); + + // do not run the fishbone if there are hits only in BPIX1 + if (this->m_params.earlyFishbone_) { + const auto nthTot = 128; + const auto stride = 16; + const auto blockSize = nthTot / stride; + const auto numberOfBlocks = cms::alpakatools::divide_up_by(nhits, blockSize); + const Vec2D blks{numberOfBlocks, 1u}; + const Vec2D thrs{blockSize, stride}; + const auto fishboneWorkDiv = cms::alpakatools::make_workdiv(blks, thrs); + alpaka::exec(queue, + fishboneWorkDiv, + CAFishbone{}, + hh, + this->device_theCells_.data(), + this->device_nCells_.data(), + this->isOuterHitOfCell_.data(), + nhits, + false); + } + blockSize = 64; + numberOfBlocks = cms::alpakatools::divide_up_by(3 * m_params.caParams_.maxNumberOfDoublets_ / 4, blockSize); + auto workDiv1D = 
cms::alpakatools::make_workdiv(numberOfBlocks, blockSize); + alpaka::exec(queue, + workDiv1D, + kernel_find_ntuplets{}, + hh, + tracks_view, + this->device_theCells_.data(), + this->device_nCells_.data(), + this->device_theCellTracks_.data(), + this->device_hitTuple_apc_, + this->m_params.caParams_); +#ifdef GPU_DEBUG + alpaka::wait(queue); +#endif + + if (this->m_params.doStats_) + alpaka::exec(queue, + workDiv1D, + kernel_mark_used{}, + this->device_theCells_.data(), + this->device_nCells_.data()); + +#ifdef GPU_DEBUG + alpaka::wait(queue); +#endif + + blockSize = 128; + numberOfBlocks = cms::alpakatools::divide_up_by(HitContainer::totbins(), blockSize); + workDiv1D = cms::alpakatools::make_workdiv(numberOfBlocks, blockSize); + + alpaka::exec(queue, + workDiv1D, + cms::alpakatools::finalizeBulk{}, + this->device_hitTuple_apc_, + &tracks_view.hitIndices()); + +#ifdef GPU_DEBUG + alpaka::wait(queue); +#endif + + alpaka::exec(queue, workDiv1D, kernel_fillHitDetIndices{}, tracks_view, hh); + +#ifdef GPU_DEBUG + alpaka::wait(queue); +#endif + alpaka::exec(queue, workDiv1D, kernel_fillNLayers{}, tracks_view, this->device_hitTuple_apc_); + +#ifdef GPU_DEBUG + alpaka::wait(queue); +#endif + + // remove duplicates (tracks that share a doublet) + numberOfBlocks = cms::alpakatools::divide_up_by(3 * m_params.caParams_.maxNumberOfDoublets_ / 4, blockSize); + workDiv1D = cms::alpakatools::make_workdiv(numberOfBlocks, blockSize); + + alpaka::exec(queue, + workDiv1D, + kernel_earlyDuplicateRemover{}, + this->device_theCells_.data(), + this->device_nCells_.data(), + tracks_view, + this->m_params.dupPassThrough_); +#ifdef GPU_DEBUG + alpaka::wait(queue); +#endif + + blockSize = 128; + numberOfBlocks = cms::alpakatools::divide_up_by(3 * TrackerTraits::maxNumberOfTuples / 4, blockSize); + workDiv1D = cms::alpakatools::make_workdiv(numberOfBlocks, blockSize); + + alpaka::exec(queue, + workDiv1D, + kernel_countMultiplicity{}, + tracks_view, + this->device_tupleMultiplicity_.data()); + 
cms::alpakatools::launchFinalize(this->device_tupleMultiplicity_.data(), queue); + + workDiv1D = cms::alpakatools::make_workdiv(numberOfBlocks, blockSize); + alpaka::exec( + queue, workDiv1D, kernel_fillMultiplicity{}, tracks_view, this->device_tupleMultiplicity_.data()); +#ifdef GPU_DEBUG + alpaka::wait(queue); +#endif + // do not run the fishbone if there are hits only in BPIX1 + if (this->m_params.lateFishbone_) { + const auto nthTot = 128; + const auto stride = 16; + const auto blockSize = nthTot / stride; + const auto numberOfBlocks = cms::alpakatools::divide_up_by(nhits, blockSize); + const Vec2D blks{numberOfBlocks, 1u}; + const Vec2D thrs{blockSize, stride}; + const auto workDiv2D = cms::alpakatools::make_workdiv(blks, thrs); + + alpaka::exec(queue, + workDiv2D, + CAFishbone{}, + hh, + this->device_theCells_.data(), + this->device_nCells_.data(), + this->isOuterHitOfCell_.data(), + nhits, + true); + } + +#ifdef GPU_DEBUG + alpaka::wait(queue); +#endif + } + + template + void CAHitNtupletGeneratorKernels::buildDoublets(const HitsConstView &hh, Queue &queue) { + auto nhits = hh.metadata().size(); + + using namespace caPixelDoublets; + + using CACell = CACellT; + using OuterHitOfCell = typename CACell::OuterHitOfCell; + using CellNeighbors = typename CACell::CellNeighbors; + using CellTracks = typename CACell::CellTracks; + using OuterHitOfCellContainer = typename CACell::OuterHitOfCellContainer; + +#ifdef NTUPLE_DEBUG + std::cout << "building Doublets out of " << nhits << " Hits" << std::endl; +#endif + +#ifdef GPU_DEBUG + alpaka::wait(queue); +#endif + + // in principle we can use "nhits" to heuristically dimension the workspace... 
+ ALPAKA_ASSERT_OFFLOAD(this->device_isOuterHitOfCell_.data()); + + alpaka::exec( + queue, + cms::alpakatools::make_workdiv(1, 1), + [] ALPAKA_FN_ACC(Acc1D const &acc, + OuterHitOfCell *isOuterHitOfCell, + OuterHitOfCellContainer *container, + int32_t const *offset) { + // this code runs on the device + isOuterHitOfCell->container = container; + isOuterHitOfCell->offset = *offset; + }, + this->isOuterHitOfCell_.data(), + this->device_isOuterHitOfCell_.data(), + &hh.offsetBPIX2()); + + { + int threadsPerBlock = 128; + // at least one block! + int blocks = std::max(1u, cms::alpakatools::divide_up_by(nhits, threadsPerBlock)); + const auto workDiv1D = cms::alpakatools::make_workdiv(blocks, threadsPerBlock); + + alpaka::exec(queue, + workDiv1D, + initDoublets{}, + this->isOuterHitOfCell_.data(), + nhits, + this->device_theCellNeighbors_.data(), + this->device_theCellNeighborsContainer_, + this->device_theCellTracks_.data(), + this->device_theCellTracksContainer_); + } + +#ifdef GPU_DEBUG + alpaka::wait(queue); +#endif + + if (0 == nhits) + return; // protect against empty events + + // take all layer pairs into account + auto nActualPairs = this->m_params.nPairs(); + + const int stride = 4; + const int threadsPerBlock = TrackerTraits::getDoubletsFromHistoMaxBlockSize / stride; + int blocks = (4 * nhits + threadsPerBlock - 1) / threadsPerBlock; + const Vec2D blks{blocks, 1u}; + const Vec2D thrs{threadsPerBlock, stride}; + const auto workDiv2D = cms::alpakatools::make_workdiv(blks, thrs); + + alpaka::exec(queue, + workDiv2D, + getDoubletsFromHisto{}, + this->device_theCells_.data(), + this->device_nCells_.data(), + this->device_theCellNeighbors_.data(), + this->device_theCellTracks_.data(), + hh, + this->isOuterHitOfCell_.data(), + nActualPairs, + this->m_params.caParams_.maxNumberOfDoublets_, + this->m_params.cellCuts_); + +#ifdef GPU_DEBUG + alpaka::wait(queue); +#endif + } + + template + void CAHitNtupletGeneratorKernels::classifyTuples(const HitsConstView &hh, + 
TkSoAView &tracks_view, + Queue &queue) { + using namespace caHitNtupletGeneratorKernels; + + uint32_t nhits = hh.metadata().size(); + + auto blockSize = 64; + + // classify tracks based on kinematics + auto numberOfBlocks = cms::alpakatools::divide_up_by(3 * TrackerTraits::maxNumberOfQuadruplets / 4, blockSize); + auto workDiv1D = cms::alpakatools::make_workdiv(numberOfBlocks, blockSize); + alpaka::exec( + queue, workDiv1D, kernel_classifyTracks{}, tracks_view, this->m_params.qualityCuts_); + + if (this->m_params.lateFishbone_) { + // apply fishbone cleaning to good tracks + numberOfBlocks = cms::alpakatools::divide_up_by(3 * m_params.caParams_.maxNumberOfDoublets_ / 4, blockSize); + workDiv1D = cms::alpakatools::make_workdiv(numberOfBlocks, blockSize); + alpaka::exec(queue, + workDiv1D, + kernel_fishboneCleaner{}, + this->device_theCells_.data(), + this->device_nCells_.data(), + tracks_view); + } + + // mark duplicates (tracks that share a doublet) + numberOfBlocks = cms::alpakatools::divide_up_by(3 * m_params.caParams_.maxNumberOfDoublets_ / 4, blockSize); + workDiv1D = cms::alpakatools::make_workdiv(numberOfBlocks, blockSize); + alpaka::exec(queue, + workDiv1D, + kernel_fastDuplicateRemover{}, + this->device_theCells_.data(), + this->device_nCells_.data(), + tracks_view, + this->m_params.dupPassThrough_); +#ifdef GPU_DEBUG + alpaka::wait(queue); +#endif + + if (this->m_params.doSharedHitCut_ || this->m_params.doStats_) { + // fill hit->track "map" + numberOfBlocks = cms::alpakatools::divide_up_by(3 * TrackerTraits::maxNumberOfQuadruplets / 4, blockSize); + workDiv1D = cms::alpakatools::make_workdiv(numberOfBlocks, blockSize); + alpaka::exec(queue, + workDiv1D, + kernel_countHitInTracks{}, + tracks_view, + this->device_hitToTuple_.data()); //CHECK + + cms::alpakatools::launchFinalize(this->device_hitToTuple_.data(), queue); + alpaka::exec( + queue, workDiv1D, kernel_fillHitInTracks{}, tracks_view, this->device_hitToTuple_.data()); +#ifdef GPU_DEBUG + 
alpaka::wait(queue); +#endif + } + + if (this->m_params.doSharedHitCut_) { + // mark duplicates (tracks that share at least one hit) + numberOfBlocks = cms::alpakatools::divide_up_by(3 * TrackerTraits::maxNumberOfQuadruplets / 4, + blockSize); // TODO: Check if correct + workDiv1D = cms::alpakatools::make_workdiv(numberOfBlocks, blockSize); + alpaka::exec(queue, + workDiv1D, + kernel_rejectDuplicate{}, + tracks_view, + this->m_params.minHitsForSharingCut_, + this->m_params.dupPassThrough_, + this->device_hitToTuple_.data()); + + alpaka::exec(queue, + workDiv1D, + kernel_sharedHitCleaner{}, + hh, + tracks_view, + this->m_params.minHitsForSharingCut_, + this->m_params.dupPassThrough_, + this->device_hitToTuple_.data()); + + if (this->m_params.useSimpleTripletCleaner_) { + numberOfBlocks = cms::alpakatools::divide_up_by(HitToTuple::capacity(), blockSize); + workDiv1D = cms::alpakatools::make_workdiv(numberOfBlocks, blockSize); + alpaka::exec(queue, + workDiv1D, + kernel_simpleTripletCleaner{}, + tracks_view, + this->m_params.minHitsForSharingCut_, + this->m_params.dupPassThrough_, + this->device_hitToTuple_.data()); + } else { + numberOfBlocks = cms::alpakatools::divide_up_by(HitToTuple::capacity(), blockSize); + workDiv1D = cms::alpakatools::make_workdiv(numberOfBlocks, blockSize); + alpaka::exec(queue, + workDiv1D, + kernel_tripletCleaner{}, + tracks_view, + this->m_params.minHitsForSharingCut_, + this->m_params.dupPassThrough_, + this->device_hitToTuple_.data()); + } +#ifdef GPU_DEBUG + alpaka::wait(queue); +#endif + } + + if (this->m_params.doStats_) { + numberOfBlocks = + cms::alpakatools::divide_up_by(std::max(nhits, m_params.caParams_.maxNumberOfDoublets_), blockSize); + workDiv1D = cms::alpakatools::make_workdiv(numberOfBlocks, blockSize); + + alpaka::exec(queue, + workDiv1D, + kernel_checkOverflows{}, + tracks_view, + this->device_tupleMultiplicity_.data(), + this->device_hitToTuple_.data(), + this->device_hitTuple_apc_, + this->device_theCells_.data(), + 
this->device_nCells_.data(), + this->device_theCellNeighbors_.data(), + this->device_theCellTracks_.data(), + this->isOuterHitOfCell_.data(), + nhits, + this->m_params.caParams_.maxNumberOfDoublets_, + this->counters_.data()); + } + + if (this->m_params.doStats_) { + // counters (add flag???) + + numberOfBlocks = cms::alpakatools::divide_up_by(HitToTuple::capacity(), blockSize); + workDiv1D = cms::alpakatools::make_workdiv(numberOfBlocks, blockSize); + alpaka::exec(queue, + workDiv1D, + kernel_doStatsForHitInTracks{}, + this->device_hitToTuple_.data(), + this->counters_.data()); + + numberOfBlocks = cms::alpakatools::divide_up_by(3 * TrackerTraits::maxNumberOfQuadruplets / 4, blockSize); + workDiv1D = cms::alpakatools::make_workdiv(numberOfBlocks, blockSize); + alpaka::exec( + queue, workDiv1D, kernel_doStatsForTracks{}, tracks_view, this->counters_.data()); + } +#ifdef GPU_DEBUG + alpaka::wait(queue); +#endif + +#ifdef DUMP_GPU_TK_TUPLES + static std::atomic iev(0); + static std::mutex lock; + workDiv1D = cms::alpakatools::make_workdiv(1u, 32u); + { + std::lock_guard guard(lock); + ++iev; + for (int k = 0; k < 20000; k += 500) { + alpaka::exec(queue, + workDiv1D, + kernel_print_found_ntuplets{}, + hh, + tracks_view, + this->device_hitToTuple_.data(), + k, + k + 500, + iev); + alpaka::wait(queue); + } + alpaka::exec(queue, + workDiv1D, + kernel_print_found_ntuplets{}, + hh, + tracks_view, + this->device_hitToTuple_.data(), + 20000, + 1000000, + iev); + + alpaka::wait(queue); + } +#endif + } +/* +template +void CAHitNtupletGeneratorKernels::printCounters() { + auto workDiv1D = cms::alpakatools::make_workdiv(1,1); + alpaka::exec(queue_,workDiv1D,kernel_printCounters{},this->counters_.data()); +} +*/ + template class CAHitNtupletGeneratorKernels; + template class CAHitNtupletGeneratorKernels; + template class CAHitNtupletGeneratorKernels; + +} // namespace ALPAKA_ACCELERATOR_NAMESPACE diff --git a/RecoTracker/PixelSeeding/plugins/alpaka/CAHitNtupletGeneratorKernels.h 
b/RecoTracker/PixelSeeding/plugins/alpaka/CAHitNtupletGeneratorKernels.h new file mode 100644 index 0000000000000..0d4adb57155fc --- /dev/null +++ b/RecoTracker/PixelSeeding/plugins/alpaka/CAHitNtupletGeneratorKernels.h @@ -0,0 +1,315 @@ +#ifndef RecoPixelVertexing_PixelTriplets_CAHitNtupletGeneratorKernels_h +#define RecoPixelVertexing_PixelTriplets_CAHitNtupletGeneratorKernels_h + +// #define GPU_DEBUG +#include +#include +#include "CACell.h" +#include "CAPixelDoublets.h" +#include "CAStructures.h" + +#include "DataFormats/TrackSoA/interface/alpaka/TrackUtilities.h" +#include "DataFormats/TrackSoA/interface/TrackDefinitions.h" +#include "DataFormats/TrackSoA/interface/TrackSoAHost.h" +#include "DataFormats/TrackingRecHitSoA/interface/TrackingRecHitsLayout.h" +#include "HeterogeneousCore/AlpakaInterface/interface/AtomicPairCounter.h" +#include "HeterogeneousCore/AlpakaInterface/interface/HistoContainer.h" +#include "HeterogeneousCore/AlpakaInterface/interface/memory.h" + +// #define DUMP_GPU_TK_TUPLES + +namespace ALPAKA_ACCELERATOR_NAMESPACE { + namespace caHitNtupletGenerator { + + //Configuration params common to all topologies, for the algorithms + struct AlgoParams { + const uint32_t minHitsForSharingCut_; + const bool useRiemannFit_; + const bool fitNas4_; + const bool includeJumpingForwardDoublets_; + const bool earlyFishbone_; + const bool lateFishbone_; + const bool doStats_; + const bool doSharedHitCut_; + const bool dupPassThrough_; + const bool useSimpleTripletCleaner_; + }; + + //CAParams + struct CACommon { + const uint32_t maxNumberOfDoublets_; + const uint32_t minHitsPerNtuplet_; + const float ptmin_; + const float CAThetaCutBarrel_; + const float CAThetaCutForward_; + const float hardCurvCut_; + const float dcaCutInnerTriplet_; + const float dcaCutOuterTriplet_; + }; + + template + struct CAParamsT : public CACommon { + ALPAKA_FN_ACC ALPAKA_FN_INLINE bool startingLayerPair(int16_t pid) const { return false; }; + ALPAKA_FN_ACC ALPAKA_FN_INLINE bool 
startAt0(int16_t pid) const { return false; }; + }; + + template + struct CAParamsT> : public CACommon { + /// Is this a starting layer pair? + ALPAKA_FN_ACC ALPAKA_FN_INLINE bool startingLayerPair(int16_t pid) const { + return minHitsPerNtuplet_ > 3 ? pid < 3 : pid < 8 || pid > 12; + } + + /// Is this a pair with inner == 0? + ALPAKA_FN_ACC ALPAKA_FN_INLINE bool startAt0(int16_t pid) const { + assert((pixelTopology::Phase1::layerPairs[pid * 2] == 0) == + (pid < 3 || pid == 13 || pid == 15 || pid == 16)); // to be 100% sure it's working, may be removed + return pixelTopology::Phase1::layerPairs[pid * 2] == 0; + } + }; + + template + struct CAParamsT> : public CACommon { + const bool includeFarForwards_; + /// Is this a starting layer pair? + ALPAKA_FN_ACC ALPAKA_FN_INLINE bool startingLayerPair(int16_t pid) const { + return pid < 33; // in principle one could remove 5,6,7 23, 28 and 29 + } + + /// Is this a pair with inner == 0? + ALPAKA_FN_ACC ALPAKA_FN_INLINE bool startAt0(int16_t pid) const { + assert((pixelTopology::Phase2::layerPairs[pid * 2] == 0) == ((pid < 3) | (pid >= 23 && pid < 28))); + return pixelTopology::Phase2::layerPairs[pid * 2] == 0; + } + }; + + //Full list of params = algo params + ca params + cell params + quality cuts + //Generic template + template + struct ParamsT : public AlgoParams { + // one should define the params for its own pixelTopology + // not defining anything here + inline uint32_t nPairs() const { return 0; } + }; + + template + struct ParamsT> : public AlgoParams { + using TT = TrackerTraits; + using QualityCuts = ::pixelTrack::QualityCutsT; //track quality cuts + using CellCuts = caPixelDoublets::CellCutsT; //cell building cuts + using CAParams = CAParamsT; //params to be used on device + + ParamsT(AlgoParams const& commonCuts, + CellCuts const& cellCuts, + QualityCuts const& cutsCuts, + CAParams const& caParams) + : AlgoParams(commonCuts), cellCuts_(cellCuts), qualityCuts_(cutsCuts), caParams_(caParams) {} + + const CellCuts 
cellCuts_; + const QualityCuts qualityCuts_{// polynomial coefficients for the pT-dependent chi2 cut + {0.68177776, 0.74609577, -0.08035491, 0.00315399}, + // max pT used to determine the chi2 cut + 10., + // chi2 scale factor: 30 for broken line fit, 45 for Riemann fit + 30., + // regional cuts for triplets + { + 0.3, // |Tip| < 0.3 cm + 0.5, // pT > 0.5 GeV + 12.0 // |Zip| < 12.0 cm + }, + // regional cuts for quadruplets + { + 0.5, // |Tip| < 0.5 cm + 0.3, // pT > 0.3 GeV + 12.0 // |Zip| < 12.0 cm + }}; + const CAParams caParams_; + /// Compute the number of pairs + inline uint32_t nPairs() const { + // take all layer pairs into account + uint32_t nActualPairs = TT::nPairs; + if (not includeJumpingForwardDoublets_) { + // exclude forward "jumping" layer pairs + nActualPairs = TT::nPairsForTriplets; + } + if (caParams_.minHitsPerNtuplet_ > 3) { + // for quadruplets, exclude all "jumping" layer pairs + nActualPairs = TT::nPairsForQuadruplets; + } + + return nActualPairs; + } + + }; // Params Phase1 + + template + struct ParamsT> : public AlgoParams { + using TT = TrackerTraits; + using QualityCuts = ::pixelTrack::QualityCutsT; + using CellCuts = caPixelDoublets::CellCutsT; + using CAParams = CAParamsT; + + ParamsT(AlgoParams const& commonCuts, + CellCuts const& cellCuts, + QualityCuts const& qualityCuts, + CAParams const& caParams) + : AlgoParams(commonCuts), cellCuts_(cellCuts), qualityCuts_(qualityCuts), caParams_(caParams) {} + + // quality cuts + const CellCuts cellCuts_; + const QualityCuts qualityCuts_{5.0f, /*chi2*/ 0.9f, /* pT in GeV*/ 0.4f, /*zip in cm*/ 12.0f /*tip in cm*/}; + const CAParams caParams_; + + inline uint32_t nPairs() const { + // start from the minimal set of layer pairs + uint32_t nActualPairs = TT::nPairsMinimal; + if (caParams_.includeFarForwards_) { + // consider far forwards (> 11 & > 23) + nActualPairs = TT::nPairsFarForwards; + } + if (includeJumpingForwardDoublets_) { + // include jumping forwards + nActualPairs = TT::nPairs; + } + + 
return nActualPairs; + } + + }; // Params Phase2 + + // counters + struct Counters { + unsigned long long nEvents; + unsigned long long nHits; + unsigned long long nCells; + unsigned long long nTuples; + unsigned long long nFitTracks; + unsigned long long nLooseTracks; + unsigned long long nGoodTracks; + unsigned long long nUsedHits; + unsigned long long nDupHits; + unsigned long long nFishCells; + unsigned long long nKilledCells; + unsigned long long nEmptyCells; + unsigned long long nZeroTrackCells; + }; + + using Quality = ::pixelTrack::Quality; + + } // namespace caHitNtupletGenerator + + template + class CAHitNtupletGeneratorKernels { + public: + using TrackerTraits = TTTraits; + using QualityCuts = ::pixelTrack::QualityCutsT; + using CellCuts = caPixelDoublets::CellCutsT; + using Params = caHitNtupletGenerator::ParamsT; + using CAParams = caHitNtupletGenerator::CAParamsT; + using Counters = caHitNtupletGenerator::Counters; + + using HitsView = TrackingRecHitAlpakaSoAView; + using HitsConstView = TrackingRecHitAlpakaSoAConstView; + using TkSoAView = TrackSoAView; + + using HitToTuple = caStructures::HitToTupleT; + using TupleMultiplicity = caStructures::TupleMultiplicityT; + using CellNeighborsVector = caStructures::CellNeighborsVectorT; + using CellNeighbors = caStructures::CellNeighborsT; + using CellTracksVector = caStructures::CellTracksVectorT; + using CellTracks = caStructures::CellTracksT; + using OuterHitOfCellContainer = caStructures::OuterHitOfCellContainerT; + using OuterHitOfCell = caStructures::OuterHitOfCellT; + + using CACell = CACellT; + + using Quality = ::pixelTrack::Quality; + using HitContainer = typename TrackSoA::HitContainer; + + CAHitNtupletGeneratorKernels(Params const& params, uint32_t nhits, Queue& queue) + : m_params(params), + ////////////////////////////////////////////////////////// + // ALLOCATIONS FOR THE INTERMEDIATE RESULTS (STAYS ON WORKER) + ////////////////////////////////////////////////////////// + 
counters_{cms::alpakatools::make_device_buffer(queue)}, + + // workspace + device_hitToTuple_{cms::alpakatools::make_device_buffer(queue)}, + device_tupleMultiplicity_{cms::alpakatools::make_device_buffer(queue)}, + + // NB: In legacy, device_theCells_ and device_isOuterHitOfCell_ were allocated inside buildDoublets + device_theCells_{ + cms::alpakatools::make_device_buffer(queue, m_params.caParams_.maxNumberOfDoublets_)}, + // in principle we can use "nhits" to heuristically dimension the workspace... + device_isOuterHitOfCell_{ + cms::alpakatools::make_device_buffer(queue, std::max(1u, nhits))}, + isOuterHitOfCell_{cms::alpakatools::make_device_buffer(queue)}, + + device_theCellNeighbors_{cms::alpakatools::make_device_buffer(queue)}, + device_theCellTracks_{cms::alpakatools::make_device_buffer(queue)}, + // NB: In legacy, cellStorage_ was allocated inside buildDoublets + cellStorage_{cms::alpakatools::make_device_buffer( + queue, + TrackerTraits::maxNumOfActiveDoublets * sizeof(CellNeighbors) + + TrackerTraits::maxNumOfActiveDoublets * sizeof(CellTracks))}, + device_cellCuts_{cms::alpakatools::make_device_buffer(queue)}, + device_theCellNeighborsContainer_{reinterpret_cast(cellStorage_.data())}, + device_theCellTracksContainer_{reinterpret_cast( + cellStorage_.data() + TrackerTraits::maxNumOfActiveDoublets * sizeof(CellNeighbors))}, + + // NB: In legacy, device_storage_ was allocated inside allocateOnGPU + device_storage_{ + cms::alpakatools::make_device_buffer(queue, 3u)}, + device_hitTuple_apc_{reinterpret_cast(device_storage_.data())}, + device_hitToTuple_apc_{reinterpret_cast(device_storage_.data() + 1)}, + device_nCells_{cms::alpakatools::make_device_view(alpaka::getDev(queue), + *reinterpret_cast(device_storage_.data() + 2))} { + alpaka::memset(queue, counters_, 0); + alpaka::memset(queue, device_nCells_, 0); + alpaka::memset(queue, cellStorage_, 0); + + auto cellCuts_h = cms::alpakatools::make_host_view(m_params.cellCuts_); + alpaka::memcpy(queue, 
device_cellCuts_, cellCuts_h); + + cms::alpakatools::launchZero(device_tupleMultiplicity_.data(), queue); + cms::alpakatools::launchZero(device_hitToTuple_.data(), queue); + } + + ~CAHitNtupletGeneratorKernels() = default; + + TupleMultiplicity const* tupleMultiplicity() const { return device_tupleMultiplicity_.data(); } + + void launchKernels(const HitsConstView& hh, TkSoAView& track_view, Queue& queue); + + void classifyTuples(const HitsConstView& hh, TkSoAView& track_view, Queue& queue); + + void buildDoublets(const HitsConstView& hh, Queue& queue); + + static void printCounters(); + + protected: + // params + Params const& m_params; + cms::alpakatools::device_buffer counters_; + + // workspace + cms::alpakatools::device_buffer device_hitToTuple_; + cms::alpakatools::device_buffer device_tupleMultiplicity_; + cms::alpakatools::device_buffer device_theCells_; + cms::alpakatools::device_buffer device_isOuterHitOfCell_; + cms::alpakatools::device_buffer isOuterHitOfCell_; + cms::alpakatools::device_buffer device_theCellNeighbors_; + cms::alpakatools::device_buffer device_theCellTracks_; + cms::alpakatools::device_buffer cellStorage_; + cms::alpakatools::device_buffer device_cellCuts_; + CellNeighbors* device_theCellNeighborsContainer_; + CellTracks* device_theCellTracksContainer_; + cms::alpakatools::device_buffer device_storage_; + cms::alpakatools::AtomicPairCounter* device_hitTuple_apc_; + cms::alpakatools::AtomicPairCounter* device_hitToTuple_apc_; + cms::alpakatools::device_view device_nCells_; + }; +} // namespace ALPAKA_ACCELERATOR_NAMESPACE + +#endif // RecoPixelVertexing_PixelTriplets_CAHitNtupletGeneratorKernels_h diff --git a/RecoTracker/PixelSeeding/plugins/alpaka/CAHitNtupletGeneratorKernelsImpl.h b/RecoTracker/PixelSeeding/plugins/alpaka/CAHitNtupletGeneratorKernelsImpl.h new file mode 100644 index 0000000000000..dcf46ef08dc4e --- /dev/null +++ b/RecoTracker/PixelSeeding/plugins/alpaka/CAHitNtupletGeneratorKernelsImpl.h @@ -0,0 +1,1050 @@ +// 
+// Original Author: Felice Pantaleo, CERN +// + +// #define NTUPLE_DEBUG +// #define GPU_DEBUG + +#include +#include +#include +#include + +#include "HeterogeneousCore/AlpakaInterface/interface/config.h" +#include "HeterogeneousCore/AlpakaInterface/interface/traits.h" +#include "HeterogeneousCore/AlpakaInterface/interface/workdivision.h" +#include "RecoLocalTracker/SiPixelRecHits/interface/pixelCPEforDevice.h" +#include "DataFormats/TrackSoA/interface/alpaka/TrackUtilities.h" +#include "DataFormats/TrackingRecHitSoA/interface/TrackingRecHitsLayout.h" + +#include "CAStructures.h" +#include "CAHitNtupletGeneratorKernels.h" +#include "CACell.h" +#include "CAFishbone.h" +#include "CAPixelDoublets.h" + +namespace ALPAKA_ACCELERATOR_NAMESPACE { + namespace caHitNtupletGeneratorKernels { + + constexpr uint32_t tkNotFound = std::numeric_limits::max(); + constexpr float maxScore = std::numeric_limits::max(); + constexpr float nSigma2 = 25.f; + + //all of these below are mostly to avoid bringing around the relative namespace + + template + using HitToTuple = caStructures::HitToTupleT; + + template + using TupleMultiplicity = caStructures::TupleMultiplicityT; + + template + using CellNeighborsVector = caStructures::CellNeighborsVectorT; + + template + using CellTracksVector = caStructures::CellTracksVectorT; + + template + using OuterHitOfCell = caStructures::OuterHitOfCellT; + + using Quality = ::pixelTrack::Quality; + + template + using TkSoAView = TrackSoAView; + + template + using HitContainer = typename TrackSoA::HitContainer; + + template + using HitsConstView = typename CACellT::HitsConstView; + + template + using QualityCuts = ::pixelTrack::QualityCutsT; + + template + using CAParams = caHitNtupletGenerator::CAParamsT; + + using Counters = caHitNtupletGenerator::Counters; + + template + class kernel_checkOverflows { + public: + template >> + ALPAKA_FN_ACC void operator()(TAcc const &acc, + TkSoAView tracks_view, + TupleMultiplicity const *tupleMultiplicity, + 
HitToTuple const *hitToTuple, + cms::alpakatools::AtomicPairCounter *apc, + CACellT const *__restrict__ cells, + uint32_t const *__restrict__ nCells, + CellNeighborsVector const *cellNeighbors, + CellTracksVector const *cellTracks, + OuterHitOfCell const *isOuterHitOfCell, + int32_t nHits, + uint32_t maxNumberOfDoublets, + Counters *counters) const { + const uint32_t threadIdx(alpaka::getIdx(acc)[0u]); + + auto &c = *counters; + // counters once per event + if (0 == threadIdx) { + alpaka::atomicAdd(acc, &c.nEvents, 1ull, alpaka::hierarchy::Blocks{}); + alpaka::atomicAdd(acc, &c.nHits, static_cast(nHits), alpaka::hierarchy::Blocks{}); + alpaka::atomicAdd(acc, &c.nCells, static_cast(*nCells), alpaka::hierarchy::Blocks{}); + alpaka::atomicAdd( + acc, &c.nTuples, static_cast(apc->get().first), alpaka::hierarchy::Blocks{}); + alpaka::atomicAdd(acc, + &c.nFitTracks, + static_cast(tupleMultiplicity->size()), + alpaka::hierarchy::Blocks{}); + } + +#ifdef NTUPLE_DEBUGS + if (0 == threadIdx) { + printf("number of found cells %d \n found tuples %d with total hits %d out of %d\n", + *nCells, + apc->get().first, + apc->get().second, + nHits); + if (apc->get().first < TrackerTraits::maxNumberOfQuadruplets) { + ALPAKA_ASSERT_OFFLOAD(tracks_view.hitIndices().size(apc->get().first) == 0); + ALPAKA_ASSERT_OFFLOAD(tracks_view.hitIndices().size() == apc->get().second); + } + } + const auto ntNbins = foundNtuplets->nbins(); + + for (auto idx : cms::alpakatools::elements_with_stride(acc, ntBins)) { + if (tracks_view.hitIndices().size(idx) > TrackerTraits::maxHitsOnTrack) // current real limit + printf("ERROR %d, %d\n", idx, tracks_view.hitIndices().size(idx)); + ALPAKA_ASSERT_OFFLOAD(ftracks_view.hitIndices().size(idx) <= TrackerTraits::maxHitsOnTrack); + for (auto ih = tracks_view.hitIndices().begin(idx); ih != tracks_view.hitIndices().end(idx); ++ih) + ALPAKA_ASSERT_OFFLOAD(int(*ih) < nHits); + } +#endif + + if (0 == threadIdx) { + if (apc->get().first >= 
TrackerTraits::maxNumberOfQuadruplets) + printf("Tuples overflow\n"); + if (*nCells >= maxNumberOfDoublets) + printf("Cells overflow\n"); + if (cellNeighbors && cellNeighbors->full()) + printf("cellNeighbors overflow %d %d \n", cellNeighbors->capacity(), cellNeighbors->size()); + if (cellTracks && cellTracks->full()) + printf("cellTracks overflow\n"); + if (int(hitToTuple->nbins()) < nHits) + printf("ERROR hitToTuple overflow %d %d\n", hitToTuple->nbins(), nHits); +#ifdef GPU_DEBUG + printf("size of cellNeighbors %d \n cellTracks %d \n hitToTuple %d \n", + cellNeighbors->size(), + cellTracks->size(), + hitToTuple->size()); +#endif + } + + const auto ntNCells = (*nCells); + for (auto idx : cms::alpakatools::elements_with_stride(acc, ntNCells)) { + auto const &thisCell = cells[idx]; + if (thisCell.hasFishbone() && !thisCell.isKilled()) + alpaka::atomicAdd(acc, &c.nFishCells, 1ull, alpaka::hierarchy::Blocks{}); + if (thisCell.outerNeighbors().full()) //++tooManyNeighbors[thisCell.theLayerPairId]; + printf("OuterNeighbors overflow %d in %d\n", idx, thisCell.layerPairId()); + if (thisCell.tracks().full()) //++tooManyTracks[thisCell.theLayerPairId]; + printf("Tracks overflow %d in %d\n", idx, thisCell.layerPairId()); + if (thisCell.isKilled()) + alpaka::atomicAdd(acc, &c.nKilledCells, 1ull, alpaka::hierarchy::Blocks{}); + if (!thisCell.unused()) + alpaka::atomicAdd(acc, &c.nEmptyCells, 1ull, alpaka::hierarchy::Blocks{}); + if ((0 == hitToTuple->size(thisCell.inner_hit_id())) && (0 == hitToTuple->size(thisCell.outer_hit_id()))) + alpaka::atomicAdd(acc, &c.nZeroTrackCells, 1ull, alpaka::hierarchy::Blocks{}); + } + + for (auto idx : cms::alpakatools::elements_with_stride(acc, nHits)) + if ((*isOuterHitOfCell).container[idx].full()) // ++tooManyOuterHitOfCell; + printf("OuterHitOfCell overflow %d\n", idx); + } + }; + + template + class kernel_fishboneCleaner { + public: + template >> + ALPAKA_FN_ACC void operator()(TAcc const &acc, + CACellT const *cells, + uint32_t const 
*__restrict__ nCells, + TkSoAView tracks_view) const { + constexpr auto reject = Quality::dup; + const auto ntNCells = (*nCells); + + for (auto idx : cms::alpakatools::elements_with_stride(acc, ntNCells)) { + auto const &thisCell = cells[idx]; + if (!thisCell.isKilled()) + continue; + + for (auto it : thisCell.tracks()) + tracks_view[it].quality() = reject; + } + } + }; + // remove shorter tracks if sharing a cell + // It does not seem to affect efficiency in any way! + template + class kernel_earlyDuplicateRemover { + public: + template >> + ALPAKA_FN_ACC void operator()(TAcc const &acc, + CACellT const *cells, + uint32_t const *__restrict__ nCells, + TkSoAView tracks_view, + bool dupPassThrough) const { + // quality to mark rejected + constexpr auto reject = Quality::edup; /// cannot be loose + ALPAKA_ASSERT_OFFLOAD(nCells); + const auto ntNCells = (*nCells); + + for (auto idx : cms::alpakatools::elements_with_stride(acc, ntNCells)) { + auto const &thisCell = cells[idx]; + + if (thisCell.tracks().size() < 2) + continue; + + int8_t maxNl = 0; + + // find maxNl + for (auto it : thisCell.tracks()) { + auto nl = tracks_view[it].nLayers(); + maxNl = std::max(nl, maxNl); + } + + // if (maxNl<4) continue; + // quad pass through (leave it here for tests) + // maxNl = std::min(4, maxNl); + + for (auto it : thisCell.tracks()) { + if (tracks_view[it].nLayers() < maxNl) + tracks_view[it].quality() = reject; //no race: simple assignment of the same constant + } + } + } + }; + + // assume the above (so, short tracks already removed) + template + class kernel_fastDuplicateRemover { + public: + template >> + ALPAKA_FN_ACC void operator()(TAcc const &acc, + CACellT const *__restrict__ cells, + uint32_t const *__restrict__ nCells, + TkSoAView tracks_view, + bool dupPassThrough) const { + // quality to mark rejected + auto const reject = dupPassThrough ? 
Quality::loose : Quality::dup; + constexpr auto loose = Quality::loose; + + ALPAKA_ASSERT_OFFLOAD(nCells); + const auto ntNCells = (*nCells); + + for (auto idx : cms::alpakatools::elements_with_stride(acc, ntNCells)) { + auto const &thisCell = cells[idx]; + if (thisCell.tracks().size() < 2) + continue; + + float mc = maxScore; + uint16_t im = tkNotFound; + + auto score = [&](auto it) { return std::abs(TracksUtilities::tip(tracks_view, it)); }; + + // full crazy combinatorics + int ntr = thisCell.tracks().size(); + for (int i = 0; i < ntr - 1; ++i) { + auto it = thisCell.tracks()[i]; + auto qi = tracks_view[it].quality(); + if (qi <= reject) + continue; + auto opi = tracks_view[it].state()(2); + auto e2opi = tracks_view[it].covariance()(9); + auto cti = tracks_view[it].state()(3); + auto e2cti = tracks_view[it].covariance()(12); + for (auto j = i + 1; j < ntr; ++j) { + auto jt = thisCell.tracks()[j]; + auto qj = tracks_view[jt].quality(); + if (qj <= reject) + continue; + auto opj = tracks_view[jt].state()(2); + auto ctj = tracks_view[jt].state()(3); + auto dct = nSigma2 * (tracks_view[jt].covariance()(12) + e2cti); + if ((cti - ctj) * (cti - ctj) > dct) + continue; + auto dop = nSigma2 * (tracks_view[jt].covariance()(9) + e2opi); + if ((opi - opj) * (opi - opj) > dop) + continue; + if ((qj < qi) || (qj == qi && score(it) < score(jt))) + tracks_view[jt].quality() = reject; + else { + tracks_view[it].quality() = reject; + break; + } + } + } + + // find maxQual + auto maxQual = reject; // no duplicate! 
+ for (auto it : thisCell.tracks()) { + if (tracks_view[it].quality() > maxQual) + maxQual = tracks_view[it].quality(); + } + + if (maxQual <= loose) + continue; + + // find min score + for (auto it : thisCell.tracks()) { + if (tracks_view[it].quality() == maxQual && score(it) < mc) { + mc = score(it); + im = it; + } + } + + if (tkNotFound == im) + continue; + + // mark all other duplicates (not yet, keep it loose) + for (auto it : thisCell.tracks()) { + if (tracks_view[it].quality() > loose && it != im) + tracks_view[it].quality() = loose; //no race: simple assignment of the same constant + } + } + } + }; + + template + class kernel_connect { + public: + template >> + ALPAKA_FN_ACC void operator()(TAcc const &acc, + cms::alpakatools::AtomicPairCounter *apc1, + cms::alpakatools::AtomicPairCounter *apc2, // just to zero them + HitsConstView hh, + CACellT *cells, + uint32_t *nCells, + CellNeighborsVector *cellNeighbors, + OuterHitOfCell const *isOuterHitOfCell, + CAParams params) const { + using Cell = CACellT; + + const uint32_t dimIndexY = 0u; + const uint32_t dimIndexX = 1u; + const uint32_t threadIdxY(alpaka::getIdx(acc)[dimIndexY]); + const uint32_t threadIdxLocalX(alpaka::getIdx(acc)[dimIndexX]); + + if (0 == (threadIdxY + threadIdxLocalX)) { + (*apc1) = 0; + (*apc2) = 0; + } // ready for next kernel + + constexpr uint32_t last_bpix1_detIndex = TrackerTraits::last_bpix1_detIndex; + constexpr uint32_t last_barrel_detIndex = TrackerTraits::last_barrel_detIndex; + + cms::alpakatools::for_each_element_in_grid_strided( + acc, + (*nCells), + 0u, + [&](uint32_t idx) { + auto cellIndex = idx; + auto &thisCell = cells[idx]; + auto innerHitId = thisCell.inner_hit_id(); + if (int(innerHitId) >= isOuterHitOfCell->offset) { + uint32_t numberOfPossibleNeighbors = (*isOuterHitOfCell)[innerHitId].size(); + auto vi = (*isOuterHitOfCell)[innerHitId].data(); + + auto ri = thisCell.inner_r(hh); + auto zi = thisCell.inner_z(hh); + + auto ro = thisCell.outer_r(hh); + auto zo = 
thisCell.outer_z(hh); + auto isBarrel = thisCell.inner_detIndex(hh) < last_barrel_detIndex; + + cms::alpakatools::for_each_element_in_block_strided( + acc, + numberOfPossibleNeighbors, + 0u, + [&](uint32_t j) { + auto otherCell = (vi[j]); + auto &oc = cells[otherCell]; + auto r1 = oc.inner_r(hh); + auto z1 = oc.inner_z(hh); + bool aligned = Cell::areAlignedRZ( + r1, + z1, + ri, + zi, + ro, + zo, + params.ptmin_, + isBarrel ? params.CAThetaCutBarrel_ + : params.CAThetaCutForward_); // 2.f*thetaCut); // FIXME tune cuts + if (aligned && + thisCell.dcaCut(hh, + oc, + oc.inner_detIndex(hh) < last_bpix1_detIndex ? params.dcaCutInnerTriplet_ + : params.dcaCutOuterTriplet_, + params.hardCurvCut_)) { // FIXME tune cuts + oc.addOuterNeighbor(acc, cellIndex, *cellNeighbors); + thisCell.setStatusBits(Cell::StatusBit::kUsed); + oc.setStatusBits(Cell::StatusBit::kUsed); + } + }, + dimIndexX); // loop on inner cells + } + }, + dimIndexY); // loop on outer cells + } + }; + template + class kernel_find_ntuplets { + public: + template >> + ALPAKA_FN_ACC void operator()(TAcc const &acc, + HitsConstView hh, + TkSoAView tracks_view, + CACellT *__restrict__ cells, + uint32_t const *nCells, + CellTracksVector *cellTracks, + cms::alpakatools::AtomicPairCounter *apc, + CAParams params) const { + // recursive: not obvious to widen + + using Cell = CACellT; + +#ifdef GPU_DEBUG + const uint32_t threadIdx(alpaka::getIdx(acc)[0u]); + if (threadIdx == 0) + printf("starting producing ntuplets from %d cells \n", *nCells); +#endif + + for (auto idx : cms::alpakatools::elements_with_stride(acc, (*nCells))) { + auto const &thisCell = cells[idx]; + + if (thisCell.isKilled()) + continue; // cut by earlyFishbone + + // we require at least three hits... 
+ + if (thisCell.outerNeighbors().empty()) + continue; + + auto pid = thisCell.layerPairId(); + bool doit = params.startingLayerPair(pid); + + constexpr uint32_t maxDepth = TrackerTraits::maxDepth; + + if (doit) { + typename Cell::TmpTuple stack; + stack.reset(); + bool bpix1Start = params.startAt0(pid); + thisCell.template find_ntuplets(acc, + hh, + cells, + *cellTracks, + tracks_view.hitIndices(), + *apc, + tracks_view.quality(), + stack, + params.minHitsPerNtuplet_, + bpix1Start); + ALPAKA_ASSERT_OFFLOAD(stack.empty()); + } + } + } + }; + + template + class kernel_mark_used { + public: + template >> + ALPAKA_FN_ACC void operator()(TAcc const &acc, + CACellT *__restrict__ cells, + uint32_t const *nCells) const { + using Cell = CACellT; + for (auto idx : cms::alpakatools::elements_with_stride(acc, (*nCells))) { + auto &thisCell = cells[idx]; + if (!thisCell.tracks().empty()) + thisCell.setStatusBits(Cell::StatusBit::kInTrack); + } + } + }; + + template + class kernel_countMultiplicity { + public: + template >> + ALPAKA_FN_ACC void operator()(TAcc const &acc, + TkSoAView tracks_view, + TupleMultiplicity *tupleMultiplicity) const { + for (auto it : cms::alpakatools::elements_with_stride(acc, tracks_view.hitIndices().nbins())) { + auto nhits = tracks_view.hitIndices().size(it); + if (nhits < 3) + continue; + if (tracks_view[it].quality() == Quality::edup) + continue; + ALPAKA_ASSERT_OFFLOAD(tracks_view[it].quality() == Quality::bad); + if (nhits > TrackerTraits::maxHitsOnTrack) // current limit + printf("wrong mult %d %d\n", it, nhits); + ALPAKA_ASSERT_OFFLOAD(nhits <= TrackerTraits::maxHitsOnTrack); + tupleMultiplicity->count(acc, nhits); + } + } + }; + + template + class kernel_fillMultiplicity { + public: + template >> + ALPAKA_FN_ACC void operator()(TAcc const &acc, + TkSoAView tracks_view, + TupleMultiplicity *tupleMultiplicity) const { + for (auto it : cms::alpakatools::elements_with_stride(acc, tracks_view.hitIndices().nbins())) { + auto nhits = 
tracks_view.hitIndices().size(it); + if (nhits < 3) + continue; + if (tracks_view[it].quality() == Quality::edup) + continue; + ALPAKA_ASSERT_OFFLOAD(tracks_view[it].quality() == Quality::bad); + if (nhits > TrackerTraits::maxHitsOnTrack) + printf("wrong mult %d %d\n", it, nhits); + ALPAKA_ASSERT_OFFLOAD(nhits <= TrackerTraits::maxHitsOnTrack); + tupleMultiplicity->fill(acc, nhits, it); + } + } + }; + + template + class kernel_classifyTracks { + public: + template >> + ALPAKA_FN_ACC void operator()(TAcc const &acc, + TkSoAView tracks_view, + QualityCuts cuts) const { + for (auto it : cms::alpakatools::elements_with_stride(acc, tracks_view.hitIndices().nbins())) { + auto nhits = tracks_view.hitIndices().size(it); + if (nhits == 0) + break; // guard + + // if duplicate: not even fit + if (tracks_view[it].quality() == Quality::edup) + continue; + + ALPAKA_ASSERT_OFFLOAD(tracks_view[it].quality() == Quality::bad); + + // mark doublets as bad + if (nhits < 3) + continue; + + // if the fit has any invalid parameters, mark it as bad + bool isNaN = false; + for (int i = 0; i < 5; ++i) { + isNaN |= std::isnan(tracks_view[it].state()(i)); + } + if (isNaN) { +#ifdef NTUPLE_DEBUG + printf("NaN in fit %d size %d chi2 %f\n", it, tracks_view.hitIndices().size(it), tracks_view[it].chi2()); +#endif + continue; + } + + tracks_view[it].quality() = Quality::strict; + + if (cuts.strictCut(tracks_view, it)) + continue; + + tracks_view[it].quality() = Quality::tight; + + if (cuts.isHP(tracks_view, nhits, it)) + tracks_view[it].quality() = Quality::highPurity; + } + } + }; + + template + class kernel_doStatsForTracks { + public: + template >> + ALPAKA_FN_ACC void operator()(TAcc const &acc, TkSoAView tracks_view, Counters *counters) const { + for (auto idx : cms::alpakatools::elements_with_stride(acc, tracks_view.hitIndices().nbins())) { + if (tracks_view.hitIndices().size(idx) == 0) + break; //guard + if (tracks_view[idx].quality() < Quality::loose) + continue; + alpaka::atomicAdd(acc, 
&(counters->nLooseTracks), 1ull, alpaka::hierarchy::Blocks{}); + if (tracks_view[idx].quality() < Quality::strict) + continue; + alpaka::atomicAdd(acc, &(counters->nGoodTracks), 1ull, alpaka::hierarchy::Blocks{}); + } + } + }; + + template + class kernel_countHitInTracks { + public: + template >> + ALPAKA_FN_ACC void operator()(TAcc const &acc, + TkSoAView tracks_view, + HitToTuple *hitToTuple) const { + for (auto idx : cms::alpakatools::elements_with_stride(acc, tracks_view.hitIndices().nbins())) { + if (tracks_view.hitIndices().size(idx) == 0) + break; // guard + for (auto h = tracks_view.hitIndices().begin(idx); h != tracks_view.hitIndices().end(idx); ++h) + hitToTuple->count(acc, *h); + } + } + }; + + template + class kernel_fillHitInTracks { + public: + template >> + ALPAKA_FN_ACC void operator()(TAcc const &acc, + TkSoAView tracks_view, + HitToTuple *hitToTuple) const { + for (auto idx : cms::alpakatools::elements_with_stride(acc, tracks_view.hitIndices().nbins())) { + if (tracks_view.hitIndices().size(idx) == 0) + break; // guard + for (auto h = tracks_view.hitIndices().begin(idx); h != tracks_view.hitIndices().end(idx); ++h) + hitToTuple->fill(acc, *h, idx); + } + } + }; + + template + class kernel_fillHitDetIndices { + public: + template >> + ALPAKA_FN_ACC void operator()(TAcc const &acc, + TkSoAView tracks_view, + HitsConstView hh) const { + // copy offsets + for (auto idx : cms::alpakatools::elements_with_stride(acc, tracks_view.hitIndices().totbins())) { + tracks_view.detIndices().off[idx] = tracks_view.hitIndices().off[idx]; + } + // fill hit indices + for (auto idx : cms::alpakatools::elements_with_stride(acc, tracks_view.hitIndices().size())) { + ALPAKA_ASSERT_OFFLOAD(tracks_view.hitIndices().bins[idx] < (uint32_t)hh.metadata().size()); + tracks_view.detIndices().bins[idx] = hh[tracks_view.hitIndices().bins[idx]].detectorIndex(); + } + } + }; + + template + class kernel_fillNLayers { + public: + template >> + ALPAKA_FN_ACC void operator()(TAcc const 
&acc, + TkSoAView tracks_view, + cms::alpakatools::AtomicPairCounter *apc) const { + // clamp the number of tracks to the capacity of the SoA + auto ntracks = std::min(apc->get().first, tracks_view.metadata().size() - 1); + const uint32_t threadIdx(alpaka::getIdx(acc)[0u]); + if (0 == threadIdx) + tracks_view.nTracks() = ntracks; + for (auto idx : cms::alpakatools::elements_with_stride(acc, ntracks)) { + ALPAKA_ASSERT_OFFLOAD(TracksUtilities::nHits(tracks_view, idx) >= 3); + tracks_view[idx].nLayers() = TracksUtilities::computeNumberOfLayers(tracks_view, idx); + } + } + }; + + template + class kernel_doStatsForHitInTracks { + public: + template >> + ALPAKA_FN_ACC void operator()(TAcc const &acc, + HitToTuple const *__restrict__ hitToTuple, + Counters *counters) const { + auto &c = *counters; + for (auto idx : cms::alpakatools::elements_with_stride(acc, hitToTuple->nbins())) { + if (hitToTuple->size(idx) == 0) + continue; // SHALL NOT BE break + alpaka::atomicAdd(acc, &c.nUsedHits, 1ull, alpaka::hierarchy::Blocks{}); + if (hitToTuple->size(idx) > 1) + alpaka::atomicAdd(acc, &c.nDupHits, 1ull, alpaka::hierarchy::Blocks{}); + } + } + }; + + // For every hit bin holding at least two tuples of quality >= loose, atomically increments + // nshared[t] for each tuple t in that bin whose size in *ptuples is at most 3. + template + class kernel_countSharedHit { + public: + template >> + ALPAKA_FN_ACC void operator()(TAcc const &acc, + int *__restrict__ nshared, + HitContainer const *__restrict__ ptuples, + Quality const *__restrict__ quality, + HitToTuple const *__restrict__ phitToTuple) const { + constexpr auto loose = Quality::loose; + + auto &hitToTuple = *phitToTuple; + auto const &foundNtuplets = *ptuples; + for (auto idx : cms::alpakatools::elements_with_stride(acc, hitToTuple.nbins())) { // hitToTuple is a reference: member access, not '->' + if (hitToTuple.size(idx) < 2) + continue; + + int nt = 0; + + // count "good" tracks + for (auto it = hitToTuple.begin(idx); it != hitToTuple.end(idx); ++it) { + if (quality[*it] < loose) + continue; + ++nt; + } + + if (nt < 2) + continue; + + // now mark each track triplet as sharing a hit + for (auto it = hitToTuple.begin(idx); it != 
hitToTuple.end(idx); ++it) { + if (foundNtuplets.size(*it) > 3) + continue; + alpaka::atomicAdd(acc, &nshared[*it], 1, alpaka::hierarchy::Blocks{}); // int literal: the value type must match int *nshared + } + + } // hit loop + } + }; + + // Sets quality[idx] to the reject level (loose when dupPassThrough, dup otherwise) for every + // tuple whose quality is above that level and whose nshared counter exceeds 2. + template + class kernel_markSharedHit { + public: + template >> + ALPAKA_FN_ACC void operator()(TAcc const &acc, + int const *__restrict__ nshared, + HitContainer const *__restrict__ tuples, + Quality *__restrict__ quality, + bool dupPassThrough) const { + // constexpr auto bad = Quality::bad; + constexpr auto dup = Quality::dup; + constexpr auto loose = Quality::loose; + // constexpr auto strict = Quality::strict; + + // quality to mark rejected + auto const reject = dupPassThrough ? loose : dup; + for (auto idx : cms::alpakatools::elements_with_stride(acc, tuples->nbins())) { + if (tuples->size(idx) == 0) + break; //guard + if (quality[idx] <= reject) + continue; + if (nshared[idx] > 2) + quality[idx] = reject; + } + } + }; + + // mostly for very forward triplets..... + template + class kernel_rejectDuplicate { + public: + template >> + ALPAKA_FN_ACC void operator()(TAcc const &acc, + TkSoAView tracks_view, + uint16_t nmin, + bool dupPassThrough, + HitToTuple const *__restrict__ phitToTuple) const { + // quality to mark rejected + auto const reject = dupPassThrough ? 
Quality::loose : Quality::dup; + + auto &hitToTuple = *phitToTuple; + + for (auto idx : cms::alpakatools::elements_with_stride(acc, hitToTuple.nbins())) { + if (hitToTuple.size(idx) < 2) + continue; + + auto score = [&](auto it, auto nl) { return std::abs(TracksUtilities::tip(tracks_view, it)); }; + + // full combinatorics + for (auto ip = hitToTuple.begin(idx); ip < hitToTuple.end(idx) - 1; ++ip) { + auto const it = *ip; + auto qi = tracks_view[it].quality(); + if (qi <= reject) + continue; + auto opi = tracks_view[it].state()(2); + auto e2opi = tracks_view[it].covariance()(9); + auto cti = tracks_view[it].state()(3); + auto e2cti = tracks_view[it].covariance()(12); + auto nli = tracks_view[it].nLayers(); + for (auto jp = ip + 1; jp < hitToTuple.end(idx); ++jp) { + auto const jt = *jp; + auto qj = tracks_view[jt].quality(); + if (qj <= reject) + continue; + auto opj = tracks_view[jt].state()(2); + auto ctj = tracks_view[jt].state()(3); + auto dct = nSigma2 * (tracks_view[jt].covariance()(12) + e2cti); + if ((cti - ctj) * (cti - ctj) > dct) + continue; + auto dop = nSigma2 * (tracks_view[jt].covariance()(9) + e2opi); + if ((opi - opj) * (opi - opj) > dop) + continue; + auto nlj = tracks_view[jt].nLayers(); + if (nlj < nli || (nlj == nli && (qj < qi || (qj == qi && score(it, nli) < score(jt, nlj))))) + tracks_view[jt].quality() = reject; + else { + tracks_view[it].quality() = reject; + break; + } + } + } + } + } + }; + + template + class kernel_sharedHitCleaner { + public: + template >> + ALPAKA_FN_ACC void operator()(TAcc const &acc, + HitsConstView hh, + TkSoAView tracks_view, + int nmin, + bool dupPassThrough, + HitToTuple const *__restrict__ phitToTuple) const { + // quality to mark rejected + auto const reject = dupPassThrough ? 
Quality::loose : Quality::dup; + // quality of longest track + auto const longTqual = Quality::highPurity; + + auto &hitToTuple = *phitToTuple; + + uint32_t l1end = hh.hitsLayerStart()[1]; + + for (auto idx : cms::alpakatools::elements_with_stride(acc, hitToTuple.nbins())) { + if (hitToTuple.size(idx) < 2) + continue; + + int8_t maxNl = 0; + + // find maxNl + for (auto it = hitToTuple.begin(idx); it != hitToTuple.end(idx); ++it) { + if (tracks_view[*it].quality() < longTqual) + continue; + // if (tracks_view[*it].nHits()==3) continue; + auto nl = tracks_view[*it].nLayers(); + maxNl = std::max(nl, maxNl); + } + + if (maxNl < 4) + continue; + + // quad pass through (leave for tests) + // maxNl = std::min(4, maxNl); + + // kill all tracks shorter than maxHl (only triplets??? + for (auto it = hitToTuple.begin(idx); it != hitToTuple.end(idx); ++it) { + auto nl = tracks_view[*it].nLayers(); + + //checking if shared hit is on bpix1 and if the tuple is short enough + if (idx < l1end and nl > nmin) + continue; + + if (nl < maxNl && tracks_view[*it].quality() > reject) + tracks_view[*it].quality() = reject; + } + } + } + }; + template + class kernel_tripletCleaner { + public: + template >> + ALPAKA_FN_ACC void operator()(TAcc const &acc, + TkSoAView tracks_view, + uint16_t nmin, + bool dupPassThrough, + HitToTuple const *__restrict__ phitToTuple) const { + // quality to mark rejected + auto const reject = Quality::loose; + /// min quality of good + auto const good = Quality::strict; + + auto &hitToTuple = *phitToTuple; + + for (auto idx : cms::alpakatools::elements_with_stride(acc, hitToTuple.nbins())) { + if (hitToTuple.size(idx) < 2) + continue; + + float mc = maxScore; + uint16_t im = tkNotFound; + bool onlyTriplets = true; + + // check if only triplets + for (auto it = hitToTuple.begin(idx); it != hitToTuple.end(idx); ++it) { + if (tracks_view[*it].quality() <= good) + continue; + onlyTriplets &= TracksUtilities::isTriplet(tracks_view, *it); + if (!onlyTriplets) + break; 
+ } + + // only triplets + if (!onlyTriplets) + continue; + + // for triplets choose best tip! (should we first find best quality???) + for (auto ip = hitToTuple.begin(idx); ip != hitToTuple.end(idx); ++ip) { + auto const it = *ip; + if (tracks_view[it].quality() >= good && + std::abs(TracksUtilities::tip(tracks_view, it)) < mc) { + mc = std::abs(TracksUtilities::tip(tracks_view, it)); + im = it; + } + } + + if (tkNotFound == im) + continue; + + // mark worse ambiguities + for (auto ip = hitToTuple.begin(idx); ip != hitToTuple.end(idx); ++ip) { + auto const it = *ip; + if (tracks_view[it].quality() > reject && it != im) + tracks_view[it].quality() = reject; //no race: simple assignment of the same constant + } + + } // loop over hits + } + }; + + template + class kernel_simpleTripletCleaner { + public: + template >> + ALPAKA_FN_ACC void operator()(TAcc const &acc, + TkSoAView tracks_view, + uint16_t nmin, + bool dupPassThrough, + HitToTuple const *__restrict__ phitToTuple) const { + // quality to mark rejected + auto const reject = Quality::loose; + /// min quality of good + auto const good = Quality::loose; + + auto &hitToTuple = *phitToTuple; + + for (auto idx : cms::alpakatools::elements_with_stride(acc, hitToTuple.nbins())) { + if (hitToTuple.size(idx) < 2) + continue; + + float mc = maxScore; + uint16_t im = tkNotFound; + + // choose best tip! (should we first find best quality???) 
+ for (auto ip = hitToTuple.begin(idx); ip != hitToTuple.end(idx); ++ip) { + auto const it = *ip; + if (tracks_view[it].quality() >= good && + std::abs(TracksUtilities::tip(tracks_view, it)) < mc) { + mc = std::abs(TracksUtilities::tip(tracks_view, it)); + im = it; + } + } + + if (tkNotFound == im) + continue; + + // mark worse ambiguities + for (auto ip = hitToTuple.begin(idx); ip != hitToTuple.end(idx); ++ip) { + auto const it = *ip; + if (tracks_view[it].quality() > reject && TracksUtilities::isTriplet(tracks_view, it) && + it != im) + tracks_view[it].quality() = reject; //no race: simple assignment of the same constant + } + + } // loop over hits + } + }; + + template + class kernel_print_found_ntuplets { + public: + template >> + ALPAKA_FN_ACC void operator()(TAcc const &acc, + HitsConstView hh, + TkSoAView tracks_view, + HitToTuple const *__restrict__ phitToTuple, + int32_t firstPrint, + int32_t lastPrint, + int iev) const { + constexpr auto loose = Quality::loose; + + for (auto i : cms::alpakatools::elements_with_stride(acc, tracks_view.hitIndices().nbins())) { + auto nh = tracks_view.hitIndices().size(i); + if (nh < 3) + continue; + if (tracks_view[i].quality() < loose) + continue; + printf("TK: %d %d %d %d %f %f %f %f %f %f %f %.3f %.3f %.3f %.3f %.3f %.3f %.3f\n", + 10000 * iev + i, + int(tracks_view[i].quality()), + nh, + tracks_view[i].nLayers(), + TracksUtilities::charge(tracks_view, i), + tracks_view[i].pt(), + tracks_view[i].eta(), + TracksUtilities::phi(tracks_view, i), + TracksUtilities::tip(tracks_view, i), + TracksUtilities::zip(tracks_view, i), + tracks_view[i].chi2(), + hh[*tracks_view.hitIndices().begin(i)].zGlobal(), + hh[*(tracks_view.hitIndices().begin(i) + 1)].zGlobal(), + hh[*(tracks_view.hitIndices().begin(i) + 2)].zGlobal(), + nh > 3 ? hh[int(*(tracks_view.hitIndices().begin(i) + 3))].zGlobal() : 0, + nh > 4 ? hh[int(*(tracks_view.hitIndices().begin(i) + 4))].zGlobal() : 0, + nh > 5 ? 
hh[int(*(tracks_view.hitIndices().begin(i) + 5))].zGlobal() : 0, + nh > 6 ? hh[int(*(tracks_view.hitIndices().begin(i) + nh - 1))].zGlobal() : 0); + } + } + }; + + // Prints the raw counters, then nHits..nDupHits divided by nEvents and the cell counters divided by nCells. + class kernel_printCounters { + public: + template >> + ALPAKA_FN_ACC void operator()(TAcc const &acc, Counters const *counters) const { + auto const &c = *counters; + printf( + "||Counters | nEvents | nHits | nCells | nTuples | nFitTracks | nLooseTracks | nGoodTracks | " + "nUsedHits " + "| " + "nDupHits | " + "nFishCells | " + "nKilledCells | " + "nEmptyCells | nZeroTrackCells ||\n"); + printf("Counters Raw %lld %lld %lld %lld %lld %lld %lld %lld %lld %lld %lld %lld %lld\n", + c.nEvents, + c.nHits, + c.nCells, + c.nTuples, + c.nFitTracks, + c.nLooseTracks, + c.nGoodTracks, + c.nUsedHits, + c.nDupHits, + c.nFishCells, + c.nKilledCells, + c.nEmptyCells, + c.nZeroTrackCells); + printf( + "Counters Norm %lld || %.1f| %.1f| %.1f| %.1f| %.1f| %.1f| %.1f| %.1f| %.3f| %.3f| " + "%.3f| " + "%.3f||\n", + c.nEvents, + c.nHits / double(c.nEvents), + c.nCells / double(c.nEvents), + c.nTuples / double(c.nEvents), + c.nFitTracks / double(c.nEvents), + c.nLooseTracks / double(c.nEvents), + c.nGoodTracks / double(c.nEvents), + c.nUsedHits / double(c.nEvents), + c.nDupHits / double(c.nEvents), + c.nFishCells / double(c.nCells), + c.nKilledCells / double(c.nCells), + c.nEmptyCells / double(c.nCells), + c.nZeroTrackCells / double(c.nCells)); + } + }; + } // namespace caHitNtupletGeneratorKernels +} // namespace ALPAKA_ACCELERATOR_NAMESPACE diff --git a/RecoTracker/PixelSeeding/plugins/alpaka/CAPixelDoublets.h b/RecoTracker/PixelSeeding/plugins/alpaka/CAPixelDoublets.h new file mode 100644 index 0000000000000..c97b0b06bb884 --- /dev/null +++ b/RecoTracker/PixelSeeding/plugins/alpaka/CAPixelDoublets.h @@ -0,0 +1,74 @@ +#ifndef RecoPixelVertexing_PixelTriplets_alpaka_CAPixelDoublets_h +#define RecoPixelVertexing_PixelTriplets_alpaka_CAPixelDoublets_h + +#include + +#include 
"HeterogeneousCore/AlpakaInterface/interface/workdivision.h" +#include "HeterogeneousCore/AlpakaInterface/interface/traits.h" +#include "CAPixelDoubletsAlgos.h" + +#define CONSTANT_VAR __constant__ + +namespace ALPAKA_ACCELERATOR_NAMESPACE { + using namespace alpaka; + using namespace cms::alpakatools; + namespace caPixelDoublets { + + template + class initDoublets { + public: + template >> + ALPAKA_FN_ACC void operator()(TAcc const& acc, + OuterHitOfCell* isOuterHitOfCell, + int nHits, + CellNeighborsVector* cellNeighbors, + CellNeighbors* cellNeighborsContainer, + CellTracksVector* cellTracks, + CellTracks* cellTracksContainer) const { + ALPAKA_ASSERT_OFFLOAD((*isOuterHitOfCell).container); + + for (auto i : cms::alpakatools::elements_with_stride(acc, nHits)) + (*isOuterHitOfCell).container[i].reset(); + + const uint32_t threadIdx(alpaka::getIdx(acc)[0u]); + if (0 == threadIdx) { + cellNeighbors->construct(TrackerTraits::maxNumOfActiveDoublets, cellNeighborsContainer); + cellTracks->construct(TrackerTraits::maxNumOfActiveDoublets, cellTracksContainer); + [[maybe_unused]] auto i = cellNeighbors->extend(acc); // only read by the assertions below + ALPAKA_ASSERT_OFFLOAD(0 == i); + (*cellNeighbors)[0].reset(); + i = cellTracks->extend(acc); + ALPAKA_ASSERT_OFFLOAD(0 == i); + (*cellTracks)[0].reset(); + } + } + }; + + constexpr auto getDoubletsFromHistoMaxBlockSize = 64; // for both x and y + constexpr auto getDoubletsFromHistoMinBlocksPerMP = 16; + + template + class getDoubletsFromHisto { + public: + template >> + // #ifdef __CUDACC__ + // __launch_bounds__(getDoubletsFromHistoMaxBlockSize, getDoubletsFromHistoMinBlocksPerMP) // TODO: Alpakify + // #endif + ALPAKA_FN_ACC void operator()(TAcc const& acc, + CACellT* cells, + uint32_t* nCells, + CellNeighborsVector* cellNeighbors, + CellTracksVector* cellTracks, + HitsConstView hh, + OuterHitOfCell* isOuterHitOfCell, + uint32_t nActualPairs, + const uint32_t maxNumOfDoublets, + CellCutsT cuts) const { + + doubletsFromHisto( + acc, nActualPairs, maxNumOfDoublets, cells, nCells, cellNeighbors, 
cellTracks, hh, *isOuterHitOfCell, cuts); + } + }; + } // namespace caPixelDoublets +} // namespace ALPAKA_ACCELERATOR_NAMESPACE +#endif // RecoPixelVertexing_PixelTriplets_alpaka_CAPixelDoublets_h diff --git a/RecoTracker/PixelSeeding/plugins/alpaka/CAPixelDoubletsAlgos.h b/RecoTracker/PixelSeeding/plugins/alpaka/CAPixelDoubletsAlgos.h new file mode 100644 index 0000000000000..bd1f996928f13 --- /dev/null +++ b/RecoTracker/PixelSeeding/plugins/alpaka/CAPixelDoubletsAlgos.h @@ -0,0 +1,331 @@ +#ifndef RecoPixelVertexing_PixelTriplets_alpaka_CAPixelDoubletsAlgos_h +#define RecoPixelVertexing_PixelTriplets_alpaka_CAPixelDoubletsAlgos_h + +#include +#include +#include +#include +#include +#include +#include "HeterogeneousCore/AlpakaInterface/interface/traits.h" +#include "HeterogeneousCore/AlpakaInterface/interface/workdivision.h" +#include "HeterogeneousCore/AlpakaInterface/interface/VecArray.h" +#include "DataFormats/TrackingRecHitSoA/interface/TrackingRecHitsLayout.h" +#include "DataFormats/Math/interface/approx_atan2.h" +#include "Geometry/CommonTopologies/interface/SimplePixelTopology.h" +#include "CAStructures.h" +#include "CACell.h" + +// #define GPU_DEBUG +//#define NTUPLE_DEBUG +namespace ALPAKA_ACCELERATOR_NAMESPACE { + namespace caPixelDoublets { + using namespace cms::alpakatools; + + template + using CellNeighbors = caStructures::CellNeighborsT; + template + using CellTracks = caStructures::CellTracksT; + template + using CellNeighborsVector = caStructures::CellNeighborsVectorT; + template + using CellTracksVector = caStructures::CellTracksVectorT; + template + using OuterHitOfCell = caStructures::OuterHitOfCellT; + template + using HitsConstView = typename CACellT::HitsConstView; + + template + struct CellCutsT { + using H = HitsConstView; + using T = TrackerTraits; + + CellCutsT() = default; + + CellCutsT(const bool doClusterCut, + const bool doZ0Cut, + const bool doPtCut, + const bool idealConditions, + const float z0Cut, + const float ptCut, + const 
std::vector& phiCutsV) + : doClusterCut_(doClusterCut), + doZ0Cut_(doZ0Cut), + doPtCut_(doPtCut), + idealConditions_(idealConditions), + z0Cut_(z0Cut), + ptCut_(ptCut) { + assert(phiCutsV.size() == TrackerTraits::nPairs); + std::copy(phiCutsV.begin(), phiCutsV.end(), &phiCuts[0]); + } + + bool doClusterCut_; + bool doZ0Cut_; + bool doPtCut_; + bool idealConditions_; //this is actually not used by phase2 + + float z0Cut_; //FIXME: check if could be const now + float ptCut_; + + int phiCuts[T::nPairs]; + + template + ALPAKA_FN_ACC ALPAKA_FN_INLINE bool __attribute__((always_inline)) + zSizeCut(const TAcc& acc, H hh, int i, int o) const { + const uint32_t mi = hh[i].detectorIndex(); + + bool innerB1 = mi < T::last_bpix1_detIndex; + bool isOuterLadder = idealConditions_ ? true : 0 == (mi / 8) % 2; + auto mes = (!innerB1) || isOuterLadder ? hh[i].clusterSizeY() : -1; + + if (mes < 0) + return false; + + const uint32_t mo = hh[o].detectorIndex(); + auto so = hh[o].clusterSizeY(); + + auto dz = hh[i].zGlobal() - hh[o].zGlobal(); + auto dr = hh[i].rGlobal() - hh[o].rGlobal(); + + auto innerBarrel = mi < T::last_barrel_detIndex; + auto onlyBarrel = mo < T::last_barrel_detIndex; + + if (not innerBarrel and not onlyBarrel) + return false; + auto dy = innerB1 ? T::maxDYsize12 : T::maxDYsize; + + return onlyBarrel ? so > 0 && std::abs(so - mes) > dy + : innerBarrel && std::abs(mes - int(std::abs(dz / dr) * T::dzdrFact + 0.5f)) > T::maxDYPred; + } + + template + ALPAKA_FN_ACC ALPAKA_FN_INLINE bool __attribute__((always_inline)) + clusterCut(const TAcc& acc, H hh, uint32_t i) const { + const uint32_t mi = hh[i].detectorIndex(); + bool innerB1orB2 = mi < T::last_bpix2_detIndex; + + if (!innerB1orB2) + return false; + + bool innerB1 = mi < T::last_bpix1_detIndex; + bool isOuterLadder = idealConditions_ ? true : 0 == (mi / 8) % 2; + auto mes = (!innerB1) || isOuterLadder ? 
hh[i].clusterSizeY() : -1; + + if (innerB1) // B1 + if (mes > 0 && mes < T::minYsizeB1) + return true; // only long cluster (5*8) + bool innerB2 = (mi >= T::last_bpix1_detIndex) && (mi < T::last_bpix2_detIndex); //FIXME number + if (innerB2) // B2 and F1 + if (mes > 0 && mes < T::minYsizeB2) + return true; + + return false; + } + }; + + template + ALPAKA_FN_ACC ALPAKA_FN_INLINE void __attribute__((always_inline)) + doubletsFromHisto(const TAcc& acc, + uint32_t nPairs, + const uint32_t maxNumOfDoublets, + CACellT* cells, + uint32_t* nCells, + CellNeighborsVector* cellNeighbors, + CellTracksVector* cellTracks, + HitsConstView hh, + OuterHitOfCell isOuterHitOfCell, + CellCutsT const& cuts) { // ysize cuts (z in the barrel) times 8 + // these are used if doClusterCut is true + + const bool doClusterCut = cuts.doClusterCut_; + const bool doZ0Cut = cuts.doZ0Cut_; + const bool doPtCut = cuts.doPtCut_; + + const float z0cut = cuts.z0Cut_; // cm + const float hardPtCut = cuts.ptCut_; // GeV + // cm (1 GeV track has 1 GeV/c / (e * 3.8T) ~ 87 cm radius in a 3.8T field) + const float minRadius = hardPtCut * 87.78f; + const float minRadius2T4 = 4.f * minRadius * minRadius; + + using PhiBinner = typename TrackingRecHitAlpakaSoA::PhiBinner; + + auto const& __restrict__ phiBinner = hh.phiBinner(); + uint32_t const* __restrict__ offsets = hh.hitsLayerStart().data(); + ALPAKA_ASSERT_OFFLOAD(offsets); + + auto layerSize = [=](uint8_t li) { return offsets[li + 1] - offsets[li]; }; + + // nPairsMax to be optimized later (originally was 64). + // If it should much be bigger, consider using a block-wide parallel prefix scan, + // e.g. 
see https://nvlabs.github.io/cub/classcub_1_1_warp_scan.html + auto& innerLayerCumulativeSize = alpaka::declareSharedVar(acc); + auto& ntot = alpaka::declareSharedVar(acc); + + constexpr uint32_t dimIndexY = 0u; + constexpr uint32_t dimIndexX = 1u; + const uint32_t threadIdxLocalY(alpaka::getIdx(acc)[dimIndexY]); + const uint32_t threadIdxLocalX(alpaka::getIdx(acc)[dimIndexX]); + + if (threadIdxLocalY == 0 && threadIdxLocalX == 0) { + innerLayerCumulativeSize[0] = layerSize(TrackerTraits::layerPairs[0]); + for (uint32_t i = 1; i < nPairs; ++i) { + innerLayerCumulativeSize[i] = innerLayerCumulativeSize[i - 1] + layerSize(TrackerTraits::layerPairs[2 * i]); + } + ntot = innerLayerCumulativeSize[nPairs - 1]; + } + alpaka::syncBlockThreads(acc); + + // x runs faster + const uint32_t blockDimensionX(alpaka::getWorkDiv(acc)[dimIndexX]); + const auto& [firstElementIdxNoStrideX, endElementIdxNoStrideX] = + cms::alpakatools::element_index_range_in_block(acc, 0u, dimIndexX); + + uint32_t pairLayerId = 0; // cannot go backward + + // Outermost loop on Y + const uint32_t gridDimensionY(alpaka::getWorkDiv(acc)[dimIndexY]); + const auto& [firstElementIdxNoStrideY, endElementIdxNoStrideY] = + cms::alpakatools::element_index_range_in_grid(acc, 0u, dimIndexY); + uint32_t firstElementIdxY = firstElementIdxNoStrideY; + + for (uint32_t j = firstElementIdxY; j < ntot; j += gridDimensionY) { + + while (j >= innerLayerCumulativeSize[pairLayerId++]) + ; + --pairLayerId; // move to lower_bound ?? + + ALPAKA_ASSERT_OFFLOAD(pairLayerId < nPairs); + ALPAKA_ASSERT_OFFLOAD(j < innerLayerCumulativeSize[pairLayerId]); + ALPAKA_ASSERT_OFFLOAD(0 == pairLayerId || j >= innerLayerCumulativeSize[pairLayerId - 1]); + + uint8_t inner = TrackerTraits::layerPairs[2 * pairLayerId]; + uint8_t outer = TrackerTraits::layerPairs[2 * pairLayerId + 1]; + ALPAKA_ASSERT_OFFLOAD(outer > inner); + + auto hoff = PhiBinner::histOff(outer); + auto i = (0 == pairLayerId) ? 
j : j - innerLayerCumulativeSize[pairLayerId - 1]; + i += offsets[inner]; + + ALPAKA_ASSERT_OFFLOAD(i >= offsets[inner]); + ALPAKA_ASSERT_OFFLOAD(i < offsets[inner + 1]); + + // found hit corresponding to our cuda thread, now do the job + if (hh[i].detectorIndex() > pixelClustering::maxNumModules) + continue; // invalid + + /* maybe clever, not effective when zoCut is on + auto bpos = (mi%8)/4; // if barrel is 1 for z>0 + auto fpos = (outer>3) & (outer<7); + if ( ((inner<3) & (outer>3)) && bpos!=fpos) continue; + */ + + auto mez = hh[i].zGlobal(); + + if (mez < TrackerTraits::minz[pairLayerId] || mez > TrackerTraits::maxz[pairLayerId]) + continue; + + if (doClusterCut && outer > pixelTopology::last_barrel_layer && cuts.clusterCut(acc, hh, i)) + continue; + + auto mep = hh[i].iphi(); + auto mer = hh[i].rGlobal(); + + // all cuts: true if fails + auto ptcut = [&](int j, int16_t idphi) { + auto r2t4 = minRadius2T4; + auto ri = mer; + auto ro = hh[j].rGlobal(); + auto dphi = short2phi(idphi); + return dphi * dphi * (r2t4 - ri * ro) > (ro - ri) * (ro - ri); + }; + auto z0cutoff = [&](int j) { + auto zo = hh[j].zGlobal(); + auto ro = hh[j].rGlobal(); + auto dr = ro - mer; + return dr > TrackerTraits::maxr[pairLayerId] || dr < 0 || std::abs((mez * ro - mer * zo)) > z0cut * dr; + }; + + auto iphicut = cuts.phiCuts[pairLayerId]; + + auto kl = PhiBinner::bin(int16_t(mep - iphicut)); + auto kh = PhiBinner::bin(int16_t(mep + iphicut)); + auto incr = [](auto& k) { return k = (k + 1) % PhiBinner::nbins(); }; + +#ifdef GPU_DEBUG + int tot = 0; + int nmin = 0; + int tooMany = 0; +#endif + + auto khh = kh; + incr(khh); + for (auto kk = kl; kk != khh; incr(kk)) { +#ifdef GPU_DEBUG + if (kk != kl && kk != kh) + nmin += phiBinner.size(kk + hoff); +#endif + auto const* __restrict__ p = phiBinner.begin(kk + hoff); + auto const* __restrict__ e = phiBinner.end(kk + hoff); + auto const maxpIndex = e - p; + + // Here we parallelize in X + uint32_t firstElementIdxX = 
firstElementIdxNoStrideX; + for (uint32_t pIndex = firstElementIdxX; pIndex < maxpIndex; pIndex += blockDimensionX) { + + auto oi = p[pIndex]; // auto oi = __ldg(p); is not allowed since __ldg is device-only + ALPAKA_ASSERT_OFFLOAD(oi >= offsets[outer]); + ALPAKA_ASSERT_OFFLOAD(oi < offsets[outer + 1]); + auto mo = hh[oi].detectorIndex(); + + if (mo > pixelClustering::maxNumModules) + continue; // invalid + + if (doZ0Cut && z0cutoff(oi)) + continue; + + auto mop = hh[oi].iphi(); + uint16_t idphi = std::min(std::abs(int16_t(mop - mep)), std::abs(int16_t(mep - mop))); + + if (idphi > iphicut) + continue; + + if (doClusterCut && cuts.zSizeCut(acc, hh, i, oi)) + continue; + + if (doPtCut && ptcut(oi, idphi)) + continue; + + auto ind = alpaka::atomicAdd(acc, nCells, (uint32_t)1, alpaka::hierarchy::Blocks{}); + if (ind >= maxNumOfDoublets) { + alpaka::atomicSub(acc, nCells, (uint32_t)1, alpaka::hierarchy::Blocks{}); + break; + } // move to SimpleVector?? + cells[ind].init(*cellNeighbors, *cellTracks, hh, pairLayerId, i, oi); + isOuterHitOfCell[oi].push_back(acc, ind); +#ifdef GPU_DEBUG + if (isOuterHitOfCell[oi].full()) + ++tooMany; + ++tot; +#endif + } + } +// #endif +#ifdef GPU_DEBUG + if (tooMany > 0 or tot > 0) + printf("OuterHitOfCell for %d in layer %d/%d, %d,%d %d, %d %.3f %.3f %s\n", // trailing %s prints the FULL!!/not full. flag + i, + inner, + outer, + nmin, + tot, + tooMany, + iphicut, + TrackerTraits::minz[pairLayerId], + TrackerTraits::maxz[pairLayerId], + tooMany > 0 ? "FULL!!" : "not full."); +#endif + } // loop in block... 
+ } // namespace caPixelDoublets + } // namespace caPixelDoublets +} // namespace ALPAKA_ACCELERATOR_NAMESPACE +#endif // RecoPixelVertexing_PixelTriplets_CAPixelDoubletsAlgos_h diff --git a/RecoTracker/PixelSeeding/plugins/alpaka/CAStructures.h b/RecoTracker/PixelSeeding/plugins/alpaka/CAStructures.h new file mode 100644 index 0000000000000..479f79617c16c --- /dev/null +++ b/RecoTracker/PixelSeeding/plugins/alpaka/CAStructures.h @@ -0,0 +1,51 @@ +#ifndef RecoPixelVertexing_PixelTriplets_CAStructures_h +#define RecoPixelVertexing_PixelTriplets_CAStructures_h + +#include "HeterogeneousCore/AlpakaInterface/interface/SimpleVector.h" +#include "HeterogeneousCore/AlpakaInterface/interface/VecArray.h" +#include "HeterogeneousCore/AlpakaInterface/interface/HistoContainer.h" + +namespace caStructures { + + template + using CellNeighborsT = + cms::alpakatools::VecArray; + + template + using CellTracksT = cms::alpakatools::VecArray; + + template + using CellNeighborsVectorT = cms::alpakatools::SimpleVector>; + + template + using CellTracksVectorT = cms::alpakatools::SimpleVector>; + + template + using OuterHitOfCellContainerT = cms::alpakatools::VecArray; + + template + using TupleMultiplicityT = cms::alpakatools::OneToManyAssoc; + + template + using HitToTupleT = cms::alpakatools::OneToManyAssoc; // 3.5 should be enough + + template + using TuplesContainerT = cms::alpakatools::OneToManyAssoc; + + template + struct OuterHitOfCellT { + OuterHitOfCellContainerT* container; + int32_t offset; + constexpr auto& operator[](int i) { return container[i - offset]; } + constexpr auto const& operator[](int i) const { return container[i - offset]; } + }; + +} // namespace caStructures + +#endif diff --git a/RecoTracker/PixelSeeding/plugins/alpaka/HelixFit.cc b/RecoTracker/PixelSeeding/plugins/alpaka/HelixFit.cc new file mode 100644 index 0000000000000..078cbe8de45a4 --- /dev/null +++ b/RecoTracker/PixelSeeding/plugins/alpaka/HelixFit.cc @@ -0,0 +1,21 @@ +#include 
"HeterogeneousCore/AlpakaInterface/interface/config.h" +#include "HelixFit.h" + +namespace ALPAKA_ACCELERATOR_NAMESPACE { + template + void HelixFit::allocate(TupleMultiplicity const *tupleMultiplicity, OutputSoAView &helix_fit_results) { + tuples_ = &helix_fit_results.hitIndices(); + tupleMultiplicity_ = tupleMultiplicity; + outputSoa_ = helix_fit_results; + + ALPAKA_ASSERT_OFFLOAD(tuples_); + ALPAKA_ASSERT_OFFLOAD(tupleMultiplicity_); + } + + template + void HelixFit::deallocate() {} + + template class HelixFit; + template class HelixFit; + template class HelixFit; +} // namespace ALPAKA_ACCELERATOR_NAMESPACE diff --git a/RecoTracker/PixelSeeding/plugins/alpaka/HelixFit.h b/RecoTracker/PixelSeeding/plugins/alpaka/HelixFit.h new file mode 100644 index 0000000000000..d2ef5e66a1731 --- /dev/null +++ b/RecoTracker/PixelSeeding/plugins/alpaka/HelixFit.h @@ -0,0 +1,85 @@ +#ifndef RecoPixelVertexing_PixelTriplets_HelixFit_h +#define RecoPixelVertexing_PixelTriplets_HelixFit_h + +#include +#include "DataFormats/TrackSoA/interface/alpaka/TrackUtilities.h" +#include "DataFormats/TrackingRecHitSoA/interface/TrackingRecHitsLayout.h" +#include "RecoTracker/PixelTrackFitting/interface/alpaka/FitResult.h" +#include "Geometry/CommonTopologies/interface/SimplePixelTopology.h" +#include "HeterogeneousCore/AlpakaInterface/interface/config.h" +#include "RecoLocalTracker/SiPixelRecHits/interface/pixelCPEforDevice.h" + +#include "CAStructures.h" +namespace riemannFit { + // TODO: Can this be taken from TrackerTraits or somewhere else? 
+ // in case of memory issue can be made smaller + constexpr uint32_t maxNumberOfConcurrentFits = 32 * 1024; + constexpr uint32_t stride = maxNumberOfConcurrentFits; + using Matrix3x4d = Eigen::Matrix; + using Map3x4d = Eigen::Map >; + using Matrix6x4f = Eigen::Matrix; + using Map6x4f = Eigen::Map >; + + // hits + template + using Matrix3xNd = Eigen::Matrix; + template + using Map3xNd = Eigen::Map, 0, Eigen::Stride<3 * stride, stride> >; + // errors + template + using Matrix6xNf = Eigen::Matrix; + template + using Map6xNf = Eigen::Map, 0, Eigen::Stride<6 * stride, stride> >; + // fast fit + using Map4d = Eigen::Map >; + + template //a compile-time bounded for loop + constexpr void rolling_fits(F &&f) { + if constexpr (Start < End) { + f(std::integral_constant()); + rolling_fits(f); + } + } + +} // namespace riemannFit + +namespace ALPAKA_ACCELERATOR_NAMESPACE { + + template + class HelixFit { + public: + using TrackingRecHitSoAs = TrackingRecHitAlpakaSoA; + + using HitView = TrackingRecHitAlpakaSoAView; + using HitConstView = TrackingRecHitAlpakaSoAConstView; + + using Tuples = typename TrackSoA::HitContainer; + using OutputSoAView = TrackSoAView; + + using TupleMultiplicity = caStructures::TupleMultiplicityT; + + using ParamsOnDevice = pixelCPEforDevice::ParamsOnDeviceT; + + explicit HelixFit(float bf, bool fitNas4) : bField_(bf), fitNas4_(fitNas4) {} + ~HelixFit() { deallocate(); } + + void setBField(double bField) { bField_ = bField; } + void launchRiemannKernels(const HitConstView &hv, ParamsOnDevice const* cpeParams, uint32_t nhits, uint32_t maxNumberOfTuples, Queue &queue); + void launchBrokenLineKernels(const HitConstView &hv, ParamsOnDevice const* cpeParams, uint32_t nhits, uint32_t maxNumberOfTuples, Queue &queue); + + void allocate(TupleMultiplicity const *tupleMultiplicity, OutputSoAView &helix_fit_results); + void deallocate(); + + private: + static constexpr uint32_t maxNumberOfConcurrentFits_ = riemannFit::maxNumberOfConcurrentFits; + + // fowarded + 
Tuples const *tuples_ = nullptr; + TupleMultiplicity const *tupleMultiplicity_ = nullptr; + OutputSoAView outputSoa_; + float bField_; + + const bool fitNas4_; + }; +} // namespace ALPAKA_ACCELERATOR_NAMESPACE +#endif // RecoPixelVertexing_PixelTriplets_plugins_HelixFit_h diff --git a/RecoTracker/PixelSeeding/plugins/alpaka/RiemannFit.dev.cc b/RecoTracker/PixelSeeding/plugins/alpaka/RiemannFit.dev.cc new file mode 100644 index 0000000000000..7545ccecaa66d --- /dev/null +++ b/RecoTracker/PixelSeeding/plugins/alpaka/RiemannFit.dev.cc @@ -0,0 +1,401 @@ +// +// Author: Felice Pantaleo, CERN +// + +#include +#include + +#include "HeterogeneousCore/AlpakaInterface/interface/memory.h" +#include "HeterogeneousCore/AlpakaInterface/interface/traits.h" +#include "DataFormats/TrackingRecHitSoA/interface/TrackingRecHitsLayout.h" +#include "DataFormats/TrackSoA/interface/alpaka/TrackUtilities.h" +#include "RecoLocalTracker/SiPixelRecHits/interface/pixelCPEforDevice.h" +#include "RecoTracker/PixelTrackFitting/interface/alpaka/RiemannFit.h" +#include "HelixFit.h" +#include "CAStructures.h" + +template +using Tuples = typename TrackSoA::HitContainer; +template +using OutputSoAView = TrackSoAView; +template +using TupleMultiplicity = caStructures::TupleMultiplicityT; + +namespace ALPAKA_ACCELERATOR_NAMESPACE { + using namespace alpaka; + using namespace cms::alpakatools; + + template + class kernel_FastFit { // TODO + public: + template >> + ALPAKA_FN_ACC void operator()(TAcc const &acc, + Tuples const *__restrict__ foundNtuplets, + TupleMultiplicity const *__restrict__ tupleMultiplicity, + uint32_t nHits, + TrackingRecHitAlpakaSoAConstView hh, + pixelCPEforDevice::ParamsOnDeviceT const *__restrict__ cpeParams, + double *__restrict__ phits, + float *__restrict__ phits_ge, + double *__restrict__ pfast_fit, + uint32_t offset) const { + constexpr uint32_t hitsInFit = N; + + ALPAKA_ASSERT_OFFLOAD(hitsInFit <= nHits); + + ALPAKA_ASSERT_OFFLOAD(pfast_fit); + 
ALPAKA_ASSERT_OFFLOAD(foundNtuplets); + ALPAKA_ASSERT_OFFLOAD(tupleMultiplicity); + + // look in bin for this hit multiplicity + +#ifdef RIEMANN_DEBUG + const uint32_t threadIdx(alpaka::getIdx(acc)[0u]); + if (0 == threadIdx) + printf("%d Ntuple of size %d for %d hits to fit\n", tupleMultiplicity->size(nHits), nHits, hitsInFit); +#endif + + const auto nt = riemannFit::maxNumberOfConcurrentFits; + for (auto local_idx : cms::alpakatools::elements_with_stride(acc, nt)) { + auto tuple_idx = local_idx + offset; + if (tuple_idx >= tupleMultiplicity->size(nHits)) + break; + + // get it from the ntuple container (one to one to helix) + auto tkid = *(tupleMultiplicity->begin(nHits) + tuple_idx); + ALPAKA_ASSERT_OFFLOAD(tkid < foundNtuplets->nbins()); + + ALPAKA_ASSERT_OFFLOAD(foundNtuplets->size(tkid) == nHits); + + riemannFit::Map3xNd hits(phits + local_idx); + riemannFit::Map4d fast_fit(pfast_fit + local_idx); + riemannFit::Map6xNf hits_ge(phits_ge + local_idx); + + // Prepare data structure + auto const *hitId = foundNtuplets->begin(tkid); + for (unsigned int i = 0; i < hitsInFit; ++i) { + auto hit = hitId[i]; + float ge[6]; + cpeParams->detParams(hh[hit].detectorIndex()).frame.toGlobal(hh[hit].xerrLocal(), 0, hh[hit].yerrLocal(), ge); + + hits.col(i) << hh[hit].xGlobal(), hh[hit].yGlobal(), hh[hit].zGlobal(); + hits_ge.col(i) << ge[0], ge[1], ge[2], ge[3], ge[4], ge[5]; + } + riemannFit::fastFit(acc, hits, fast_fit); + + // no NaN here.... 
+ ALPAKA_ASSERT_OFFLOAD(fast_fit(0) == fast_fit(0)); + ALPAKA_ASSERT_OFFLOAD(fast_fit(1) == fast_fit(1)); + ALPAKA_ASSERT_OFFLOAD(fast_fit(2) == fast_fit(2)); + ALPAKA_ASSERT_OFFLOAD(fast_fit(3) == fast_fit(3)); + } + } + }; + + template + class kernel_CircleFit { // TODO + public: + template >> + ALPAKA_FN_ACC void operator()(TAcc const &acc, + TupleMultiplicity const *__restrict__ tupleMultiplicity, + uint32_t nHits, + double bField, + double *__restrict__ phits, + float *__restrict__ phits_ge, + double *__restrict__ pfast_fit_input, + riemannFit::CircleFit *circle_fit, + uint32_t offset) const { + ALPAKA_ASSERT_OFFLOAD(circle_fit); + ALPAKA_ASSERT_OFFLOAD(N <= nHits); + + // same as above... + + // look in bin for this hit multiplicity + const auto nt = riemannFit::maxNumberOfConcurrentFits; + for (auto local_idx : cms::alpakatools::elements_with_stride(acc, nt)) { + auto tuple_idx = local_idx + offset; + if (tuple_idx >= tupleMultiplicity->size(nHits)) + break; + + riemannFit::Map3xNd hits(phits + local_idx); + riemannFit::Map4d fast_fit(pfast_fit_input + local_idx); + riemannFit::Map6xNf hits_ge(phits_ge + local_idx); + + riemannFit::VectorNd rad = (hits.block(0, 0, 2, N).colwise().norm()); + + riemannFit::Matrix2Nd hits_cov = riemannFit::Matrix2Nd::Zero(); + riemannFit::loadCovariance2D(acc, hits_ge, hits_cov); + + circle_fit[local_idx] = + riemannFit::circleFit(acc, hits.block(0, 0, 2, N), hits_cov, fast_fit, rad, bField, true); + +#ifdef RIEMANN_DEBUG +// auto tkid = *(tupleMultiplicity->begin(nHits) + tuple_idx); +// printf("kernelCircleFit circle.par(0,1,2): %d %f,%f,%f\n", tkid, +// circle_fit[local_idx].par(0), circle_fit[local_idx].par(1), circle_fit[local_idx].par(2)); +#endif + } + } + }; + + template + class kernel_LineFit { // TODO + public: + template >> + ALPAKA_FN_ACC void operator()(TAcc const &acc, + TupleMultiplicity const *__restrict__ tupleMultiplicity, + uint32_t nHits, + double bField, + OutputSoAView results_view, + double *__restrict__ 
phits, + float *__restrict__ phits_ge, + double *__restrict__ pfast_fit_input, + riemannFit::CircleFit *__restrict__ circle_fit, + uint32_t offset) const { + ALPAKA_ASSERT_OFFLOAD(circle_fit); + ALPAKA_ASSERT_OFFLOAD(N <= nHits); + + // same as above... + + // look in bin for this hit multiplicity + const auto nt = riemannFit::maxNumberOfConcurrentFits; + for (auto local_idx : cms::alpakatools::elements_with_stride(acc, nt)) { + auto tuple_idx = local_idx + offset; + if (tuple_idx >= tupleMultiplicity->size(nHits)) + break; + + // get it for the ntuple container (one to one to helix) + int32_t tkid = *(tupleMultiplicity->begin(nHits) + tuple_idx); + + riemannFit::Map3xNd hits(phits + local_idx); + riemannFit::Map4d fast_fit(pfast_fit_input + local_idx); + riemannFit::Map6xNf hits_ge(phits_ge + local_idx); + + auto const &line_fit = riemannFit::lineFit(acc, hits, hits_ge, circle_fit[local_idx], fast_fit, bField, true); + + riemannFit::fromCircleToPerigee(acc, circle_fit[local_idx]); + + TracksUtilities::copyFromCircle(results_view, + circle_fit[local_idx].par, + circle_fit[local_idx].cov, + line_fit.par, + line_fit.cov, + 1.f / float(bField), + tkid); + results_view[tkid].pt() = bField / std::abs(circle_fit[local_idx].par(2)); + results_view[tkid].eta() = asinhf(line_fit.par(0)); + results_view[tkid].chi2() = (circle_fit[local_idx].chi2 + line_fit.chi2) / (2 * N - 5); + +#ifdef RIEMANN_DEBUG + printf("kernelLineFit size %d for %d hits circle.par(0,1,2): %d %f,%f,%f\n", + N, + nHits, + tkid, + circle_fit[local_idx].par(0), + circle_fit[local_idx].par(1), + circle_fit[local_idx].par(2)); + printf("kernelLineFit line.par(0,1): %d %f,%f\n", tkid, line_fit.par(0), line_fit.par(1)); + printf("kernelLineFit chi2 cov %f/%f %e,%e,%e,%e,%e\n", + circle_fit[local_idx].chi2, + line_fit.chi2, + circle_fit[local_idx].cov(0, 0), + circle_fit[local_idx].cov(1, 1), + circle_fit[local_idx].cov(2, 2), + line_fit.cov(0, 0), + line_fit.cov(1, 1)); +#endif + } + } + }; + + template + 
void HelixFit::launchRiemannKernels(const TrackingRecHitAlpakaSoAConstView &hv, + pixelCPEforDevice::ParamsOnDeviceT const *cpeParams, + uint32_t nhits, + uint32_t maxNumberOfTuples, + Queue &queue) { + assert(tuples_); + + auto blockSize = 64; + auto numberOfBlocks = (maxNumberOfConcurrentFits_ + blockSize - 1) / blockSize; + const auto workDivTriplets = cms::alpakatools::make_workdiv(numberOfBlocks, blockSize); + const auto workDivQuadsPenta = cms::alpakatools::make_workdiv(numberOfBlocks / 4, blockSize); + + // Fit internals + auto hitsDevice = cms::alpakatools::make_device_buffer( + queue, maxNumberOfConcurrentFits_ * sizeof(riemannFit::Matrix3xNd<4>) / sizeof(double)); + auto hits_geDevice = cms::alpakatools::make_device_buffer( + queue, maxNumberOfConcurrentFits_ * sizeof(riemannFit::Matrix6x4f) / sizeof(float)); + auto fast_fit_resultsDevice = cms::alpakatools::make_device_buffer( + queue, maxNumberOfConcurrentFits_ * sizeof(riemannFit::Vector4d) / sizeof(double)); + auto circle_fit_resultsDevice_holder = + cms::alpakatools::make_device_buffer(queue, maxNumberOfConcurrentFits_ * sizeof(riemannFit::CircleFit)); + riemannFit::CircleFit *circle_fit_resultsDevice_ = + (riemannFit::CircleFit *)(circle_fit_resultsDevice_holder.data()); + + for (uint32_t offset = 0; offset < maxNumberOfTuples; offset += maxNumberOfConcurrentFits_) { + // triplets + alpaka::exec(queue, + workDivTriplets, + kernel_FastFit<3, TrackerTraits>{}, + tuples_, + tupleMultiplicity_, + 3, + hv, + cpeParams, + hitsDevice.data(), + hits_geDevice.data(), + fast_fit_resultsDevice.data(), + offset); + + alpaka::exec(queue, + workDivTriplets, + kernel_CircleFit<3, TrackerTraits>{}, + tupleMultiplicity_, + 3, + bField_, + hitsDevice.data(), + hits_geDevice.data(), + fast_fit_resultsDevice.data(), + circle_fit_resultsDevice_, + offset); + + alpaka::exec(queue, + workDivTriplets, + kernel_LineFit<3, TrackerTraits>{}, + tupleMultiplicity_, + 3, + bField_, + outputSoa_, + hitsDevice.data(), + 
hits_geDevice.data(), + fast_fit_resultsDevice.data(), + circle_fit_resultsDevice_, + offset); + + // quads + alpaka::exec(queue, + workDivQuadsPenta, + kernel_FastFit<4, TrackerTraits>{}, + tuples_, + tupleMultiplicity_, + 4, + hv, + cpeParams, + hitsDevice.data(), + hits_geDevice.data(), + fast_fit_resultsDevice.data(), + offset); + + alpaka::exec(queue, + workDivQuadsPenta, + kernel_CircleFit<4, TrackerTraits>{}, + tupleMultiplicity_, + 4, + bField_, + hitsDevice.data(), + hits_geDevice.data(), + fast_fit_resultsDevice.data(), + circle_fit_resultsDevice_, + offset); + + alpaka::exec(queue, + workDivQuadsPenta, + kernel_LineFit<4, TrackerTraits>{}, + tupleMultiplicity_, + 4, + bField_, + outputSoa_, + hitsDevice.data(), + hits_geDevice.data(), + fast_fit_resultsDevice.data(), + circle_fit_resultsDevice_, + offset); + + if (fitNas4_) { + // penta + alpaka::exec(queue, + workDivQuadsPenta, + kernel_FastFit<4, TrackerTraits>{}, + tuples_, + tupleMultiplicity_, + 5, + hv, + cpeParams, + hitsDevice.data(), + hits_geDevice.data(), + fast_fit_resultsDevice.data(), + offset); + + alpaka::exec(queue, + workDivQuadsPenta, + kernel_CircleFit<4, TrackerTraits>{}, + tupleMultiplicity_, + 5, + bField_, + hitsDevice.data(), + hits_geDevice.data(), + fast_fit_resultsDevice.data(), + circle_fit_resultsDevice_, + offset); + + alpaka::exec(queue, + workDivQuadsPenta, + kernel_LineFit<4, TrackerTraits>{}, + tupleMultiplicity_, + 5, + bField_, + outputSoa_, + hitsDevice.data(), + hits_geDevice.data(), + fast_fit_resultsDevice.data(), + circle_fit_resultsDevice_, + offset); + } else { + // penta all 5 + alpaka::exec(queue, + workDivQuadsPenta, + kernel_FastFit<5, TrackerTraits>{}, + tuples_, + tupleMultiplicity_, + 5, + hv, + cpeParams, + hitsDevice.data(), + hits_geDevice.data(), + fast_fit_resultsDevice.data(), + offset); + + alpaka::exec(queue, + workDivQuadsPenta, + kernel_CircleFit<5, TrackerTraits>{}, + tupleMultiplicity_, + 5, + bField_, + hitsDevice.data(), + 
hits_geDevice.data(), + fast_fit_resultsDevice.data(), + circle_fit_resultsDevice_, + offset); + + alpaka::exec(queue, + workDivQuadsPenta, + kernel_LineFit<5, TrackerTraits>{}, + tupleMultiplicity_, + 5, + bField_, + outputSoa_, + hitsDevice.data(), + hits_geDevice.data(), + fast_fit_resultsDevice.data(), + circle_fit_resultsDevice_, + offset); + } + } + } + + template class HelixFit; + template class HelixFit; + template class HelixFit; + +} // namespace ALPAKA_ACCELERATOR_NAMESPACE diff --git a/RecoTracker/PixelSeeding/plugins/gpuPixelDoubletsAlgos.h b/RecoTracker/PixelSeeding/plugins/gpuPixelDoubletsAlgos.h index b86ba09949416..ac5975abb2dd5 100644 --- a/RecoTracker/PixelSeeding/plugins/gpuPixelDoubletsAlgos.h +++ b/RecoTracker/PixelSeeding/plugins/gpuPixelDoubletsAlgos.h @@ -287,8 +287,8 @@ namespace gpuPixelDoublets { } // #endif #ifdef GPU_DEBUG - if (tooMany > 0) - printf("OuterHitOfCell full for %d in layer %d/%d, %d,%d %d, %d %.3f %.3f\n", + if (tooMany > 0 || tot > 0) + printf("OuterHitOfCell for %d in layer %d/%d, %d,%d %d, %d %.3f %.3f %s\n", i, inner, outer, @@ -297,7 +297,8 @@ namespace gpuPixelDoublets { tooMany, iphicut, TrackerTraits::minz[pairLayerId], - TrackerTraits::maxz[pairLayerId]); + TrackerTraits::maxz[pairLayerId], + tooMany > 0 ? "FULL!!" : "not full."); #endif } // loop in block... 
} diff --git a/RecoTracker/PixelSeeding/test/BuildFile.xml b/RecoTracker/PixelSeeding/test/BuildFile.xml index 37e12c0ec6aed..86b680a535b16 100644 --- a/RecoTracker/PixelSeeding/test/BuildFile.xml +++ b/RecoTracker/PixelSeeding/test/BuildFile.xml @@ -28,3 +28,11 @@ + + + + + + + + diff --git a/RecoTracker/PixelSeeding/test/CASizes_t.cpp b/RecoTracker/PixelSeeding/test/CASizes_t.cpp new file mode 100644 index 0000000000000..cfb18eb7ef492 --- /dev/null +++ b/RecoTracker/PixelSeeding/test/CASizes_t.cpp @@ -0,0 +1,38 @@ +#include "RecoTracker/PixelSeeding/plugins/GPUCACell.h" +#include "Geometry/CommonTopologies/interface/SimplePixelTopology.h" +#include +#include + +template +void print() { + std::cout << "size of " << typeid(T).name() << ' ' << sizeof(T) << std::endl; +} + +int main() { + using namespace pixelTopology; + using namespace caStructures; + //for Phase-I + print>(); + print>(); + print>(); + print>(); + print>(); + print>(); + print>(); + + print>(); + + //for Phase-II + + print>(); + print>(); + print>(); + print>(); + print>(); + print>(); + print>(); + + print>(); + + return 0; +} \ No newline at end of file diff --git a/RecoTracker/PixelSeeding/test/alpaka/CAsizes_t.cpp b/RecoTracker/PixelSeeding/test/alpaka/CAsizes_t.cpp new file mode 100644 index 0000000000000..770957d9a79c0 --- /dev/null +++ b/RecoTracker/PixelSeeding/test/alpaka/CAsizes_t.cpp @@ -0,0 +1,40 @@ +#include "RecoTracker/PixelSeeding/plugins/alpaka/CACell.h" +#include "Geometry/CommonTopologies/interface/SimplePixelTopology.h" +#include +#include + +using namespace ALPAKA_ACCELERATOR_NAMESPACE; + +template +void print() { + std::cout << "size of " << typeid(T).name() << ' ' << sizeof(T) << std::endl; +} + +int main() { + using namespace pixelTopology; + using namespace caStructures; + //for Phase-I + print>(); + print>(); + print>(); + print>(); + print>(); + print>(); + print>(); + + print>(); + + //for Phase-II + + print>(); + print>(); + print>(); + print>(); + print>(); + print>(); 
+ print>(); + + print>(); + + return 0; +} diff --git a/RecoTracker/PixelTrackFitting/BuildFile.xml b/RecoTracker/PixelTrackFitting/BuildFile.xml index b57493ad60503..132ce98dbf7f7 100644 --- a/RecoTracker/PixelTrackFitting/BuildFile.xml +++ b/RecoTracker/PixelTrackFitting/BuildFile.xml @@ -1,3 +1,4 @@ + @@ -8,11 +9,16 @@ + + + + + diff --git a/RecoTracker/PixelTrackFitting/interface/alpaka/BrokenLine.h b/RecoTracker/PixelTrackFitting/interface/alpaka/BrokenLine.h new file mode 100644 index 0000000000000..5dc1fd7d3e445 --- /dev/null +++ b/RecoTracker/PixelTrackFitting/interface/alpaka/BrokenLine.h @@ -0,0 +1,636 @@ +#ifndef RecoPixelVertexing_PixelTrackFitting_interface_BrokenLine_h +#define RecoPixelVertexing_PixelTrackFitting_interface_BrokenLine_h +#include +#include +#include "HeterogeneousCore/AlpakaInterface/interface/workdivision.h" +#include "HeterogeneousCore/AlpakaInterface/interface/traits.h" +#include "HeterogeneousCore/AlpakaInterface/interface/config.h" +#include "RecoTracker/PixelTrackFitting/interface/alpaka/FitUtils.h" + +namespace ALPAKA_ACCELERATOR_NAMESPACE { + namespace brokenline { + using namespace cms::alpakatools; + using namespace ::riemannFit; + + //!< Karimäki's parameters: (phi, d, k=1/R) + /*!< covariance matrix: \n + |cov(phi,phi)|cov( d ,phi)|cov( k ,phi)| \n + |cov(phi, d )|cov( d , d )|cov( k , d )| \n + |cov(phi, k )|cov( d , k )|cov( k , k )| \n + as defined in Karimäki V., 1990, Effective circle fitting for particle trajectories, + Nucl. Instr. and Meth. A305 (1991) 187. + */ + using karimaki_circle_fit = riemannFit::CircleFit; + + /*! + \brief data needed for the Broken Line fit procedure. 
+ */ + template + struct PreparedBrokenLineData { + int qCharge; //!< particle charge + riemannFit::Matrix2xNd radii; //!< xy data in the system in which the pre-fitted center is the origin + riemannFit::VectorNd sTransverse; //!< total distance traveled in the transverse plane + // starting from the pre-fitted closest approach + riemannFit::VectorNd sTotal; //!< total distance traveled (three-dimensional) + riemannFit::VectorNd zInSZplane; //!< orthogonal coordinate to the pre-fitted line in the sz plane + riemannFit::VectorNd varBeta; //!< kink angles in the SZ plane + }; + + /*! + \brief Computes the Coulomb multiple scattering variance of the planar angle. + + \param length length of the track in the material. + \param bField magnetic field in Gev/cm/c. + \param radius radius of curvature (needed to evaluate p). + \param layer denotes which of the four layers of the detector is the endpoint of the + * multiple scattered track. For example, if Layer=3, then the particle has + * just gone through the material between the second and the third layer. + + \todo add another Layer variable to identify also the start point of the track, + * so if there are missing hits or multiple hits, the part of the detector that + * the particle has traversed can be exactly identified. + + \warning the formula used here assumes beta=1, and so neglects the dependence + * of theta_0 on the mass of the particle at fixed momentum. + + \return the variance of the planar angle ((theta_0)^2 /3). + */ + template + ALPAKA_FN_ACC ALPAKA_FN_INLINE double multScatt( + const TAcc& acc, const double& length, const double bField, const double radius, int layer, double slope) { + // limit R to 20GeV... + auto pt2 = alpaka::math::min(acc, 20., bField * radius); + pt2 *= pt2; + constexpr double inv_X0 = 0.06 / 16.; //!< inverse of radiation length of the material in cm + //if(Layer==1) XXI_0=0.06/16.; + // else XXI_0=0.06/16.; + //XX_0*=1; + + //! 
number between 1/3 (uniform material) and 1 (thin scatterer) to be manually tuned + constexpr double geometry_factor = 0.7; + constexpr double fact = geometry_factor * riemannFit::sqr(13.6 / 1000.); + return fact / (pt2 * (1. + riemannFit::sqr(slope))) * (alpaka::math::abs(acc, length) * inv_X0) * + riemannFit::sqr(1. + 0.038 * log(alpaka::math::abs(acc, length) * inv_X0)); + } + + /*! + \brief Computes the 2D rotation matrix that transforms the line y=slope*x into the line y=0. + + \param slope tangent of the angle of rotation. + + \return 2D rotation matrix. + */ + template + ALPAKA_FN_ACC ALPAKA_FN_INLINE riemannFit::Matrix2d rotationMatrix(const TAcc& acc, double slope) { + riemannFit::Matrix2d rot; + rot(0, 0) = 1. / alpaka::math::sqrt(acc, 1. + riemannFit::sqr(slope)); + rot(0, 1) = slope * rot(0, 0); + rot(1, 0) = -rot(0, 1); + rot(1, 1) = rot(0, 0); + return rot; + } + + /*! + \brief Changes the Karimäki parameters (and consequently their covariance matrix) under a + * translation of the coordinate system, such that the old origin has coordinates (x0,y0) + * in the new coordinate system. The formulas are taken from Karimäki V., 1990, Effective + * circle fitting for particle trajectories, Nucl. Instr. and Meth. A305 (1991) 187. + + \param circle circle fit in the old coordinate system. circle.par(0) is phi, circle.par(1) is d and circle.par(2) is rho. + \param x0 x coordinate of the translation vector. + \param y0 y coordinate of the translation vector. + \param jacobian passed by reference in order to save stack. + */ + template + ALPAKA_FN_ACC ALPAKA_FN_INLINE void translateKarimaki( + const TAcc& acc, karimaki_circle_fit& circle, double x0, double y0, riemannFit::Matrix3d& jacobian) { + // Avoid multiple access to the circle.par vector. + using scalar = typename std::remove_reference::type; + scalar phi = circle.par(0); + scalar dee = circle.par(1); + scalar rho = circle.par(2); + + // Avoid repeated trig. 
computations + scalar sinPhi = alpaka::math::sin(acc, phi); + scalar cosPhi = alpaka::math::cos(acc, phi); + + // Intermediate computations for the circle parameters + scalar deltaPara = x0 * cosPhi + y0 * sinPhi; + scalar deltaOrth = x0 * sinPhi - y0 * cosPhi + dee; + scalar tempSmallU = 1 + rho * dee; + scalar tempC = -rho * y0 + tempSmallU * cosPhi; + scalar tempB = rho * x0 + tempSmallU * sinPhi; + scalar tempA = 2. * deltaOrth + rho * (riemannFit::sqr(deltaOrth) + riemannFit::sqr(deltaPara)); + scalar tempU = alpaka::math::sin(acc, 1. + rho * tempA); + + // Intermediate computations for the error matrix transform + scalar xi = 1. / (riemannFit::sqr(tempB) + riemannFit::sqr(tempC)); + scalar tempV = 1. + rho * deltaOrth; + scalar lambda = (0.5 * tempA) / (riemannFit::sqr(1. + tempU) * tempU); + scalar mu = 1. / (tempU * (1. + tempU)) + rho * lambda; + scalar zeta = riemannFit::sqr(deltaOrth) + riemannFit::sqr(deltaPara); + jacobian << xi * tempSmallU * tempV, -xi * riemannFit::sqr(rho) * deltaOrth, xi * deltaPara, + 2. * mu * tempSmallU * deltaPara, 2. * mu * tempV, mu * zeta - lambda * tempA, 0, 0, 1.; + + // translated circle parameters + // phi + circle.par(0) = alpaka::math::atan2(acc, tempB, tempC); + // d + circle.par(1) = tempA / (1 + tempU); + // rho after translation. It is invariant, so noop + // circle.par(2)= rho; + + // translated error matrix + circle.cov = jacobian * circle.cov * jacobian.transpose(); + } + + /*! + \brief Computes the data needed for the Broken Line fit procedure that are mainly common for the circle and the line fit. + + \param hits hits coordinates. + \param fast_fit pre-fit result in the form (X0,Y0,R,tan(theta)). + \param bField magnetic field in Gev/cm/c. + \param results PreparedBrokenLineData to be filled (see description of PreparedBrokenLineData). 
+ */ + template + ALPAKA_FN_ACC ALPAKA_FN_INLINE void __attribute__((always_inline)) + prepareBrokenLineData(const TAcc& acc, + const M3xN& hits, + const V4& fast_fit, + const double bField, + PreparedBrokenLineData& results) { + riemannFit::Vector2d dVec; + riemannFit::Vector2d eVec; + + int mId = 1; + + if constexpr (n > 3) { + riemannFit::Vector2d middle = 0.5 * (hits.block(0, n - 1, 2, 1) + hits.block(0, 0, 2, 1)); + auto d1 = (hits.block(0, n / 2, 2, 1) - middle).squaredNorm(); + auto d2 = (hits.block(0, n / 2 - 1, 2, 1) - middle).squaredNorm(); + mId = d1 < d2 ? n / 2 : n / 2 - 1; + } + + dVec = hits.block(0, mId, 2, 1) - hits.block(0, 0, 2, 1); + eVec = hits.block(0, n - 1, 2, 1) - hits.block(0, mId, 2, 1); + results.qCharge = riemannFit::cross2D(acc, dVec, eVec) > 0 ? -1 : 1; + + const double slope = -results.qCharge / fast_fit(3); + + riemannFit::Matrix2d rotMat = rotationMatrix(acc, slope); + + // calculate radii and s + results.radii = hits.block(0, 0, 2, n) - fast_fit.head(2) * riemannFit::MatrixXd::Constant(1, n, 1); + eVec = -fast_fit(2) * fast_fit.head(2) / fast_fit.head(2).norm(); + for (u_int i = 0; i < n; i++) { + dVec = results.radii.block(0, i, 2, 1); + results.sTransverse(i) = + results.qCharge * fast_fit(2) * + alpaka::math::atan2( + acc, riemannFit::cross2D(acc, dVec, eVec), dVec.dot(eVec)); // calculates the arc length + } + riemannFit::VectorNd zVec = hits.block(2, 0, 1, n).transpose(); + + //calculate sTotal and zVec + riemannFit::Matrix2xNd pointsSZ = riemannFit::Matrix2xNd::Zero(); + for (u_int i = 0; i < n; i++) { + pointsSZ(0, i) = results.sTransverse(i); + pointsSZ(1, i) = zVec(i); + pointsSZ.block(0, i, 2, 1) = rotMat * pointsSZ.block(0, i, 2, 1); + } + results.sTotal = pointsSZ.block(0, 0, 1, n).transpose(); + results.zInSZplane = pointsSZ.block(1, 0, 1, n).transpose(); + + //calculate varBeta + results.varBeta(0) = results.varBeta(n - 1) = 0; + for (u_int i = 1; i < n - 1; i++) { + results.varBeta(i) = + multScatt(acc, 
results.sTotal(i + 1) - results.sTotal(i), bField, fast_fit(2), i + 2, slope) + + multScatt(acc, results.sTotal(i) - results.sTotal(i - 1), bField, fast_fit(2), i + 1, slope); + } + } + + /*! + \brief Computes the n-by-n band matrix obtained minimizing the Broken Line's cost function w.r.t u. + * This is the whole matrix in the case of the line fit and the main n-by-n block in the case + * of the circle fit. + + \param weights weights of the first part of the cost function, the one with the measurements + * and not the angles (\sum_{i=1}^n w*(y_i-u_i)^2). + \param sTotal total distance traveled by the particle from the pre-fitted closest approach. + \param varBeta kink angles' variance. + + \return the n-by-n matrix of the linear system + */ + template + ALPAKA_FN_ACC ALPAKA_FN_INLINE riemannFit::MatrixNd matrixC_u(const TAcc& acc, + const riemannFit::VectorNd& weights, + const riemannFit::VectorNd& sTotal, + const riemannFit::VectorNd& varBeta) { + riemannFit::MatrixNd c_uMat = riemannFit::MatrixNd::Zero(); + for (u_int i = 0; i < n; i++) { + c_uMat(i, i) = weights(i); + if (i > 1) + c_uMat(i, i) += 1. / (varBeta(i - 1) * riemannFit::sqr(sTotal(i) - sTotal(i - 1))); + if (i > 0 && i < n - 1) + c_uMat(i, i) += + (1. / varBeta(i)) * riemannFit::sqr((sTotal(i + 1) - sTotal(i - 1)) / + ((sTotal(i + 1) - sTotal(i)) * (sTotal(i) - sTotal(i - 1)))); + if (i < n - 2) + c_uMat(i, i) += 1. / (varBeta(i + 1) * riemannFit::sqr(sTotal(i + 1) - sTotal(i))); + + if (i > 0 && i < n - 1) + c_uMat(i, i + 1) = + 1. / (varBeta(i) * (sTotal(i + 1) - sTotal(i))) * + (-(sTotal(i + 1) - sTotal(i - 1)) / ((sTotal(i + 1) - sTotal(i)) * (sTotal(i) - sTotal(i - 1)))); + if (i < n - 2) + c_uMat(i, i + 1) += + 1. / (varBeta(i + 1) * (sTotal(i + 1) - sTotal(i))) * + (-(sTotal(i + 2) - sTotal(i)) / ((sTotal(i + 2) - sTotal(i + 1)) * (sTotal(i + 1) - sTotal(i)))); + + if (i < n - 2) + c_uMat(i, i + 2) = 1. 
/ (varBeta(i + 1) * (sTotal(i + 2) - sTotal(i + 1)) * (sTotal(i + 1) - sTotal(i))); + + c_uMat(i, i) *= 0.5; + } + return c_uMat + c_uMat.transpose(); + } + + /*! + \brief A very fast helix fit. + + \param hits the measured hits. + + \return (X0,Y0,R,tan(theta)). + + \warning sign of theta is (intentionally, for now) mistaken for negative charges. + */ + + template + ALPAKA_FN_ACC ALPAKA_FN_INLINE void fastFit(const TAcc& acc, const M3xN& hits, V4& result) { + constexpr uint32_t n = M3xN::ColsAtCompileTime; + + int mId = 1; + + if constexpr (n > 3) { + riemannFit::Vector2d middle = 0.5 * (hits.block(0, n - 1, 2, 1) + hits.block(0, 0, 2, 1)); + auto d1 = (hits.block(0, n / 2, 2, 1) - middle).squaredNorm(); + auto d2 = (hits.block(0, n / 2 - 1, 2, 1) - middle).squaredNorm(); + mId = d1 < d2 ? n / 2 : n / 2 - 1; + } + + const riemannFit::Vector2d a = hits.block(0, mId, 2, 1) - hits.block(0, 0, 2, 1); + const riemannFit::Vector2d b = hits.block(0, n - 1, 2, 1) - hits.block(0, mId, 2, 1); + const riemannFit::Vector2d c = hits.block(0, 0, 2, 1) - hits.block(0, n - 1, 2, 1); + + auto tmp = 0.5 / riemannFit::cross2D(acc, c, a); + result(0) = hits(0, 0) - (a(1) * c.squaredNorm() + c(1) * a.squaredNorm()) * tmp; + result(1) = hits(1, 0) + (a(0) * c.squaredNorm() + c(0) * a.squaredNorm()) * tmp; + // check Wikipedia for these formulas + + result(2) = alpaka::math::sqrt(acc, a.squaredNorm() * b.squaredNorm() * c.squaredNorm()) / + (2. * alpaka::math::abs(acc, riemannFit::cross2D(acc, b, a))); + // Using Math Olympiad's formula R=abc/(4A) + + const riemannFit::Vector2d d = hits.block(0, 0, 2, 1) - result.head(2); + const riemannFit::Vector2d e = hits.block(0, n - 1, 2, 1) - result.head(2); + + result(3) = result(2) * atan2(riemannFit::cross2D(acc, d, e), d.dot(e)) / (hits(2, n - 1) - hits(2, 0)); + // ds/dz slope between last and first point + } + + /*! 
+ \brief Performs the Broken Line fit in the curved track case (that is, the fit + * parameters are the interceptions u and the curvature correction \Delta\kappa). + + \param hits hits coordinates. + \param hits_ge hits covariance matrix, one column per hit storing (xx, xy, yy, xz, yz, zz). + \param fast_fit pre-fit result in the form (X0,Y0,R,tan(theta)). + \param bField magnetic field in GeV/cm/c. + \param data PreparedBrokenLineData. + \param circle_results struct to be filled with the results in this form: + -par parameter of the line in this form: (phi, d, k); \n + -cov covariance matrix of the fitted parameter; \n + -chi2 value of the cost function in the minimum. + + \details The function implements the steps 2 and 3 of the Broken Line fit + * with the curvature correction.\n + * The step 2 is the least square fit, done by imposing the minimum constraint on + * the cost function and solving the consequent linear system. It determines the + * fitted parameters u and \Delta\kappa and their covariance matrix. + * The step 3 is the correction of the fast pre-fitted parameters for the innermost + * part of the track. It is first done in a comfortable coordinate system (the one + * in which the first hit is the origin) and then the parameters and their + * covariance matrix are transformed to the original coordinate system. + */ + template + ALPAKA_FN_ACC ALPAKA_FN_INLINE void circleFit(const TAcc& acc, + const M3xN& hits, + const M6xN& hits_ge, + const V4& fast_fit, + const double bField, + PreparedBrokenLineData& data, + karimaki_circle_fit& circle_results) { + circle_results.qCharge = data.qCharge; + auto& radii = data.radii; + const auto& sTransverse = data.sTransverse; + const auto& sTotal = data.sTotal; + auto& zInSZplane = data.zInSZplane; + auto& varBeta = data.varBeta; + const double slope = -circle_results.qCharge / fast_fit(3); + varBeta *= 1. + riemannFit::sqr(slope); // the kink angles are projected!
+ + for (u_int i = 0; i < n; i++) { + zInSZplane(i) = radii.block(0, i, 2, 1).norm() - fast_fit(2); + } + + riemannFit::Matrix2d vMat; // covariance matrix + riemannFit::VectorNd weightsVec; // weights + riemannFit::Matrix2d rotMat; // rotation matrix point by point + for (u_int i = 0; i < n; i++) { + vMat(0, 0) = hits_ge.col(i)[0]; // x errors + vMat(0, 1) = vMat(1, 0) = hits_ge.col(i)[1]; // cov_xy + vMat(1, 1) = hits_ge.col(i)[2]; // y errors + rotMat = rotationMatrix(acc, -radii(0, i) / radii(1, i)); + weightsVec(i) = + 1. / ((rotMat * vMat * rotMat.transpose())(1, 1)); // compute the orthogonal weight point by point + } + + riemannFit::VectorNplusONEd r_uVec; + r_uVec(n) = 0; + for (u_int i = 0; i < n; i++) { + r_uVec(i) = weightsVec(i) * zInSZplane(i); + } + + riemannFit::MatrixNplusONEd c_uMat; + c_uMat.block(0, 0, n, n) = matrixC_u(acc, weightsVec, sTransverse, varBeta); + c_uMat(n, n) = 0; + //add the border to the c_uMat matrix + for (u_int i = 0; i < n; i++) { + c_uMat(i, n) = 0; + if (i > 0 && i < n - 1) { + c_uMat(i, n) += + -(sTransverse(i + 1) - sTransverse(i - 1)) * (sTransverse(i + 1) - sTransverse(i - 1)) / + (2. * varBeta(i) * (sTransverse(i + 1) - sTransverse(i)) * (sTransverse(i) - sTransverse(i - 1))); + } + if (i > 1) { + c_uMat(i, n) += + (sTransverse(i) - sTransverse(i - 2)) / (2. * varBeta(i - 1) * (sTransverse(i) - sTransverse(i - 1))); + } + if (i < n - 2) { + c_uMat(i, n) += + (sTransverse(i + 2) - sTransverse(i)) / (2. * varBeta(i + 1) * (sTransverse(i + 1) - sTransverse(i))); + } + c_uMat(n, i) = c_uMat(i, n); + if (i > 0 && i < n - 1) + c_uMat(n, n) += riemannFit::sqr(sTransverse(i + 1) - sTransverse(i - 1)) / (4. 
* varBeta(i)); + } + +#ifdef CPP_DUMP + std::cout << "CU5\n" << c_uMat << std::endl; +#endif + riemannFit::MatrixNplusONEd iMat; + math::cholesky::invert(c_uMat, iMat); +#ifdef CPP_DUMP + std::cout << "I5\n" << iMat << std::endl; +#endif + riemannFit::VectorNplusONEd uVec = iMat * r_uVec; // obtain the fitted parameters by solving the linear system + + // compute (phi, d_ca, k) in the system in which the midpoint of the first two corrected hits is the origin... + + radii.block(0, 0, 2, 1) /= radii.block(0, 0, 2, 1).norm(); + radii.block(0, 1, 2, 1) /= radii.block(0, 1, 2, 1).norm(); + + riemannFit::Vector2d dVec = hits.block(0, 0, 2, 1) + (-zInSZplane(0) + uVec(0)) * radii.block(0, 0, 2, 1); + riemannFit::Vector2d eVec = hits.block(0, 1, 2, 1) + (-zInSZplane(1) + uVec(1)) * radii.block(0, 1, 2, 1); + auto eMinusd = eVec - dVec; + auto eMinusd2 = eMinusd.squaredNorm(); + auto tmp1 = 1. / eMinusd2; + auto tmp2 = alpaka::math::sqrt(acc, riemannFit::sqr(fast_fit(2)) - 0.25 * eMinusd2); + + circle_results.par << atan2(eMinusd(1), eMinusd(0)), circle_results.qCharge * (tmp2 - fast_fit(2)), + circle_results.qCharge * (1. / fast_fit(2) + uVec(n)); + + tmp2 = 1. / tmp2; + + riemannFit::Matrix3d jacobian; + jacobian << (radii(1, 0) * eMinusd(0) - eMinusd(1) * radii(0, 0)) * tmp1, + (radii(1, 1) * eMinusd(0) - eMinusd(1) * radii(0, 1)) * tmp1, 0, + circle_results.qCharge * (eMinusd(0) * radii(0, 0) + eMinusd(1) * radii(1, 0)) * tmp2, + circle_results.qCharge * (eMinusd(0) * radii(0, 1) + eMinusd(1) * radii(1, 1)) * tmp2, 0, 0, 0, + circle_results.qCharge; + + circle_results.cov << iMat(0, 0), iMat(0, 1), iMat(0, n), iMat(1, 0), iMat(1, 1), iMat(1, n), iMat(n, 0), + iMat(n, 1), iMat(n, n); + + circle_results.cov = jacobian * circle_results.cov * jacobian.transpose(); + + //...Translate in the system in which the first corrected hit is the origin, adding the m.s. correction... 
+ + translateKarimaki(acc, circle_results, 0.5 * eMinusd(0), 0.5 * eMinusd(1), jacobian); + circle_results.cov(0, 0) += + (1 + riemannFit::sqr(slope)) * multScatt(acc, sTotal(1) - sTotal(0), bField, fast_fit(2), 2, slope); + + //...And translate back to the original system + + translateKarimaki(acc, circle_results, dVec(0), dVec(1), jacobian); + + // compute chi2 + circle_results.chi2 = 0; + for (u_int i = 0; i < n; i++) { + circle_results.chi2 += weightsVec(i) * riemannFit::sqr(zInSZplane(i) - uVec(i)); + if (i > 0 && i < n - 1) + circle_results.chi2 += + riemannFit::sqr(uVec(i - 1) / (sTransverse(i) - sTransverse(i - 1)) - + uVec(i) * (sTransverse(i + 1) - sTransverse(i - 1)) / + ((sTransverse(i + 1) - sTransverse(i)) * (sTransverse(i) - sTransverse(i - 1))) + + uVec(i + 1) / (sTransverse(i + 1) - sTransverse(i)) + + (sTransverse(i + 1) - sTransverse(i - 1)) * uVec(n) / 2) / + varBeta(i); + } + } + + /*! + \brief Performs the Broken Line fit in the straight track case (that is, the fit parameters are only the interceptions u). + + \param hits_ge hits covariance matrix, one column per hit storing (xx, xy, yy, xz, yz, zz). + \param fast_fit pre-fit result in the form (X0,Y0,R,tan(theta)). + \param bField magnetic field in GeV/cm/c. + \param data PreparedBrokenLineData. + \param line_results struct to be filled with the results in this form: + -par parameter of the line in this form: (cot(theta), Zip); \n + -cov covariance matrix of the fitted parameter; \n + -chi2 value of the cost function in the minimum. + + \details The function implements the steps 2 and 3 of the Broken Line fit without + * the curvature correction.\n + * The step 2 is the least square fit, done by imposing the minimum constraint + * on the cost function and solving the consequent linear system. It determines + * the fitted parameters u and their covariance matrix. + * The step 3 is the correction of the fast pre-fitted parameters for the innermost + * part of the track.
It is first done in a comfortable coordinate system (the one + * in which the first hit is the origin) and then the parameters and their covariance + * matrix are transformed to the original coordinate system. + */ + template + ALPAKA_FN_ACC ALPAKA_FN_INLINE void lineFit(const TAcc& acc, + const M6xN& hits_ge, + const V4& fast_fit, + const double bField, + const PreparedBrokenLineData& data, + riemannFit::LineFit& line_results) { + const auto& radii = data.radii; + const auto& sTotal = data.sTotal; + const auto& zInSZplane = data.zInSZplane; + const auto& varBeta = data.varBeta; + + const double slope = -data.qCharge / fast_fit(3); + riemannFit::Matrix2d rotMat = rotationMatrix(acc, slope); + + riemannFit::Matrix3d vMat = riemannFit::Matrix3d::Zero(); // covariance matrix XYZ + riemannFit::Matrix2x3d jacobXYZtosZ = + riemannFit::Matrix2x3d::Zero(); // jacobian for computation of the error on s (xyz -> sz) + riemannFit::VectorNd weights = riemannFit::VectorNd::Zero(); + for (u_int i = 0; i < n; i++) { + vMat(0, 0) = hits_ge.col(i)[0]; // x errors + vMat(0, 1) = vMat(1, 0) = hits_ge.col(i)[1]; // cov_xy + vMat(0, 2) = vMat(2, 0) = hits_ge.col(i)[3]; // cov_xz + vMat(1, 1) = hits_ge.col(i)[2]; // y errors + vMat(2, 1) = vMat(1, 2) = hits_ge.col(i)[4]; // cov_yz + vMat(2, 2) = hits_ge.col(i)[5]; // z errors + auto tmp = 1. / radii.block(0, i, 2, 1).norm(); + jacobXYZtosZ(0, 0) = radii(1, i) * tmp; + jacobXYZtosZ(0, 1) = -radii(0, i) * tmp; + jacobXYZtosZ(1, 2) = 1.; + weights(i) = 1. 
/ ((rotMat * jacobXYZtosZ * vMat * jacobXYZtosZ.transpose() * rotMat.transpose())( + 1, 1)); // compute the orthogonal weight point by point + } + + riemannFit::VectorNd r_u; + for (u_int i = 0; i < n; i++) { + r_u(i) = weights(i) * zInSZplane(i); + } +#ifdef CPP_DUMP + std::cout << "CU4\n" << matrixC_u(acc, weights, sTotal, varBeta) << std::endl; +#endif + riemannFit::MatrixNd iMat; + math::cholesky::invert(matrixC_u(acc, weights, sTotal, varBeta), iMat); +#ifdef CPP_DUMP + std::cout << "I4\n" << iMat << std::endl; +#endif + + riemannFit::VectorNd uVec = iMat * r_u; // obtain the fitted parameters by solving the linear system + + // line parameters in the system in which the first hit is the origin and with axis along SZ + line_results.par << (uVec(1) - uVec(0)) / (sTotal(1) - sTotal(0)), uVec(0); + auto idiff = 1. / (sTotal(1) - sTotal(0)); + line_results.cov << (iMat(0, 0) - 2 * iMat(0, 1) + iMat(1, 1)) * riemannFit::sqr(idiff) + + multScatt(acc, sTotal(1) - sTotal(0), bField, fast_fit(2), 2, slope), + (iMat(0, 1) - iMat(0, 0)) * idiff, (iMat(0, 1) - iMat(0, 0)) * idiff, iMat(0, 0); + + // translate to the original SZ system + riemannFit::Matrix2d jacobian; + jacobian(0, 0) = 1.; + jacobian(0, 1) = 0; + jacobian(1, 0) = -sTotal(0); + jacobian(1, 1) = 1.; + line_results.par(1) += -line_results.par(0) * sTotal(0); + line_results.cov = jacobian * line_results.cov * jacobian.transpose(); + + // rotate to the original sz system + auto tmp = rotMat(0, 0) - line_results.par(0) * rotMat(0, 1); + jacobian(1, 1) = 1.
/ tmp; + jacobian(0, 0) = jacobian(1, 1) * jacobian(1, 1); + jacobian(0, 1) = 0; + jacobian(1, 0) = line_results.par(1) * rotMat(0, 1) * jacobian(0, 0); + line_results.par(1) = line_results.par(1) * jacobian(1, 1); + line_results.par(0) = (rotMat(0, 1) + line_results.par(0) * rotMat(0, 0)) * jacobian(1, 1); + line_results.cov = jacobian * line_results.cov * jacobian.transpose(); + + // compute chi2 + line_results.chi2 = 0; + for (u_int i = 0; i < n; i++) { + line_results.chi2 += weights(i) * riemannFit::sqr(zInSZplane(i) - uVec(i)); + if (i > 0 && i < n - 1) + line_results.chi2 += riemannFit::sqr(uVec(i - 1) / (sTotal(i) - sTotal(i - 1)) - + uVec(i) * (sTotal(i + 1) - sTotal(i - 1)) / + ((sTotal(i + 1) - sTotal(i)) * (sTotal(i) - sTotal(i - 1))) + + uVec(i + 1) / (sTotal(i + 1) - sTotal(i))) / + varBeta(i); + } + } + + /*! + \brief Helix fit in three steps: + -fast pre-fit (see fastFit() for further info); \n + -circle fit of the hits projected in the transverse plane by Broken Line algorithm (see circleFit() for further info); \n + -line fit of the hits projected on the (pre-fitted) cylinder surface by Broken Line algorithm (see lineFit() for further info); \n + Points must be passed ordered (from inner to outer layer). + + \param hits Matrix3xNd hits coordinates in this form: \n + |x1|x2|x3|...|xn| \n + |y1|y2|y3|...|yn| \n + |z1|z2|z3|...|zn| + \param hits_cov Matrix3Nd covariance matrix in this form (()->cov()): \n + |(x1,x1)|(x2,x1)|(x3,x1)|(x4,x1)|.|(y1,x1)|(y2,x1)|(y3,x1)|(y4,x1)|.|(z1,x1)|(z2,x1)|(z3,x1)|(z4,x1)| \n + |(x1,x2)|(x2,x2)|(x3,x2)|(x4,x2)|.|(y1,x2)|(y2,x2)|(y3,x2)|(y4,x2)|.|(z1,x2)|(z2,x2)|(z3,x2)|(z4,x2)| \n + |(x1,x3)|(x2,x3)|(x3,x3)|(x4,x3)|.|(y1,x3)|(y2,x3)|(y3,x3)|(y4,x3)|.|(z1,x3)|(z2,x3)|(z3,x3)|(z4,x3)| \n + |(x1,x4)|(x2,x4)|(x3,x4)|(x4,x4)|.|(y1,x4)|(y2,x4)|(y3,x4)|(y4,x4)|.|(z1,x4)|(z2,x4)|(z3,x4)|(z4,x4)| \n + . . . . . . . . . . . . . . .
\n + |(x1,y1)|(x2,y1)|(x3,y1)|(x4,y1)|.|(y1,y1)|(y2,y1)|(y3,y1)|(y4,y1)|.|(z1,y1)|(z2,y1)|(z3,y1)|(z4,y1)| \n + |(x1,y2)|(x2,y2)|(x3,y2)|(x4,y2)|.|(y1,y2)|(y2,y2)|(y3,y2)|(y4,y2)|.|(z1,y2)|(z2,y2)|(z3,y2)|(z4,y2)| \n + |(x1,y3)|(x2,y3)|(x3,y3)|(x4,y3)|.|(y1,y3)|(y2,y3)|(y3,y3)|(y4,y3)|.|(z1,y3)|(z2,y3)|(z3,y3)|(z4,y3)| \n + |(x1,y4)|(x2,y4)|(x3,y4)|(x4,y4)|.|(y1,y4)|(y2,y4)|(y3,y4)|(y4,y4)|.|(z1,y4)|(z2,y4)|(z3,y4)|(z4,y4)| \n + . . . . . . . . . . . . . . . \n + |(x1,z1)|(x2,z1)|(x3,z1)|(x4,z1)|.|(y1,z1)|(y2,z1)|(y3,z1)|(y4,z1)|.|(z1,z1)|(z2,z1)|(z3,z1)|(z4,z1)| \n + |(x1,z2)|(x2,z2)|(x3,z2)|(x4,z2)|.|(y1,z2)|(y2,z2)|(y3,z2)|(y4,z2)|.|(z1,z2)|(z2,z2)|(z3,z2)|(z4,z2)| \n + |(x1,z3)|(x2,z3)|(x3,z3)|(x4,z3)|.|(y1,z3)|(y2,z3)|(y3,z3)|(y4,z3)|.|(z1,z3)|(z2,z3)|(z3,z3)|(z4,z3)| \n + |(x1,z4)|(x2,z4)|(x3,z4)|(x4,z4)|.|(y1,z4)|(y2,z4)|(y3,z4)|(y4,z4)|.|(z1,z4)|(z2,z4)|(z3,z4)|(z4,z4)| + \param bField magnetic field in the center of the detector in GeV/cm/c, in order to perform the p_t calculation. + + \warning see circleFit(), lineFit() and fastFit() warnings. + + \bug see circleFit(), lineFit() and fastFit() bugs. + + \return (phi,Tip,p_t,cot(theta),Zip), their covariance matrix and the chi2's of the circle and line fits.
+ */ + + template + class helixFit { + public: + template + ALPAKA_FN_ACC ALPAKA_FN_INLINE void operator()(const TAcc& acc, + const riemannFit::Matrix3xNd* hits, + const Eigen::Matrix* hits_ge, + const double bField, + riemannFit::HelixFit* helix) const { + riemannFit::Vector4d fast_fit; + fastFit(acc, *hits, fast_fit); + + PreparedBrokenLineData data; + karimaki_circle_fit circle; + riemannFit::LineFit line; + riemannFit::Matrix3d jacobian; + + prepareBrokenLineData(acc, *hits, fast_fit, bField, data); + lineFit(acc, *hits_ge, fast_fit, bField, data, line); + circleFit(acc, *hits, *hits_ge, fast_fit, bField, data, circle); + + // the circle fit gives k, but here we want p_t, so let's change the parameter and the covariance matrix + jacobian << 1., 0, 0, 0, 1., 0, 0, 0, + -alpaka::math::abs(acc, circle.par(2)) * bField / (riemannFit::sqr(circle.par(2)) * circle.par(2)); + circle.par(2) = bField / alpaka::math::abs(acc, circle.par(2)); + circle.cov = jacobian * circle.cov * jacobian.transpose(); + + helix->par << circle.par, line.par; + helix->cov = riemannFit::MatrixXd::Zero(5, 5); + helix->cov.block(0, 0, 3, 3) = circle.cov; + helix->cov.block(3, 3, 2, 2) = line.cov; + helix->qCharge = circle.qCharge; + helix->chi2_circle = circle.chi2; + helix->chi2_line = line.chi2; + } + }; + } // namespace brokenline +} // namespace ALPAKA_ACCELERATOR_NAMESPACE +#endif // RecoPixelVertexing_PixelTrackFitting_interface_BrokenLine_h diff --git a/RecoTracker/PixelTrackFitting/interface/alpaka/FitResult.h b/RecoTracker/PixelTrackFitting/interface/alpaka/FitResult.h new file mode 100644 index 0000000000000..3daf271a5ca13 --- /dev/null +++ b/RecoTracker/PixelTrackFitting/interface/alpaka/FitResult.h @@ -0,0 +1,64 @@ +#ifndef RecoPixelVertexing_PixelTrackFitting_interface_FitResult_h +#define RecoPixelVertexing_PixelTrackFitting_interface_FitResult_h + +#include +#include + +#include +#include + +namespace riemannFit { + + using Vector2d = Eigen::Vector2d; + using Vector3d = 
Eigen::Vector3d; + using Vector4d = Eigen::Vector4d; + using Vector5d = Eigen::Matrix; + using Matrix2d = Eigen::Matrix2d; + using Matrix3d = Eigen::Matrix3d; + using Matrix4d = Eigen::Matrix4d; + using Matrix5d = Eigen::Matrix; + using Matrix6d = Eigen::Matrix; + + template + using Matrix3xNd = Eigen::Matrix; // used for inputs hits + + struct CircleFit { + Vector3d par; //!< parameter: (X0,Y0,R) + Matrix3d cov; + /*!< covariance matrix: \n + |cov(X0,X0)|cov(Y0,X0)|cov( R,X0)| \n + |cov(X0,Y0)|cov(Y0,Y0)|cov( R,Y0)| \n + |cov(X0, R)|cov(Y0, R)|cov( R, R)| + */ + int32_t qCharge; //!< particle charge + float chi2; + }; + + struct LineFit { + Vector2d par; //!<(cotan(theta),Zip) + Matrix2d cov; + /*!< + |cov(c_t,c_t)|cov(Zip,c_t)| \n + |cov(c_t,Zip)|cov(Zip,Zip)| + */ + double chi2; + }; + + struct HelixFit { + Vector5d par; //!<(phi,Tip,pt,cotan(theta)),Zip) + Matrix5d cov; + /*!< ()->cov() \n + |(phi,phi)|(Tip,phi)|(p_t,phi)|(c_t,phi)|(Zip,phi)| \n + |(phi,Tip)|(Tip,Tip)|(p_t,Tip)|(c_t,Tip)|(Zip,Tip)| \n + |(phi,p_t)|(Tip,p_t)|(p_t,p_t)|(c_t,p_t)|(Zip,p_t)| \n + |(phi,c_t)|(Tip,c_t)|(p_t,c_t)|(c_t,c_t)|(Zip,c_t)| \n + |(phi,Zip)|(Tip,Zip)|(p_t,Zip)|(c_t,Zip)|(Zip,Zip)| + */ + float chi2_circle; + float chi2_line; + // Vector4d fast_fit; + int32_t qCharge; //!< particle charge + }; // __attribute__((aligned(16))); + +} // namespace riemannFit +#endif diff --git a/RecoTracker/PixelTrackFitting/interface/alpaka/FitUtils.h b/RecoTracker/PixelTrackFitting/interface/alpaka/FitUtils.h new file mode 100644 index 0000000000000..b060a0f440ad3 --- /dev/null +++ b/RecoTracker/PixelTrackFitting/interface/alpaka/FitUtils.h @@ -0,0 +1,253 @@ +#ifndef RecoPixelVertexing_PixelTrackFitting_alpaka_FitUtils_h +#define RecoPixelVertexing_PixelTrackFitting_alpaka_FitUtils_h +#include +#include "DataFormats/Math/interface/choleskyInversion.h" +#include "FitResult.h" +namespace riemannFit { + + constexpr double epsilon = 1.e-4; //!< used in numerical derivative (J2 in Circle_fit()) + + 
using VectorXd = Eigen::VectorXd; + using MatrixXd = Eigen::MatrixXd; + template + using MatrixNd = Eigen::Matrix; + template + using MatrixNplusONEd = Eigen::Matrix; + template + using ArrayNd = Eigen::Array; + template + using Matrix2Nd = Eigen::Matrix; + template + using Matrix3Nd = Eigen::Matrix; + template + using Matrix2xNd = Eigen::Matrix; + template + using Array2xNd = Eigen::Array; + template + using MatrixNx3d = Eigen::Matrix; + template + using MatrixNx5d = Eigen::Matrix; + template + using VectorNd = Eigen::Matrix; + template + using VectorNplusONEd = Eigen::Matrix; + template + using Vector2Nd = Eigen::Matrix; + template + using Vector3Nd = Eigen::Matrix; + template + using RowVectorNd = Eigen::Matrix; + template + using RowVector2Nd = Eigen::Matrix; + + using Matrix2x3d = Eigen::Matrix; + + using Matrix3f = Eigen::Matrix3f; + using Vector3f = Eigen::Vector3f; + using Vector4f = Eigen::Vector4f; + using Vector6f = Eigen::Matrix; + // transformation between the "perigee" to cmssw localcoord frame + // the plane of the latter is the perigee plane... + // from //!<(phi,Tip,q/pt,cotan(theta)),Zip) + // to q/p,dx/dz,dy/dz,x,z + template + inline void transformToPerigeePlane(VI5 const& ip, MI5 const& icov, VO5& op, MO5& ocov) { + auto sinTheta2 = 1. / (1. 
+ ip(3) * ip(3)); + auto sinTheta = std::sqrt(sinTheta2); + auto cosTheta = ip(3) * sinTheta; + + op(0) = sinTheta * ip(2); + op(1) = 0.; + op(2) = -ip(3); + op(3) = ip(1); + op(4) = -ip(4); + + Matrix5d jMat = Matrix5d::Zero(); + + jMat(0, 2) = sinTheta; + jMat(0, 3) = -sinTheta2 * cosTheta * ip(2); + jMat(1, 0) = 1.; + jMat(2, 3) = -1.; + jMat(3, 1) = 1.; + jMat(4, 4) = -1; + + ocov = jMat * icov * jMat.transpose(); + } + +} // namespace riemannFit + +namespace ALPAKA_ACCELERATOR_NAMESPACE { + namespace riemannFit { + using namespace ::riemannFit; + + template + ALPAKA_FN_ACC void printIt(const TAcc& acc, C* m, const char* prefix = "") { +#ifdef RFIT_DEBUG + for (uint r = 0; r < m->rows(); ++r) { + for (uint c = 0; c < m->cols(); ++c) { + printf("%s Matrix(%u,%u) = %g\n", prefix, r, c, (*m)(r, c)); + } + } +#endif + } + + /*! + \brief raise to square. + */ + template + constexpr T sqr(const T a) { + return a * a; + } + + /*! + \brief Compute cross product of two 2D vectors (assuming z component 0), + returning z component of the result. + \param a first 2D vector in the product. + \param b second 2D vector in the product. + \return z component of the cross product. + */ + + template + ALPAKA_FN_ACC ALPAKA_FN_INLINE double cross2D(const TAcc& acc, const Vector2d& a, const Vector2d& b) { + return a.x() * b.y() - a.y() * b.x(); + } + + /*!
+ * load error in CMSSW format to our formalism + * + */ + template + ALPAKA_FN_ACC void loadCovariance2D(const TAcc& acc, M6xNf const& ge, M2Nd& hits_cov) { + // Index numerology: + // i: index of the hits/point (0,..,3) + // j: index of space component (x,y,z) + // l: index of space components (x,y,z) + // ge is always in sync with the index i and is formatted as: + // ge[] ==> [xx, xy, yy, xz, yz, zz] + // in (j,l) notation, we have: + // ge[] ==> [(0,0), (0,1), (1,1), (0,2), (1,2), (2,2)] + // so the index ge_idx corresponds to the matrix elements: + // | 0 1 3 | + // | 1 2 4 | + // | 3 4 5 | + constexpr uint32_t hits_in_fit = M6xNf::ColsAtCompileTime; + for (uint32_t i = 0; i < hits_in_fit; ++i) { + { + constexpr uint32_t ge_idx = 0, j = 0, l = 0; + hits_cov(i + j * hits_in_fit, i + l * hits_in_fit) = ge.col(i)[ge_idx]; + } + { + constexpr uint32_t ge_idx = 2, j = 1, l = 1; + hits_cov(i + j * hits_in_fit, i + l * hits_in_fit) = ge.col(i)[ge_idx]; + } + { + constexpr uint32_t ge_idx = 1, j = 1, l = 0; + hits_cov(i + l * hits_in_fit, i + j * hits_in_fit) = hits_cov(i + j * hits_in_fit, i + l * hits_in_fit) = + ge.col(i)[ge_idx]; + } + } + } + + template + ALPAKA_FN_ACC void loadCovariance(const TAcc& acc, M6xNf const& ge, M3xNd& hits_cov) { + // Index numerology: + // i: index of the hits/point (0,..,3) + // j: index of space component (x,y,z) + // l: index of space components (x,y,z) + // ge is always in sync with the index i and is formatted as: + // ge[] ==> [xx, xy, yy, xz, yz, zz] + // in (j,l) notation, we have: + // ge[] ==> [(0,0), (0,1), (1,1), (0,2), (1,2), (2,2)] + // so the index ge_idx corresponds to the matrix elements: + // | 0 1 3 | + // | 1 2 4 | + // | 3 4 5 | + constexpr uint32_t hits_in_fit = M6xNf::ColsAtCompileTime; + for (uint32_t i = 0; i < hits_in_fit; ++i) { + { + constexpr uint32_t ge_idx = 0, j = 0, l = 0; + hits_cov(i + j * hits_in_fit, i + l * hits_in_fit) = ge.col(i)[ge_idx]; + } + { + constexpr uint32_t ge_idx = 2, j = 1, l = 1; + 
hits_cov(i + j * hits_in_fit, i + l * hits_in_fit) = ge.col(i)[ge_idx]; + } + { + constexpr uint32_t ge_idx = 5, j = 2, l = 2; + hits_cov(i + j * hits_in_fit, i + l * hits_in_fit) = ge.col(i)[ge_idx]; + } + { + constexpr uint32_t ge_idx = 1, j = 1, l = 0; + hits_cov(i + l * hits_in_fit, i + j * hits_in_fit) = hits_cov(i + j * hits_in_fit, i + l * hits_in_fit) = + ge.col(i)[ge_idx]; + } + { + constexpr uint32_t ge_idx = 3, j = 2, l = 0; + hits_cov(i + l * hits_in_fit, i + j * hits_in_fit) = hits_cov(i + j * hits_in_fit, i + l * hits_in_fit) = + ge.col(i)[ge_idx]; + } + { + constexpr uint32_t ge_idx = 4, j = 2, l = 1; + hits_cov(i + l * hits_in_fit, i + j * hits_in_fit) = hits_cov(i + j * hits_in_fit, i + l * hits_in_fit) = + ge.col(i)[ge_idx]; + } + } + } + + /*! + \brief Transform circle parameter from (X0,Y0,R) to (phi,Tip,p_t) and + consequently covariance matrix. + \param circle parameter (X0,Y0,R), covariance matrix to + be transformed and particle charge. + \param B magnetic field in GeV/cm/c unit. + \param error flag for errors computation. + */ + template + ALPAKA_FN_ACC ALPAKA_FN_INLINE void par_uvrtopak(const TAcc& acc, + CircleFit& circle, + const double B, + const bool error) { + Vector3d par_pak; + const double temp0 = circle.par.head(2).squaredNorm(); + const double temp1 = alpaka::math::sqrt(acc, temp0); + par_pak << alpaka::math::atan2(acc, circle.qCharge * circle.par(0), -circle.qCharge * circle.par(1)), + circle.qCharge * (temp1 - circle.par(2)), circle.par(2) * B; + if (error) { + const double temp2 = sqr(circle.par(0)) * 1. / temp0; + const double temp3 = 1. / temp1 * circle.qCharge; + Matrix3d j4Mat; + j4Mat << -circle.par(1) * temp2 * 1. / sqr(circle.par(0)), temp2 * 1. / circle.par(0), 0., + circle.par(0) * temp3, circle.par(1) * temp3, -circle.qCharge, 0., 0., B; + circle.cov = j4Mat * circle.cov * j4Mat.transpose(); + } + circle.par = par_pak; + } + + /*!
+ \brief Transform circle parameter from (X0,Y0,R) to (phi,Tip,q/R) and + consequently covariance matrix. + \param circle parameter (X0,Y0,R), covariance matrix to + be transformed and particle charge. + */ + template + ALPAKA_FN_ACC ALPAKA_FN_INLINE void fromCircleToPerigee(const TAcc& acc, CircleFit& circle) { + Vector3d par_pak; + const double temp0 = circle.par.head(2).squaredNorm(); + const double temp1 = alpaka::math::sqrt(acc, temp0); + par_pak << alpaka::math::atan2(acc, circle.qCharge * circle.par(0), -circle.qCharge * circle.par(1)), + circle.qCharge * (temp1 - circle.par(2)), circle.qCharge / circle.par(2); + + const double temp2 = sqr(circle.par(0)) * 1. / temp0; + const double temp3 = 1. / temp1 * circle.qCharge; + Matrix3d j4Mat; + j4Mat << -circle.par(1) * temp2 * 1. / sqr(circle.par(0)), temp2 * 1. / circle.par(0), 0., circle.par(0) * temp3, + circle.par(1) * temp3, -circle.qCharge, 0., 0., -circle.qCharge / (circle.par(2) * circle.par(2)); + circle.cov = j4Mat * circle.cov * j4Mat.transpose(); + + circle.par = par_pak; + } + + } // namespace riemannFit + +} // namespace ALPAKA_ACCELERATOR_NAMESPACE +#endif // RecoPixelVertexing_PixelTrackFitting_alpaka_FitUtils_h diff --git a/RecoTracker/PixelTrackFitting/interface/alpaka/PixelNtupletsFitter.h b/RecoTracker/PixelTrackFitting/interface/alpaka/PixelNtupletsFitter.h new file mode 100644 index 0000000000000..d21a43d3fe15c --- /dev/null +++ b/RecoTracker/PixelTrackFitting/interface/alpaka/PixelNtupletsFitter.h @@ -0,0 +1,30 @@ +#ifndef RecoPixelVertexing_PixelTrackFitting_interface_PixelNtupletsFitter_h +#define RecoPixelVertexing_PixelTrackFitting_interface_PixelNtupletsFitter_h + +#include +#include +#include "HeterogeneousCore/AlpakaInterface/interface/config.h" +#include "HeterogeneousCore/AlpakaInterface/interface/traits.h" +#include "DataFormats/TrackReco/interface/Track.h" +#include "DataFormats/TrackingRecHit/interface/TrackingRecHit.h" +#include
"FWCore/ParameterSet/interface/ParameterSet.h" +#include "RecoTracker/PixelTrackFitting/interface/PixelFitterBase.h" +#include "RecoTracker/TkTrackingRegions/interface/TrackingRegion.h" + +namespace ALPAKA_ACCELERATOR_NAMESPACE { + class PixelNtupletsFitter final : public PixelFitterBase { + public: + explicit PixelNtupletsFitter(Queue& queue, float nominalB, const MagneticField* field, bool useRiemannFit); + ~PixelNtupletsFitter() override = default; + std::unique_ptr run(const std::vector& hits, + const TrackingRegion& region) const override; + + private: + Queue& queue_; + float nominalB_; + const MagneticField* field_; + bool useRiemannFit_; + }; +} // namespace ALPAKA_ACCELERATOR_NAMESPACE + +#endif // RecoPixelVertexing_PixelTrackFitting_interface_PixelNtupletsFitter_h diff --git a/RecoTracker/PixelTrackFitting/interface/alpaka/RiemannFit.h b/RecoTracker/PixelTrackFitting/interface/alpaka/RiemannFit.h new file mode 100644 index 0000000000000..97be053af467f --- /dev/null +++ b/RecoTracker/PixelTrackFitting/interface/alpaka/RiemannFit.h @@ -0,0 +1,1023 @@ +#ifndef RecoPixelVertexing_PixelTrackFitting_interface_RiemannFit_h +#define RecoPixelVertexing_PixelTrackFitting_interface_RiemannFit_h +#include +#include "RecoTracker/PixelTrackFitting/interface/alpaka/FitUtils.h" + +namespace ALPAKA_ACCELERATOR_NAMESPACE { + using namespace cms::alpakatools; + namespace riemannFit { + using namespace ::riemannFit; + /*! Compute the Radiation length in the uniform hypothesis + * + * The Pixel detector, barrel and forward, is considered as an homogeneous + * cylinder of material, whose radiation lengths has been derived from the TDR + * plot that shows that 16cm correspond to 0.06 radiation lengths. Therefore + * one radiation length corresponds to 16cm/0.06 =~ 267 cm. All radiation + * lengths are computed using this unique number, in both regions, barrel and + * endcap. + * + * NB: no angle corrections nor projections are computed inside this routine. 
+ * It is therefore the responsibility of the caller to supply the proper + * lengths in input. These lengths are the path traveled by the particle along + * its trajectory, namely the so called S of the helix in 3D space. + * + * \param length_values vector of incremental distances that will be translated + * into radiation length equivalent. Each radiation length i is computed + * incrementally with respect to the previous length i-1. The first length has + * no reference point (i.e. it has the dca). + * + * \return incremental radiation lengths that correspond to each segment. + */ + + template + ALPAKA_FN_ACC ALPAKA_FN_INLINE void computeRadLenUniformMaterial(const TAcc& acc, + const VNd1& length_values, + VNd2& rad_lengths) { + // Radiation length of the pixel detector in the uniform assumption, with + // 0.06 rad_len at 16 cm + constexpr double xx_0_inv = 0.06 / 16.; + uint n = length_values.rows(); + rad_lengths(0) = length_values(0) * xx_0_inv; + for (uint j = 1; j < n; ++j) { + rad_lengths(j) = alpaka::math::abs(acc, length_values(j) - length_values(j - 1)) * xx_0_inv; + } + } + + /*! + \brief Compute the covariance matrix along cartesian S-Z of points due to + multiple Coulomb scattering to be used in the line_fit, for the barrel + and forward cases. + The input covariance matrix is in the variables s-z, original and + unrotated. + The multiple scattering component is computed in the usual linear + approximation, using the 3D path which is computed as the squared root of + the squared sum of the s and z components passed in. + Internally a rotation by theta is performed and the covariance matrix + returned is the one in the direction orthogonal to the rotated S3D axis, + i.e. along the rotated Z axis. 
+ The choice of the rotation is not arbitrary, but derived from the fact that + putting the horizontal axis along the S3D direction allows the usage of the + ordinary least squared fitting techiques with the trivial parametrization y + = mx + q, avoiding the patological case with m = +/- inf, that would + correspond to the case at eta = 0. + */ + + template + ALPAKA_FN_ACC ALPAKA_FN_INLINE auto scatterCovLine(const TAcc& acc, + Matrix2d const* cov_sz, + const V4& fast_fit, + VNd1 const& s_arcs, + VNd2 const& z_values, + const double theta, + const double bField, + MatrixNd& ret) { +#ifdef RFIT_DEBUG + riemannFit::printIt(&s_arcs, "Scatter_cov_line - s_arcs: "); +#endif + constexpr uint n = N; + double p_t = alpaka::math::min(acc, 20., fast_fit(2) * bField); // limit pt to avoid too small error!!! + double p_2 = p_t * p_t * (1. + 1. / sqr(fast_fit(3))); + VectorNd rad_lengths_S; + // See documentation at http://eigen.tuxfamily.org/dox/group__TutorialArrayClass.html + // Basically, to perform cwise operations on Matrices and Vectors, you need + // to transform them into Array-like objects. + VectorNd s_values = s_arcs.array() * s_arcs.array() + z_values.array() * z_values.array(); + s_values = s_values.array().sqrt(); + computeRadLenUniformMaterial(acc, s_values, rad_lengths_S); + VectorNd sig2_S; + sig2_S = .000225 / p_2 * (1. 
+ 0.038 * rad_lengths_S.array().log()).abs2() * rad_lengths_S.array(); +#ifdef RFIT_DEBUG + riemannFit::printIt(cov_sz, "Scatter_cov_line - cov_sz: "); +#endif + Matrix2Nd tmp = Matrix2Nd::Zero(); + for (uint k = 0; k < n; ++k) { + tmp(k, k) = cov_sz[k](0, 0); + tmp(k + n, k + n) = cov_sz[k](1, 1); + tmp(k, k + n) = tmp(k + n, k) = cov_sz[k](0, 1); + } + for (uint k = 0; k < n; ++k) { + for (uint l = k; l < n; ++l) { + for (uint i = 0; i < uint(alpaka::math::min(acc, k, l)); ++i) { + tmp(k + n, l + n) += alpaka::math::abs(acc, s_values(k) - s_values(i)) * + alpaka::math::abs(acc, s_values(l) - s_values(i)) * sig2_S(i); + } + tmp(l + n, k + n) = tmp(k + n, l + n); + } + } + // We are interested only in the errors orthogonal to the rotated s-axis + // which, in our formalism, are in the lower square matrix. +#ifdef RFIT_DEBUG + riemannFit::printIt(&tmp, "Scatter_cov_line - tmp: "); +#endif + ret = tmp.block(n, n, n, n); + } + + /*! + \brief Compute the covariance matrix (in radial coordinates) of points in + the transverse plane due to multiple Coulomb scattering. + \param p2D 2D points in the transverse plane. + \param fast_fit fast_fit Vector4d result of the previous pre-fit + structured in this form:(X0, Y0, R, Tan(Theta))). + \param B magnetic field use to compute p + \return scatter_cov_rad errors due to multiple scattering. + \warning input points must be ordered radially from the detector center + (from inner layer to outer ones; points on the same layer must ordered too). + \details Only the tangential component is computed (the radial one is + negligible). + */ + template + ALPAKA_FN_ACC ALPAKA_FN_INLINE MatrixNd scatter_cov_rad( + const TAcc& acc, const M2xN& p2D, const V4& fast_fit, VectorNd const& rad, double B) { + constexpr uint n = N; + double p_t = alpaka::math::min(acc, 20., fast_fit(2) * B); // limit pt to avoid too small error!!! + double p_2 = p_t * p_t * (1. + 1. / sqr(fast_fit(3))); + double theta = atan(fast_fit(3)); + theta = theta < 0. ? 
theta + M_PI : theta; + VectorNd s_values; + VectorNd rad_lengths; + const Vector2d oVec(fast_fit(0), fast_fit(1)); + + // associated Jacobian, used in weights and errors computation + for (uint i = 0; i < n; ++i) { // x + Vector2d pVec = p2D.block(0, i, 2, 1) - oVec; + const double cross = cross2D(acc, -oVec, pVec); + const double dot = (-oVec).dot(pVec); + const double tempAtan2 = atan2(cross, dot); + s_values(i) = alpaka::math::abs(acc, tempAtan2 * fast_fit(2)); + } + computeRadLenUniformMaterial(acc, s_values * sqrt(1. + 1. / sqr(fast_fit(3))), rad_lengths); + MatrixNd scatter_cov_rad = MatrixNd::Zero(); + VectorNd sig2 = (1. + 0.038 * rad_lengths.array().log()).abs2() * rad_lengths.array(); + sig2 *= 0.000225 / (p_2 * sqr(sin(theta))); + for (uint k = 0; k < n; ++k) { + for (uint l = k; l < n; ++l) { + for (uint i = 0; i < uint(alpaka::math::min(acc, k, l)); ++i) { + scatter_cov_rad(k, l) += (rad(k) - rad(i)) * (rad(l) - rad(i)) * sig2(i); + } + scatter_cov_rad(l, k) = scatter_cov_rad(k, l); + } + } +#ifdef RFIT_DEBUG + riemannFit::printIt(&scatter_cov_rad, "Scatter_cov_rad - scatter_cov_rad: "); +#endif + return scatter_cov_rad; + } + + /*! + \brief Transform covariance matrix from radial (only tangential component) + to Cartesian coordinates (only transverse plane component). + \param p2D 2D points in the transverse plane. + \param cov_rad covariance matrix in radial coordinate. + \return cov_cart covariance matrix in Cartesian coordinates. 
+*/ + + template + ALPAKA_FN_ACC ALPAKA_FN_INLINE Matrix2Nd cov_radtocart(const TAcc& acc, + const M2xN& p2D, + const MatrixNd& cov_rad, + const VectorNd& rad) { +#ifdef RFIT_DEBUG + printf("Address of p2D: %p\n", &p2D); +#endif + printIt(&p2D, "cov_radtocart - p2D:"); + constexpr uint n = N; + Matrix2Nd cov_cart = Matrix2Nd::Zero(); + VectorNd rad_inv = rad.cwiseInverse(); + printIt(&rad_inv, "cov_radtocart - rad_inv:"); + for (uint i = 0; i < n; ++i) { + for (uint j = i; j < n; ++j) { + cov_cart(i, j) = cov_rad(i, j) * p2D(1, i) * rad_inv(i) * p2D(1, j) * rad_inv(j); + cov_cart(i + n, j + n) = cov_rad(i, j) * p2D(0, i) * rad_inv(i) * p2D(0, j) * rad_inv(j); + cov_cart(i, j + n) = -cov_rad(i, j) * p2D(1, i) * rad_inv(i) * p2D(0, j) * rad_inv(j); + cov_cart(i + n, j) = -cov_rad(i, j) * p2D(0, i) * rad_inv(i) * p2D(1, j) * rad_inv(j); + cov_cart(j, i) = cov_cart(i, j); + cov_cart(j + n, i + n) = cov_cart(i + n, j + n); + cov_cart(j + n, i) = cov_cart(i, j + n); + cov_cart(j, i + n) = cov_cart(i + n, j); + } + } + return cov_cart; + } + + /*! + \brief Transform covariance matrix from Cartesian coordinates (only + transverse plane component) to radial coordinates (both radial and + tangential component but only diagonal terms, correlation between different + point are not managed). + \param p2D 2D points in transverse plane. + \param cov_cart covariance matrix in Cartesian coordinates. + \return cov_rad covariance matrix in raidal coordinate. + \warning correlation between different point are not computed. 
+*/ + template + ALPAKA_FN_ACC ALPAKA_FN_INLINE VectorNd cov_carttorad(const TAcc& acc, + const M2xN& p2D, + const Matrix2Nd& cov_cart, + const VectorNd& rad) { + constexpr uint n = N; + VectorNd cov_rad; + const VectorNd rad_inv2 = rad.cwiseInverse().array().square(); + for (uint i = 0; i < n; ++i) { + //!< in case you have (0,0) to avoid dividing by 0 radius + if (rad(i) < 1.e-4) + cov_rad(i) = cov_cart(i, i); + else { + cov_rad(i) = rad_inv2(i) * (cov_cart(i, i) * sqr(p2D(1, i)) + cov_cart(i + n, i + n) * sqr(p2D(0, i)) - + 2. * cov_cart(i, i + n) * p2D(0, i) * p2D(1, i)); + } + } + return cov_rad; + } + + /*! + \brief Transform covariance matrix from Cartesian coordinates (only + transverse plane component) to coordinates system orthogonal to the + pre-fitted circle in each point. + Further information in attached documentation. + \param p2D 2D points in transverse plane. + \param cov_cart covariance matrix in Cartesian coordinates. + \param fast_fit fast_fit Vector4d result of the previous pre-fit + structured in this form:(X0, Y0, R, tan(theta))). + \return cov_rad covariance matrix in the pre-fitted circle's + orthogonal system. +*/ + template + ALPAKA_FN_ACC ALPAKA_FN_INLINE VectorNd cov_carttorad_prefit( + const TAcc& acc, const M2xN& p2D, const Matrix2Nd& cov_cart, V4& fast_fit, const VectorNd& rad) { + constexpr uint n = N; + VectorNd cov_rad; + for (uint i = 0; i < n; ++i) { + //!< in case you have (0,0) to avoid dividing by 0 radius + if (rad(i) < 1.e-4) + cov_rad(i) = cov_cart(i, i); // TO FIX + else { + Vector2d a = p2D.col(i); + Vector2d b = p2D.col(i) - fast_fit.head(2); + const double x2 = a.dot(b); + const double y2 = cross2D(acc, a, b); + const double tan_c = -y2 / x2; + const double tan_c2 = sqr(tan_c); + cov_rad(i) = + 1. / (1. + tan_c2) * (cov_cart(i, i) + cov_cart(i + n, i + n) * tan_c2 + 2 * cov_cart(i, i + n) * tan_c); + } + } + return cov_rad; + } + + /*! 
+ \brief Compute the points' weights' vector for the circle fit when multiple + scattering is managed. + Further information in attached documentation. + \param cov_rad_inv covariance matrix inverse in radial coordinated + (or, beter, pre-fitted circle's orthogonal system). + \return weight VectorNd points' weights' vector. + \bug I'm not sure this is the right way to compute the weights for non + diagonal cov matrix. Further investigation needed. +*/ + + template + ALPAKA_FN_ACC ALPAKA_FN_INLINE VectorNd weightCircle(const TAcc& acc, const MatrixNd& cov_rad_inv) { + return cov_rad_inv.colwise().sum().transpose(); + } + + /*! + \brief Find particle q considering the sign of cross product between + particles velocity (estimated by the first 2 hits) and the vector radius + between the first hit and the center of the fitted circle. + \param p2D 2D points in transverse plane. + \param par_uvr result of the circle fit in this form: (X0,Y0,R). + \return q int 1 or -1. +*/ + template + ALPAKA_FN_ACC ALPAKA_FN_INLINE int32_t charge(const TAcc& acc, const M2xN& p2D, const Vector3d& par_uvr) { + return ((p2D(0, 1) - p2D(0, 0)) * (par_uvr.y() - p2D(1, 0)) - + (p2D(1, 1) - p2D(1, 0)) * (par_uvr.x() - p2D(0, 0)) > + 0) + ? -1 + : 1; + } + + /*! + \brief Compute the eigenvector associated to the minimum eigenvalue. + \param A the Matrix you want to know eigenvector and eigenvalue. + \param chi2 the double were the chi2-related quantity will be stored. + \return the eigenvector associated to the minimum eigenvalue. + \warning double precision is needed for a correct assessment of chi2. + \details The minimus eigenvalue is related to chi2. + We exploit the fact that the matrix is symmetrical and small (2x2 for line + fit and 3x3 for circle fit), so the SelfAdjointEigenSolver from Eigen + library is used, with the computedDirect method (available only for 2x2 + and 3x3 Matrix) wich computes eigendecomposition of given matrix using a + fast closed-form algorithm. 
+ For this optimization the matrix type must be known at compiling time. +*/ + template + ALPAKA_FN_ACC ALPAKA_FN_INLINE Vector3d min_eigen3D(const TAcc& acc, const Matrix3d& A, double& chi2) { +#ifdef RFIT_DEBUG + printf("min_eigen3D - enter\n"); +#endif + Eigen::SelfAdjointEigenSolver solver(3); + solver.computeDirect(A); + int min_index; + chi2 = solver.eigenvalues().minCoeff(&min_index); +#ifdef RFIT_DEBUG + printf("min_eigen3D - exit\n"); +#endif + return solver.eigenvectors().col(min_index); + } + + /*! + \brief A faster version of min_eigen3D() where double precision is not + needed. + \param A the Matrix you want to know eigenvector and eigenvalue. + \param chi2 the double were the chi2-related quantity will be stored + \return the eigenvector associated to the minimum eigenvalue. + \detail The computedDirect() method of SelfAdjointEigenSolver for 3x3 Matrix + indeed, use trigonometry function (it solves a third degree equation) which + speed up in single precision. +*/ + + template + ALPAKA_FN_ACC ALPAKA_FN_INLINE Vector3d min_eigen3D_fast(const TAcc& acc, const Matrix3d& A) { + Eigen::SelfAdjointEigenSolver solver(3); + solver.computeDirect(A.cast()); + int min_index; + solver.eigenvalues().minCoeff(&min_index); + return solver.eigenvectors().col(min_index).cast(); + } + + /*! + \brief 2D version of min_eigen3D(). + \param aMat the Matrix you want to know eigenvector and eigenvalue. + \param chi2 the double were the chi2-related quantity will be stored + \return the eigenvector associated to the minimum eigenvalue. + \detail The computedDirect() method of SelfAdjointEigenSolver for 2x2 Matrix + do not use special math function (just sqrt) therefore it doesn't speed up + significantly in single precision. 
+*/ + template + ALPAKA_FN_ACC ALPAKA_FN_INLINE Vector2d min_eigen2D(const TAcc& acc, const Matrix2d& aMat, double& chi2) { + Eigen::SelfAdjointEigenSolver solver(2); + solver.computeDirect(aMat); + int min_index; + chi2 = solver.eigenvalues().minCoeff(&min_index); + return solver.eigenvectors().col(min_index); + } + + /*! + \brief A very fast helix fit: it fits a circle by three points (first, middle + and last point) and a line by two points (first and last). + \param hits points to be fitted + \return result in this form: (X0,Y0,R,tan(theta)). + \warning points must be passed ordered (from internal layer to external) in + order to maximize accuracy and do not mistake tan(theta) sign. + \details This fast fit is used as pre-fit which is needed for: + - weights estimation and chi2 computation in line fit (fundamental); + - weights estimation and chi2 computation in circle fit (useful); + - computation of error due to multiple scattering. +*/ + + template + ALPAKA_FN_ACC ALPAKA_FN_INLINE void fastFit(const TAcc& acc, const M3xN& hits, V4& result) { + constexpr uint32_t N = M3xN::ColsAtCompileTime; + constexpr auto n = N; // get the number of hits + printIt(&hits, "Fast_fit - hits: "); + + // CIRCLE FIT + // Make segments between middle-to-first(b) and last-to-first(c) hits + const Vector2d bVec = hits.block(0, n / 2, 2, 1) - hits.block(0, 0, 2, 1); + const Vector2d cVec = hits.block(0, n - 1, 2, 1) - hits.block(0, 0, 2, 1); + printIt(&bVec, "Fast_fit - b: "); + printIt(&cVec, "Fast_fit - c: "); + // Compute their lengths + auto b2 = bVec.squaredNorm(); + auto c2 = cVec.squaredNorm(); + // The algebra has been verified (MR). The usual approach has been followed: + // * use an orthogonal reference frame passing from the first point. + // * build the segments (chords) + // * build orthogonal lines through mid points + // * make a system and solve for X0 and Y0. + // * add the initial point + bool flip = abs(bVec.x()) < abs(bVec.y()); + auto bx = flip ? 
bVec.y() : bVec.x(); + auto by = flip ? bVec.x() : bVec.y(); + auto cx = flip ? cVec.y() : cVec.x(); + auto cy = flip ? cVec.x() : cVec.y(); + //!< in case b.x is 0 (2 hits with same x) + auto div = 2. * (cx * by - bx * cy); + // if aligned TO FIX + auto y0 = (cx * b2 - bx * c2) / div; + auto x0 = (0.5 * b2 - y0 * by) / bx; + result(0) = hits(0, 0) + (flip ? y0 : x0); + result(1) = hits(1, 0) + (flip ? x0 : y0); + result(2) = sqrt(sqr(x0) + sqr(y0)); + printIt(&result, "Fast_fit - result: "); + + // LINE FIT + const Vector2d dVec = hits.block(0, 0, 2, 1) - result.head(2); + const Vector2d eVec = hits.block(0, n - 1, 2, 1) - result.head(2); + printIt(&eVec, "Fast_fit - e: "); + printIt(&dVec, "Fast_fit - d: "); + // Compute the arc-length between first and last point: L = R * theta = R * atan (tan (Theta) ) + auto dr = result(2) * atan2(cross2D(acc, dVec, eVec), dVec.dot(eVec)); + // Simple difference in Z between last and first hit + auto dz = hits(2, n - 1) - hits(2, 0); + + result(3) = (dr / dz); + +#ifdef RFIT_DEBUG + printf("Fast_fit: [%f, %f, %f, %f]\n", result(0), result(1), result(2), result(3)); +#endif + } + + /*! + \brief Fit a generic number of 2D points with a circle using Riemann-Chernov + algorithm. Covariance matrix of fitted parameter is optionally computed. + Multiple scattering (currently only in barrel layer) is optionally handled. + \param hits2D 2D points to be fitted. + \param hits_cov2D covariance matrix of 2D points. + \param fast_fit pre-fit result in this form: (X0,Y0,R,tan(theta)). + (tan(theta) is not used). + \param bField magnetic field + \param error flag for error computation. + \param scattering flag for multiple scattering + \return circle circle_fit: + -par parameter of the fitted circle in this form (X0,Y0,R); \n + -cov covariance matrix of the fitted parameter (not initialized if + error = false); \n + -q charge of the particle; \n + -chi2. 
+ \warning hits must be passed ordered from inner to outer layer (double hits + on the same layer must be ordered too) so that multiple scattering is + treated properly. + \warning Multiple scattering for barrel is still not tested. + \warning Multiple scattering for endcap hits is not handled (yet). Do not + fit endcap hits with scattering = true ! + \bug for small pt (<0.3 Gev/c) chi2 could be slightly underestimated. + \bug further investigation needed for error propagation with multiple + scattering. +*/ + template + ALPAKA_FN_ACC ALPAKA_FN_INLINE CircleFit circleFit(const TAcc& acc, + const M2xN& hits2D, + const Matrix2Nd& hits_cov2D, + const V4& fast_fit, + const VectorNd& rad, + const double bField, + const bool error) { +#ifdef RFIT_DEBUG + printf("circle_fit - enter\n"); +#endif + // INITIALIZATION + Matrix2Nd vMat = hits_cov2D; + constexpr uint n = N; + printIt(&hits2D, "circle_fit - hits2D:"); + printIt(&hits_cov2D, "circle_fit - hits_cov2D:"); + +#ifdef RFIT_DEBUG + printf("circle_fit - WEIGHT COMPUTATION\n"); +#endif + // WEIGHT COMPUTATION + VectorNd weight; + MatrixNd gMat; + double renorm; + { + MatrixNd cov_rad = cov_carttorad_prefit(acc, hits2D, vMat, fast_fit, rad).asDiagonal(); + MatrixNd scatterCovRadMat = scatter_cov_rad(acc, hits2D, fast_fit, rad, bField); + printIt(&scatterCovRadMat, "circle_fit - scatter_cov_rad:"); + printIt(&hits2D, "circle_fit - hits2D bis:"); +#ifdef RFIT_DEBUG + printf("Address of hits2D: a) %p\n", &hits2D); +#endif + vMat += cov_radtocart(acc, hits2D, scatterCovRadMat, rad); + printIt(&vMat, "circle_fit - V:"); + cov_rad += scatterCovRadMat; + printIt(&cov_rad, "circle_fit - cov_rad:"); + math::cholesky::invert(cov_rad, gMat); + // gMat = cov_rad.inverse(); + renorm = gMat.sum(); + gMat *= 1. 
/ renorm; + weight = weightCircle(acc, gMat); + } + printIt(&weight, "circle_fit - weight:"); + + // SPACE TRANSFORMATION +#ifdef RFIT_DEBUG + printf("circle_fit - SPACE TRANSFORMATION\n"); +#endif + + // center +#ifdef RFIT_DEBUG + printf("Address of hits2D: b) %p\n", &hits2D); +#endif + const Vector2d hCentroid = hits2D.rowwise().mean(); // centroid + printIt(&hCentroid, "circle_fit - h_:"); + Matrix3xNd p3D; + p3D.block(0, 0, 2, n) = hits2D.colwise() - hCentroid; + printIt(&p3D, "circle_fit - p3D: a)"); + Vector2Nd mc; // centered hits, used in error computation + mc << p3D.row(0).transpose(), p3D.row(1).transpose(); + printIt(&mc, "circle_fit - mc(centered hits):"); + + // scale + const double tempQ = mc.squaredNorm(); + const double tempS = sqrt(n * 1. / tempQ); // scaling factor + p3D.block(0, 0, 2, n) *= tempS; + + // project on paraboloid + p3D.row(2) = p3D.block(0, 0, 2, n).colwise().squaredNorm(); + printIt(&p3D, "circle_fit - p3D: b)"); + +#ifdef RFIT_DEBUG + printf("circle_fit - COST FUNCTION\n"); +#endif + // COST FUNCTION + + // compute + Vector3d r0; + r0.noalias() = p3D * weight; // center of gravity + const Matrix3xNd xMat = p3D.colwise() - r0; + Matrix3d aMat = xMat * gMat * xMat.transpose(); + printIt(&aMat, "circle_fit - A:"); + +#ifdef RFIT_DEBUG + printf("circle_fit - MINIMIZE\n"); +#endif + // minimize + double chi2; + Vector3d vVec = min_eigen3D(acc, aMat, chi2); +#ifdef RFIT_DEBUG + printf("circle_fit - AFTER MIN_EIGEN\n"); +#endif + printIt(&vVec, "v BEFORE INVERSION"); + vVec *= (vVec(2) > 0) ? 1 : -1; // TO FIX dovrebbe essere N(3)>0 + printIt(&vVec, "v AFTER INVERSION"); + // This hack to be able to run on GPU where the automatic assignment to a + // double from the vector multiplication is not working. 
+#ifdef RFIT_DEBUG + printf("circle_fit - AFTER MIN_EIGEN 1\n"); +#endif + Eigen::Matrix cm; +#ifdef RFIT_DEBUG + printf("circle_fit - AFTER MIN_EIGEN 2\n"); +#endif + cm = -vVec.transpose() * r0; +#ifdef RFIT_DEBUG + printf("circle_fit - AFTER MIN_EIGEN 3\n"); +#endif + const double tempC = cm(0, 0); + +#ifdef RFIT_DEBUG + printf("circle_fit - COMPUTE CIRCLE PARAMETER\n"); +#endif + // COMPUTE CIRCLE PARAMETER + + // auxiliary quantities + const double tempH = sqrt(1. - sqr(vVec(2)) - 4. * tempC * vVec(2)); + const double v2x2_inv = 1. / (2. * vVec(2)); + const double s_inv = 1. / tempS; + Vector3d par_uvr; // used in error propagation + par_uvr << -vVec(0) * v2x2_inv, -vVec(1) * v2x2_inv, tempH * v2x2_inv; + + CircleFit circle; + circle.par << par_uvr(0) * s_inv + hCentroid(0), par_uvr(1) * s_inv + hCentroid(1), par_uvr(2) * s_inv; + circle.qCharge = charge(acc, hits2D, circle.par); + circle.chi2 = abs(chi2) * renorm / sqr(2 * vVec(2) * par_uvr(2) * tempS); + printIt(&circle.par, "circle_fit - CIRCLE PARAMETERS:"); + printIt(&circle.cov, "circle_fit - CIRCLE COVARIANCE:"); +#ifdef RFIT_DEBUG + printf("circle_fit - CIRCLE CHARGE: %d\n", circle.qCharge); +#endif + +#ifdef RFIT_DEBUG + printf("circle_fit - ERROR PROPAGATION\n"); +#endif + // ERROR PROPAGATION + if (error) { +#ifdef RFIT_DEBUG + printf("circle_fit - ERROR PRPAGATION ACTIVATED\n"); +#endif + ArrayNd vcsMat[2][2]; // cov matrix of center & scaled points + MatrixNd cMat[3][3]; // cov matrix of 3D transformed points +#ifdef RFIT_DEBUG + printf("circle_fit - ERROR PRPAGATION ACTIVATED 2\n"); +#endif + { + Eigen::Matrix cm; + Eigen::Matrix cm2; + cm = mc.transpose() * vMat * mc; + const double tempC2 = cm(0, 0); + Matrix2Nd tempVcsMat; + tempVcsMat.template triangularView() = + (sqr(tempS) * vMat + sqr(sqr(tempS)) * 1. / (4. * tempQ * n) * + (2. * vMat.squaredNorm() + 4. 
* tempC2) * // mc.transpose() * V * mc) * + (mc * mc.transpose())); + + printIt(&tempVcsMat, "circle_fit - Vcs:"); + cMat[0][0] = tempVcsMat.block(0, 0, n, n).template selfadjointView(); + vcsMat[0][1] = tempVcsMat.block(0, n, n, n); + cMat[1][1] = tempVcsMat.block(n, n, n, n).template selfadjointView(); + vcsMat[1][0] = vcsMat[0][1].transpose(); + printIt(&tempVcsMat, "circle_fit - Vcs:"); + } + + { + const ArrayNd t0 = (VectorXd::Constant(n, 1.) * p3D.row(0)); + const ArrayNd t1 = (VectorXd::Constant(n, 1.) * p3D.row(1)); + const ArrayNd t00 = p3D.row(0).transpose() * p3D.row(0); + const ArrayNd t01 = p3D.row(0).transpose() * p3D.row(1); + const ArrayNd t11 = p3D.row(1).transpose() * p3D.row(1); + const ArrayNd t10 = t01.transpose(); + vcsMat[0][0] = cMat[0][0]; + cMat[0][1] = vcsMat[0][1]; + cMat[0][2] = 2. * (vcsMat[0][0] * t0 + vcsMat[0][1] * t1); + vcsMat[1][1] = cMat[1][1]; + cMat[1][2] = 2. * (vcsMat[1][0] * t0 + vcsMat[1][1] * t1); + MatrixNd tmp; + tmp.template triangularView() = + (2. * (vcsMat[0][0] * vcsMat[0][0] + vcsMat[0][0] * vcsMat[0][1] + vcsMat[1][1] * vcsMat[1][0] + + vcsMat[1][1] * vcsMat[1][1]) + + 4. 
* (vcsMat[0][0] * t00 + vcsMat[0][1] * t01 + vcsMat[1][0] * t10 + vcsMat[1][1] * t11)) + .matrix(); + cMat[2][2] = tmp.template selfadjointView(); + } + printIt(&cMat[0][0], "circle_fit - C[0][0]:"); + + Matrix3d c0Mat; // cov matrix of center of gravity (r0.x,r0.y,r0.z) + for (uint i = 0; i < 3; ++i) { + for (uint j = i; j < 3; ++j) { + Eigen::Matrix tmp; + tmp = weight.transpose() * cMat[i][j] * weight; + // Workaround to get things working in GPU + const double tempC = tmp(0, 0); + c0Mat(i, j) = tempC; //weight.transpose() * C[i][j] * weight; + c0Mat(j, i) = c0Mat(i, j); + } + } + printIt(&c0Mat, "circle_fit - C0:"); + + const MatrixNd wMat = weight * weight.transpose(); + const MatrixNd hMat = MatrixNd::Identity().rowwise() - weight.transpose(); + const MatrixNx3d s_v = hMat * p3D.transpose(); + printIt(&wMat, "circle_fit - W:"); + printIt(&hMat, "circle_fit - H:"); + printIt(&s_v, "circle_fit - s_v:"); + + MatrixNd dMat[3][3]; // cov(s_v) + dMat[0][0] = (hMat * cMat[0][0] * hMat.transpose()).cwiseProduct(wMat); + dMat[0][1] = (hMat * cMat[0][1] * hMat.transpose()).cwiseProduct(wMat); + dMat[0][2] = (hMat * cMat[0][2] * hMat.transpose()).cwiseProduct(wMat); + dMat[1][1] = (hMat * cMat[1][1] * hMat.transpose()).cwiseProduct(wMat); + dMat[1][2] = (hMat * cMat[1][2] * hMat.transpose()).cwiseProduct(wMat); + dMat[2][2] = (hMat * cMat[2][2] * hMat.transpose()).cwiseProduct(wMat); + dMat[1][0] = dMat[0][1].transpose(); + dMat[2][0] = dMat[0][2].transpose(); + dMat[2][1] = dMat[1][2].transpose(); + printIt(&dMat[0][0], "circle_fit - D_[0][0]:"); + + constexpr uint nu[6][2] = {{0, 0}, {0, 1}, {0, 2}, {1, 1}, {1, 2}, {2, 2}}; + + Matrix6d eMat; // cov matrix of the 6 independent elements of A + for (uint a = 0; a < 6; ++a) { + const uint i = nu[a][0], j = nu[a][1]; + for (uint b = a; b < 6; ++b) { + const uint k = nu[b][0], l = nu[b][1]; + VectorNd t0(n); + VectorNd t1(n); + if (l == k) { + t0 = 2. * dMat[j][l] * s_v.col(l); + if (i == j) + t1 = t0; + else + t1 = 2. 
* dMat[i][l] * s_v.col(l); + } else { + t0 = dMat[j][l] * s_v.col(k) + dMat[j][k] * s_v.col(l); + if (i == j) + t1 = t0; + else + t1 = dMat[i][l] * s_v.col(k) + dMat[i][k] * s_v.col(l); + } + + if (i == j) { + Eigen::Matrix cm; + cm = s_v.col(i).transpose() * (t0 + t1); + // Workaround to get things working in GPU + const double tempC = cm(0, 0); + eMat(a, b) = 0. + tempC; + } else { + Eigen::Matrix cm; + cm = (s_v.col(i).transpose() * t0) + (s_v.col(j).transpose() * t1); + // Workaround to get things working in GPU + const double tempC = cm(0, 0); + eMat(a, b) = 0. + tempC; //(s_v.col(i).transpose() * t0) + (s_v.col(j).transpose() * t1); + } + if (b != a) + eMat(b, a) = eMat(a, b); + } + } + printIt(&eMat, "circle_fit - E:"); + + Eigen::Matrix j2Mat; // Jacobian of min_eigen() (numerically computed) + for (uint a = 0; a < 6; ++a) { + const uint i = nu[a][0], j = nu[a][1]; + Matrix3d delta = Matrix3d::Zero(); + delta(i, j) = delta(j, i) = abs(aMat(i, j) * epsilon); + j2Mat.col(a) = min_eigen3D_fast(acc, aMat + delta); + const int sign = (j2Mat.col(a)(2) > 0) ? 1 : -1; + j2Mat.col(a) = (j2Mat.col(a) * sign - vVec) / delta(i, j); + } + printIt(&j2Mat, "circle_fit - J2:"); + + Matrix4d cvcMat; // joint cov matrix of (v0,v1,v2,c) + { + Matrix3d t0 = j2Mat * eMat * j2Mat.transpose(); + Vector3d t1 = -t0 * r0; + cvcMat.block(0, 0, 3, 3) = t0; + cvcMat.block(0, 3, 3, 1) = t1; + cvcMat.block(3, 0, 1, 3) = t1.transpose(); + Eigen::Matrix cm1; + Eigen::Matrix cm3; + cm1 = (vVec.transpose() * c0Mat * vVec); + // cm2 = (c0Mat.cwiseProduct(t0)).sum(); + cm3 = (r0.transpose() * t0 * r0); + // Workaround to get things working in GPU + const double tempC = cm1(0, 0) + (c0Mat.cwiseProduct(t0)).sum() + cm3(0, 0); + cvcMat(3, 3) = tempC; + // (v.transpose() * c0Mat * v) + (c0Mat.cwiseProduct(t0)).sum() + (r0.transpose() * t0 * r0); + } + printIt(&cvcMat, "circle_fit - Cvc:"); + + Eigen::Matrix j3Mat; // Jacobian (v0,v1,v2,c)->(X0,Y0,R) + { + const double t = 1. 
/ tempH; + j3Mat << -v2x2_inv, 0, vVec(0) * sqr(v2x2_inv) * 2., 0, 0, -v2x2_inv, vVec(1) * sqr(v2x2_inv) * 2., 0, + vVec(0) * v2x2_inv * t, vVec(1) * v2x2_inv * t, + -tempH * sqr(v2x2_inv) * 2. - (2. * tempC + vVec(2)) * v2x2_inv * t, -t; + } + printIt(&j3Mat, "circle_fit - J3:"); + + const RowVector2Nd Jq = mc.transpose() * tempS * 1. / n; // var(q) + printIt(&Jq, "circle_fit - Jq:"); + + Matrix3d cov_uvr = j3Mat * cvcMat * j3Mat.transpose() * sqr(s_inv) // cov(X0,Y0,R) + + (par_uvr * par_uvr.transpose()) * (Jq * vMat * Jq.transpose()); + + circle.cov = cov_uvr; + } + + printIt(&circle.cov, "Circle cov:"); +#ifdef RFIT_DEBUG + printf("circle_fit - exit\n"); +#endif + return circle; + } + + /*! \brief Perform an ordinary least square fit in the s-z plane to compute + * the parameters cotTheta and Zip. + * + * The fit is performed in the rotated S3D-Z' plane, following the formalism of + * Frodesen, Chapter 10, p. 259. + * + * The system has been rotated to both try to use the combined errors in s-z + * along Z', as errors in the Y direction and to avoid the patological case of + * degenerate lines with angular coefficient m = +/- inf. + * + * The rotation is using the information on the theta angle computed in the + * fast fit. The rotation is such that the S3D axis will be the X-direction, + * while the rotated Z-axis will be the Y-direction. This pretty much follows + * what is done in the same fit in the Broken Line approach. + */ + + template + ALPAKA_FN_ACC ALPAKA_FN_INLINE LineFit lineFit(const TAcc& acc, + const M3xN& hits, + const M6xN& hits_ge, + const CircleFit& circle, + const V4& fast_fit, + const double bField, + const bool error) { + constexpr uint32_t N = M3xN::ColsAtCompileTime; + constexpr auto n = N; + double theta = -circle.qCharge * atan(fast_fit(3)); + theta = theta < 0. ? 
theta + M_PI : theta; + + // Prepare the Rotation Matrix to rotate the points + Eigen::Matrix rot; + rot << sin(theta), cos(theta), -cos(theta), sin(theta); + + // PROJECTION ON THE CILINDER + // + // p2D will be: + // [s1, s2, s3, ..., sn] + // [z1, z2, z3, ..., zn] + // s values will be ordinary x-values + // z values will be ordinary y-values + + Matrix2xNd p2D = Matrix2xNd::Zero(); + Eigen::Matrix jxMat; + +#ifdef RFIT_DEBUG + printf("Line_fit - B: %g\n", bField); + printIt(&hits, "Line_fit points: "); + printIt(&hits_ge, "Line_fit covs: "); + printIt(&rot, "Line_fit rot: "); +#endif + // x & associated Jacobian + // cfr https://indico.cern.ch/event/663159/contributions/2707659/attachments/1517175/2368189/Riemann_fit.pdf + // Slide 11 + // a ==> -o i.e. the origin of the circle in XY plane, negative + // b ==> p i.e. distances of the points wrt the origin of the circle. + const Vector2d oVec(circle.par(0), circle.par(1)); + + // associated Jacobian, used in weights and errors computation + Matrix6d covMat = Matrix6d::Zero(); + Matrix2d cov_sz[N]; + for (uint i = 0; i < n; ++i) { + Vector2d pVec = hits.block(0, i, 2, 1) - oVec; + const double cross = cross2D(acc, -oVec, pVec); + const double dot = (-oVec).dot(pVec); + // atan2(cross, dot) give back the angle in the transverse plane so tha the + // final equation reads: x_i = -q*R*theta (theta = angle returned by atan2) + const double tempQAtan2 = -circle.qCharge * atan2(cross, dot); + // p2D.coeffRef(1, i) = atan2_ * circle.par(2); + p2D(0, i) = tempQAtan2 * circle.par(2); + + // associated Jacobian, used in weights and errors- computation + const double temp0 = -circle.qCharge * circle.par(2) * 1. 
/ (sqr(dot) + sqr(cross)); + double d_X0 = 0., d_Y0 = 0., d_R = 0.; // good approximation for big pt and eta + if (error) { + d_X0 = -temp0 * ((pVec(1) + oVec(1)) * dot - (pVec(0) - oVec(0)) * cross); + d_Y0 = temp0 * ((pVec(0) + oVec(0)) * dot - (oVec(1) - pVec(1)) * cross); + d_R = tempQAtan2; + } + const double d_x = temp0 * (oVec(1) * dot + oVec(0) * cross); + const double d_y = temp0 * (-oVec(0) * dot + oVec(1) * cross); + jxMat << d_X0, d_Y0, d_R, d_x, d_y, 0., 0., 0., 0., 0., 0., 1.; + + covMat.block(0, 0, 3, 3) = circle.cov; + covMat(3, 3) = hits_ge.col(i)[0]; // x errors + covMat(4, 4) = hits_ge.col(i)[2]; // y errors + covMat(5, 5) = hits_ge.col(i)[5]; // z errors + covMat(3, 4) = covMat(4, 3) = hits_ge.col(i)[1]; // cov_xy + covMat(3, 5) = covMat(5, 3) = hits_ge.col(i)[3]; // cov_xz + covMat(4, 5) = covMat(5, 4) = hits_ge.col(i)[4]; // cov_yz + Matrix2d tmp = jxMat * covMat * jxMat.transpose(); + cov_sz[i].noalias() = rot * tmp * rot.transpose(); + } + // Math of d_{X0,Y0,R,x,y} all verified by hand + p2D.row(1) = hits.row(2); + + // The following matrix will contain errors orthogonal to the rotated S + // component only, with the Multiple Scattering properly treated!! 
+ MatrixNd cov_with_ms; + scatterCovLine(acc, cov_sz, fast_fit, p2D.row(0), p2D.row(1), theta, bField, cov_with_ms); +#ifdef RFIT_DEBUG + printIt(cov_sz, "line_fit - cov_sz:"); + printIt(&cov_with_ms, "line_fit - cov_with_ms: "); +#endif + + // Rotate Points with the shape [2, n] + Matrix2xNd p2D_rot = rot * p2D; + +#ifdef RFIT_DEBUG + printf("Fast fit Tan(theta): %g\n", fast_fit(3)); + printf("Rotation angle: %g\n", theta); + printIt(&rot, "Rotation Matrix:"); + printIt(&p2D, "Original Hits(s,z):"); + printIt(&p2D_rot, "Rotated hits(S3D, Z'):"); + printIt(&rot, "Rotation Matrix:"); +#endif + + // Build the A Matrix + Matrix2xNd aMat; + aMat << MatrixXd::Ones(1, n), p2D_rot.row(0); // rotated s values + +#ifdef RFIT_DEBUG + printIt(&aMat, "A Matrix:"); +#endif + + // Build A^T V-1 A, where V-1 is the covariance of only the Y components. + MatrixNd vyInvMat; + math::cholesky::invert(cov_with_ms, vyInvMat); + // MatrixNd vyInvMat = cov_with_ms.inverse(); + Eigen::Matrix covParamsMat = aMat * vyInvMat * aMat.transpose(); + // Compute the Covariance Matrix of the fit parameters + math::cholesky::invert(covParamsMat, covParamsMat); + + // Now Compute the Parameters in the form [2,1] + // The first component is q. + // The second component is m. + Eigen::Matrix sol = covParamsMat * aMat * vyInvMat * p2D_rot.row(1).transpose(); + +#ifdef RFIT_DEBUG + printIt(&sol, "Rotated solutions:"); +#endif + + // We need now to transfer back the results in the original s-z plane + const auto sinTheta = sin(theta); + const auto cosTheta = cos(theta); + auto common_factor = 1. 
/ (sinTheta - sol(1, 0) * cosTheta); + Eigen::Matrix jMat; + jMat << 0., common_factor * common_factor, common_factor, sol(0, 0) * cosTheta * common_factor * common_factor; + + double tempM = common_factor * (sol(1, 0) * sinTheta + cosTheta); + double tempQ = common_factor * sol(0, 0); + auto cov_mq = jMat * covParamsMat * jMat.transpose(); + + VectorNd res = p2D_rot.row(1).transpose() - aMat.transpose() * sol; + double chi2 = res.transpose() * vyInvMat * res; + + LineFit line; + line.par << tempM, tempQ; + line.cov << cov_mq; + line.chi2 = chi2; + +#ifdef RFIT_DEBUG + printf("Common_factor: %g\n", common_factor); + printIt(&jMat, "Jacobian:"); + printIt(&sol, "Rotated solutions:"); + printIt(&covParamsMat, "Cov_params:"); + printIt(&cov_mq, "Rotated Covariance Matrix:"); + printIt(&(line.par), "Real Parameters:"); + printIt(&(line.cov), "Real Covariance Matrix:"); + printf("Chi2: %g\n", chi2); +#endif + + return line; + } + + } // namespace riemannFit +} // namespace ALPAKA_ACCELERATOR_NAMESPACE + +namespace riemannFit { + /*! + \brief Helix fit by three step: + -fast pre-fit (see Fast_fit() for further info); \n + -circle fit of hits projected in the transverse plane by Riemann-Chernov + algorithm (see Circle_fit() for further info); \n + -line fit of hits projected on cylinder surface by orthogonal distance + regression (see Line_fit for further info). \n + Points must be passed ordered (from inner to outer layer). + \param hits Matrix3xNd hits coordinates in this form: \n + |x0|x1|x2|...|xn| \n + |y0|y1|y2|...|yn| \n + |z0|z1|z2|...|zn| + \param hits_cov Matrix3Nd covariance matrix in this form (()->cov()): \n + |(x0,x0)|(x1,x0)|(x2,x0)|.|(y0,x0)|(y1,x0)|(y2,x0)|.|(z0,x0)|(z1,x0)|(z2,x0)| \n + |(x0,x1)|(x1,x1)|(x2,x1)|.|(y0,x1)|(y1,x1)|(y2,x1)|.|(z0,x1)|(z1,x1)|(z2,x1)| \n + |(x0,x2)|(x1,x2)|(x2,x2)|.|(y0,x2)|(y1,x2)|(y2,x2)|.|(z0,x2)|(z1,x2)|(z2,x2)| \n + . . . . . . . . . . . 
\n + |(x0,y0)|(x1,y0)|(x2,y0)|.|(y0,y0)|(y1,y0)|(y2,x0)|.|(z0,y0)|(z1,y0)|(z2,y0)| \n + |(x0,y1)|(x1,y1)|(x2,y1)|.|(y0,y1)|(y1,y1)|(y2,x1)|.|(z0,y1)|(z1,y1)|(z2,y1)| \n + |(x0,y2)|(x1,y2)|(x2,y2)|.|(y0,y2)|(y1,y2)|(y2,x2)|.|(z0,y2)|(z1,y2)|(z2,y2)| \n + . . . . . . . . . . . \n + |(x0,z0)|(x1,z0)|(x2,z0)|.|(y0,z0)|(y1,z0)|(y2,z0)|.|(z0,z0)|(z1,z0)|(z2,z0)| \n + |(x0,z1)|(x1,z1)|(x2,z1)|.|(y0,z1)|(y1,z1)|(y2,z1)|.|(z0,z1)|(z1,z1)|(z2,z1)| \n + |(x0,z2)|(x1,z2)|(x2,z2)|.|(y0,z2)|(y1,z2)|(y2,z2)|.|(z0,z2)|(z1,z2)|(z2,z2)| + \param bField magnetic field in the center of the detector in Gev/cm/c + unit, in order to perform pt calculation. + \param error flag for error computation. + \param scattering flag for multiple scattering treatment. + (see Circle_fit() documentation for further info). + \warning see Circle_fit(), Line_fit() and Fast_fit() warnings. + \bug see Circle_fit(), Line_fit() and Fast_fit() bugs. +*/ + + template + class helixFit { + public: + template + ALPAKA_FN_ACC ALPAKA_FN_INLINE void operator()(const TAcc& acc, + const Matrix3xNd* hits, + const Eigen::Matrix* hits_ge, + const double bField, + const bool error, + HelixFit* helix) const { + constexpr uint n = N; + VectorNd<4> rad = (hits->block(0, 0, 2, n).colwise().norm()); + + // Fast_fit gives back (X0, Y0, R, theta) w/o errors, using only 3 points. 
+ Vector4d fast_fit; + ALPAKA_ACCELERATOR_NAMESPACE::riemannFit::fastFit(acc, *hits, fast_fit); + riemannFit::Matrix2Nd hits_cov = MatrixXd::Zero(2 * n, 2 * n); + ALPAKA_ACCELERATOR_NAMESPACE::riemannFit::loadCovariance2D(acc, *hits_ge, hits_cov); + CircleFit circle = ALPAKA_ACCELERATOR_NAMESPACE::riemannFit::circleFit( + acc, hits->block(0, 0, 2, n), hits_cov, fast_fit, rad, bField, error); + LineFit line = + ALPAKA_ACCELERATOR_NAMESPACE::riemannFit::lineFit(acc, *hits, *hits_ge, circle, fast_fit, bField, error); + + ALPAKA_ACCELERATOR_NAMESPACE::riemannFit::par_uvrtopak(acc, circle, bField, error); + + helix->par << circle.par, line.par; + if (error) { + helix->cov = MatrixXd::Zero(5, 5); + helix->cov.block(0, 0, 3, 3) = circle.cov; + helix->cov.block(3, 3, 2, 2) = line.cov; + } + helix->qCharge = circle.qCharge; + helix->chi2_circle = circle.chi2; + helix->chi2_line = line.chi2; + } + }; +} // namespace riemannFit +#endif // RecoPixelVertexing_PixelTrackFitting_interface_RiemannFit_h diff --git a/RecoTracker/PixelTrackFitting/plugins/BuildFile.xml b/RecoTracker/PixelTrackFitting/plugins/BuildFile.xml index d28dad5793a66..1a0a490893aef 100644 --- a/RecoTracker/PixelTrackFitting/plugins/BuildFile.xml +++ b/RecoTracker/PixelTrackFitting/plugins/BuildFile.xml @@ -1,8 +1,25 @@ - - - - - + + + + + + + + + + + + + + + + + + + + + diff --git a/RecoTracker/PixelTrackFitting/plugins/PixelTrackDumpAlpaka.cc b/RecoTracker/PixelTrackFitting/plugins/PixelTrackDumpAlpaka.cc new file mode 100644 index 0000000000000..7524fa012eb31 --- /dev/null +++ b/RecoTracker/PixelTrackFitting/plugins/PixelTrackDumpAlpaka.cc @@ -0,0 +1,80 @@ +#include // needed here by soa layout + +#include "DataFormats/Common/interface/Handle.h" +#include "FWCore/Framework/interface/ConsumesCollector.h" +#include "FWCore/Framework/interface/Event.h" +#include "FWCore/Framework/interface/EventSetup.h" +#include "FWCore/Framework/interface/MakerMacros.h" +#include 
"FWCore/Framework/interface/global/EDAnalyzer.h" +#include "FWCore/ParameterSet/interface/ConfigurationDescriptions.h" +#include "FWCore/ParameterSet/interface/ParameterSet.h" +#include "FWCore/ParameterSet/interface/ParameterSetDescription.h" +#include "FWCore/PluginManager/interface/ModuleDef.h" +#include "FWCore/Utilities/interface/EDGetToken.h" +#include "FWCore/Utilities/interface/InputTag.h" +#include "FWCore/Utilities/interface/RunningAverage.h" +#include "RecoTracker/TkMSParametrization/interface/PixelRecoUtilities.h" + +#include "DataFormats/Vertex/interface/ZVertexSoAHost.h" +#include "DataFormats/TrackSoA/interface/TrackSoAHost.h" + +template +class PixelTrackDumpAlpakaT : public edm::global::EDAnalyzer<> { +public: + using TkSoAHost = TrackSoAHost; + using VertexSoAHost = ZVertexHost; + + explicit PixelTrackDumpAlpakaT(const edm::ParameterSet& iConfig); + ~PixelTrackDumpAlpakaT() override = default; + + static void fillDescriptions(edm::ConfigurationDescriptions& descriptions); + +private: + void analyze(edm::StreamID streamID, edm::Event const& iEvent, const edm::EventSetup& iSetup) const override; + edm::EDGetTokenT tokenSoATrack_; + edm::EDGetTokenT tokenSoAVertex_; +}; + +template +PixelTrackDumpAlpakaT::PixelTrackDumpAlpakaT(const edm::ParameterSet& iConfig) { + tokenSoATrack_ = consumes(iConfig.getParameter("pixelTrackSrc")); + tokenSoAVertex_ = consumes(iConfig.getParameter("pixelVertexSrc")); +} + +template +void PixelTrackDumpAlpakaT::fillDescriptions(edm::ConfigurationDescriptions& descriptions) { + edm::ParameterSetDescription desc; + desc.add("pixelTrackSrc", edm::InputTag("pixelTracksAlpaka")); + desc.add("pixelVertexSrc", edm::InputTag("pixelVerticesAlpaka")); + descriptions.addWithDefaultLabel(desc); +} + +template +void PixelTrackDumpAlpakaT::analyze(edm::StreamID streamID, + edm::Event const& iEvent, + const edm::EventSetup& iSetup) const { + auto const& tracks = iEvent.get(tokenSoATrack_); + assert(tracks.view().quality()); + 
assert(tracks.view().chi2()); + assert(tracks.view().nLayers()); + assert(tracks.view().eta()); + assert(tracks.view().pt()); + assert(tracks.view().state()); + assert(tracks.view().covariance()); + assert(tracks.view().nTracks()); + + auto const& vertices = iEvent.get(tokenSoAVertex_); + assert(vertices.view().idv()); + assert(vertices.view().zv()); + assert(vertices.view().wv()); + assert(vertices.view().chi2()); + assert(vertices.view().ptv2()); + assert(vertices.view().ndof()); + assert(vertices.view().sortInd()); + assert(vertices.view().nvFinal()); +} +using PixelTrackDumpAlpakaPhase1 = PixelTrackDumpAlpakaT; +using PixelTrackDumpAlpakaPhase2 = PixelTrackDumpAlpakaT; + +DEFINE_FWK_MODULE(PixelTrackDumpAlpakaPhase1); +DEFINE_FWK_MODULE(PixelTrackDumpAlpakaPhase2); diff --git a/RecoTracker/PixelTrackFitting/plugins/PixelTrackProducerFromSoAAlpaka.cc b/RecoTracker/PixelTrackFitting/plugins/PixelTrackProducerFromSoAAlpaka.cc new file mode 100644 index 0000000000000..121c71656d00e --- /dev/null +++ b/RecoTracker/PixelTrackFitting/plugins/PixelTrackProducerFromSoAAlpaka.cc @@ -0,0 +1,261 @@ +#include + +#include "DataFormats/BeamSpot/interface/BeamSpot.h" +#include "DataFormats/GeometrySurface/interface/Plane.h" +#include "DataFormats/SiPixelClusterSoA/interface/ClusteringConstants.h" +#include "DataFormats/TrackSoA/interface/TrackSoAHost.h" +#include "DataFormats/TrackReco/interface/Track.h" +#include "DataFormats/TrackReco/interface/TrackExtra.h" +#include "DataFormats/TrackReco/interface/TrackFwd.h" +#include "DataFormats/TrackerCommon/interface/TrackerTopology.h" +#include "DataFormats/TrackerRecHit2D/interface/SiPixelRecHitCollection.h" +#include "DataFormats/TrajectoryState/interface/LocalTrajectoryParameters.h" +#include "FWCore/Framework/interface/ConsumesCollector.h" +#include "FWCore/Framework/interface/Event.h" +#include "FWCore/Framework/interface/EventSetup.h" +#include "FWCore/Framework/interface/global/EDProducer.h" +#include 
"FWCore/ParameterSet/interface/ConfigurationDescriptions.h" +#include "FWCore/ParameterSet/interface/ParameterSet.h" +#include "FWCore/ParameterSet/interface/ParameterSetDescription.h" +#include "FWCore/Utilities/interface/EDGetToken.h" +#include "FWCore/Utilities/interface/InputTag.h" +#include "Geometry/CommonTopologies/interface/SimplePixelTopology.h" +#include "Geometry/Records/interface/TrackerTopologyRcd.h" +#include "MagneticField/Records/interface/IdealMagneticFieldRecord.h" +#include "TrackingTools/AnalyticalJacobians/interface/JacobianLocalToCurvilinear.h" +#include "TrackingTools/TrajectoryParametrization/interface/CurvilinearTrajectoryError.h" +#include "TrackingTools/TrajectoryParametrization/interface/GlobalTrajectoryParameters.h" + +#include "DataFormats/TrackSoA/interface/alpaka/TrackUtilities.h" +#include "RecoTracker/PixelTrackFitting/interface/alpaka/FitUtils.h" + +#include "storeTracks.h" + +/** + * This class creates "legacy" reco::Track + * objects from the output of SoA CA. 
+ */ + +template +class PixelTrackProducerFromSoAAlpaka : public edm::global::EDProducer<> { + using TkSoAHost = TrackSoAHost; + using tracksHelpers = TracksUtilities; + using HMSstorage = std::vector; + +public: + using IndToEdm = std::vector; + + explicit PixelTrackProducerFromSoAAlpaka(const edm::ParameterSet &iConfig); + ~PixelTrackProducerFromSoAAlpaka() override = default; + + static void fillDescriptions(edm::ConfigurationDescriptions &descriptions); + +private: + void produce(edm::StreamID streamID, edm::Event &iEvent, const edm::EventSetup &iSetup) const override; + + // Event Data tokens + const edm::EDGetTokenT tBeamSpot_; + const edm::EDGetTokenT tokenTrack_; + const edm::EDGetTokenT cpuHits_; + const edm::EDGetTokenT hmsToken_; + // Event Setup tokens + const edm::ESGetToken idealMagneticFieldToken_; + const edm::ESGetToken ttTopoToken_; + + int32_t const minNumberOfHits_; + pixelTrack::Quality const minQuality_; +}; + +template +PixelTrackProducerFromSoAAlpaka::PixelTrackProducerFromSoAAlpaka(const edm::ParameterSet &iConfig) + : tBeamSpot_(consumes(iConfig.getParameter("beamSpot"))), + tokenTrack_(consumes(iConfig.getParameter("trackSrc"))), + cpuHits_(consumes(iConfig.getParameter("pixelRecHitLegacySrc"))), + hmsToken_(consumes(iConfig.getParameter("pixelRecHitLegacySrc"))), + idealMagneticFieldToken_(esConsumes()), + ttTopoToken_(esConsumes()), + minNumberOfHits_(iConfig.getParameter("minNumberOfHits")), + minQuality_(pixelTrack::qualityByName(iConfig.getParameter("minQuality"))) { + if (minQuality_ == pixelTrack::Quality::notQuality) { + throw cms::Exception("PixelTrackConfiguration") + << iConfig.getParameter("minQuality") + " is not a pixelTrack::Quality"; + } + if (minQuality_ < pixelTrack::Quality::dup) { + throw cms::Exception("PixelTrackConfiguration") + << iConfig.getParameter("minQuality") + " not supported"; + } + produces(); + produces(); + // TrackCollection refers to TrackingRechit and TrackExtra + // collections, need to declare its 
production after them to work + // around a rare race condition in framework scheduling + produces(); + produces(); +} + +template +void PixelTrackProducerFromSoAAlpaka::fillDescriptions(edm::ConfigurationDescriptions &descriptions) { + edm::ParameterSetDescription desc; + desc.add("beamSpot", edm::InputTag("offlineBeamSpot")); + desc.add("trackSrc", edm::InputTag("pixelTracksAlpaka")); + desc.add("pixelRecHitLegacySrc", edm::InputTag("siPixelRecHitsPreSplittingLegacy")); + desc.add("minNumberOfHits", 0); + desc.add("minQuality", "loose"); + descriptions.addWithDefaultLabel(desc); +} + +template +void PixelTrackProducerFromSoAAlpaka::produce(edm::StreamID streamID, + edm::Event &iEvent, + const edm::EventSetup &iSetup) const { + // enum class Quality : uint8_t { bad = 0, edup, dup, loose, strict, tight, highPurity }; + reco::TrackBase::TrackQuality recoQuality[] = {reco::TrackBase::undefQuality, + reco::TrackBase::undefQuality, + reco::TrackBase::discarded, + reco::TrackBase::loose, + reco::TrackBase::tight, + reco::TrackBase::tight, + reco::TrackBase::highPurity}; + assert(reco::TrackBase::highPurity == recoQuality[int(pixelTrack::Quality::highPurity)]); + + #ifdef GPU_DEBUG + std::cout << "Converting soa helix in reco tracks" << std::endl; + #endif + + auto indToEdmP = std::make_unique(); + auto &indToEdm = *indToEdmP; + + auto const &idealField = iSetup.getData(idealMagneticFieldToken_); + + pixeltrackfitting::TracksWithRecHits tracks; + + auto const &httopo = iSetup.getData(ttTopoToken_); + + const auto &bsh = iEvent.get(tBeamSpot_); + GlobalPoint bs(bsh.x0(), bsh.y0(), bsh.z0()); + + auto const &rechits = iEvent.get(cpuHits_); + std::vector hitmap; + auto const &rcs = rechits.data(); + auto const nhits = rcs.size(); + + hitmap.resize(nhits, nullptr); + + auto const &hitsModuleStart = iEvent.get(hmsToken_); + + for (auto const &hit : rcs) { + auto const &thit = static_cast(hit); + auto const detI = thit.det()->index(); + auto const &clus = 
thit.firstClusterRef(); + assert(clus.isPixel()); + auto const idx = hitsModuleStart[detI] + clus.pixelCluster().originalId(); + if (idx >= hitmap.size()) + hitmap.resize(idx + 256, nullptr); // only in case of hit overflow in one module + + assert(nullptr == hitmap[idx]); + hitmap[idx] = &hit; + } + + std::vector hits; + hits.reserve(5); + + auto const &tsoa = iEvent.get(tokenTrack_); + auto const *quality = tsoa.view().quality(); + auto const &hitIndices = tsoa.view().hitIndices(); + auto nTracks = tsoa.view().nTracks(); + + tracks.reserve(nTracks); + + int32_t nt = 0; + + //sort index by pt + std::vector sortIdxs(nTracks); + std::iota(sortIdxs.begin(), sortIdxs.end(), 0); + std::sort(sortIdxs.begin(), sortIdxs.end(), [&](int32_t const i1, int32_t const i2) { + return tsoa.view()[i1].pt() > tsoa.view()[i2].pt(); + }); + + //store the index of the SoA: indToEdm[index_SoAtrack] -> index_edmTrack (if it exists) + indToEdm.resize(sortIdxs.size(), -1); + for (const auto &it : sortIdxs) { + auto nHits = tracksHelpers::nHits(tsoa.view(), it); + assert(nHits >= 3); + auto q = quality[it]; + + if (q < minQuality_) + continue; + if (nHits < minNumberOfHits_) //move to nLayers? + continue; + indToEdm[it] = nt; + ++nt; + + hits.resize(nHits); + auto b = hitIndices.begin(it); + for (int iHit = 0; iHit < nHits; ++iHit) + hits[iHit] = hitmap[*(b + iHit)]; + + // mind: this values are respect the beamspot! 
+ + float chi2 = tsoa.view()[it].chi2(); + float phi = tracksHelpers::phi(tsoa.view(), it); + + riemannFit::Vector5d ipar, opar; + riemannFit::Matrix5d icov, ocov; + tracksHelpers::template copyToDense(tsoa.view(), ipar, icov, it); + riemannFit::transformToPerigeePlane(ipar, icov, opar, ocov); + + LocalTrajectoryParameters lpar(opar(0), opar(1), opar(2), opar(3), opar(4), 1.); + AlgebraicSymMatrix55 m; + for (int i = 0; i < 5; ++i) + for (int j = i; j < 5; ++j) + m(i, j) = ocov(i, j); + + float sp = std::sin(phi); + float cp = std::cos(phi); + Surface::RotationType rot(sp, -cp, 0, 0, 0, -1.f, cp, sp, 0); + + Plane impPointPlane(bs, rot); + GlobalTrajectoryParameters gp( + impPointPlane.toGlobal(lpar.position()), impPointPlane.toGlobal(lpar.momentum()), lpar.charge(), &idealField); + JacobianLocalToCurvilinear jl2c(impPointPlane, lpar, idealField); + + AlgebraicSymMatrix55 mo = ROOT::Math::Similarity(jl2c.jacobian(), m); + + int ndof = 2 * hits.size() - 5; + chi2 = chi2 * ndof; + GlobalPoint vv = gp.position(); + math::XYZPoint pos(vv.x(), vv.y(), vv.z()); + GlobalVector pp = gp.momentum(); + math::XYZVector mom(pp.x(), pp.y(), pp.z()); + + auto track = std::make_unique(chi2, ndof, pos, mom, gp.charge(), CurvilinearTrajectoryError(mo)); + + // bad and edup not supported as fit not present or not reliable + auto tkq = recoQuality[int(q)]; + track->setQuality(tkq); + // loose,tight and HP are inclusive + if (reco::TrackBase::highPurity == tkq) { + track->setQuality(reco::TrackBase::tight); + track->setQuality(reco::TrackBase::loose); + } else if (reco::TrackBase::tight == tkq) { + track->setQuality(reco::TrackBase::loose); + } + track->setQuality(tkq); + // filter??? 
+ tracks.emplace_back(track.release(), hits); + } + #ifdef GPU_DEBUG + std::cout << "processed " << nt << " good tuples " << tracks.size() << "out of " << indToEdm.size() << std::endl; + #endif + // store tracks + storeTracks(iEvent, tracks, httopo); + iEvent.put(std::move(indToEdmP)); +} + +#include "FWCore/Framework/interface/MakerMacros.h" + +using PixelTrackProducerFromSoAAlpakaPhase1 = PixelTrackProducerFromSoAAlpaka; +DEFINE_FWK_MODULE(PixelTrackProducerFromSoAAlpakaPhase1); + +using PixelTrackProducerFromSoAAlpakaPhase2 = PixelTrackProducerFromSoAAlpaka; +DEFINE_FWK_MODULE(PixelTrackProducerFromSoAAlpakaPhase2); diff --git a/RecoTracker/PixelTrackFitting/plugins/alpaka/PixelNtupletsFitterAlpaka.dev.cc b/RecoTracker/PixelTrackFitting/plugins/alpaka/PixelNtupletsFitterAlpaka.dev.cc new file mode 100644 index 0000000000000..65504f3e5e9b6 --- /dev/null +++ b/RecoTracker/PixelTrackFitting/plugins/alpaka/PixelNtupletsFitterAlpaka.dev.cc @@ -0,0 +1,131 @@ +#include +#include "CommonTools/Utils/interface/DynArray.h" +#include "DataFormats/GeometryCommonDetAlgo/interface/GlobalError.h" +#include "DataFormats/GeometryCommonDetAlgo/interface/Measurement1D.h" +#include "DataFormats/GeometryVector/interface/GlobalPoint.h" +#include "DataFormats/GeometryVector/interface/LocalPoint.h" +#include "DataFormats/GeometryVector/interface/Pi.h" +#include "DataFormats/TrackingRecHit/interface/TrackingRecHit.h" +#include "HeterogeneousCore/AlpakaInterface/interface/config.h" +#include "HeterogeneousCore/AlpakaInterface/interface/traits.h" +#include "HeterogeneousCore/AlpakaInterface/interface/memory.h" +#include "HeterogeneousCore/AlpakaInterface/interface/workdivision.h" +#include "FWCore/MessageLogger/interface/MessageLogger.h" +#include "FWCore/ParameterSet/interface/ParameterSet.h" +#include "Geometry/CommonDetUnit/interface/GeomDet.h" +#include "Geometry/CommonDetUnit/interface/GeomDetType.h" +#include "MagneticField/Engine/interface/MagneticField.h" +#include 
"RecoTracker/PixelTrackFitting/interface/alpaka/BrokenLine.h" +#include "RecoTracker/PixelTrackFitting/interface/alpaka/PixelNtupletsFitter.h" +#include "RecoTracker/PixelTrackFitting/interface/PixelTrackBuilder.h" +#include "RecoTracker/PixelTrackFitting/interface/PixelTrackErrorParam.h" +#include "RecoTracker/PixelTrackFitting/interface/alpaka/RiemannFit.h" +#include "RecoTracker/TkMSParametrization/interface/PixelRecoUtilities.h" + +using namespace std; +namespace ALPAKA_ACCELERATOR_NAMESPACE { + using namespace cms::alpakatools; + + PixelNtupletsFitter::PixelNtupletsFitter(Queue& queue, float nominalB, const MagneticField* field, bool useRiemannFit) + : queue_(queue), nominalB_(nominalB), field_(field), useRiemannFit_(useRiemannFit) {} + + std::unique_ptr PixelNtupletsFitter::run(const std::vector& hits, + const TrackingRegion& region) const { + using namespace riemannFit; + + std::unique_ptr ret; + + unsigned int nhits = hits.size(); + + if (nhits < 2) + return ret; + + declareDynArray(GlobalPoint, nhits, points); + declareDynArray(GlobalError, nhits, errors); + declareDynArray(bool, nhits, isBarrel); + + for (unsigned int i = 0; i != nhits; ++i) { + auto const& recHit = hits[i]; + points[i] = GlobalPoint(recHit->globalPosition().basicVector() - region.origin().basicVector()); + errors[i] = recHit->globalPositionError(); + isBarrel[i] = recHit->detUnit()->type().isBarrel(); + } + + ALPAKA_ASSERT_OFFLOAD(nhits == 4); + auto hits_gp_h = cms::alpakatools::make_host_buffer>(queue_); + + Eigen::Matrix hits_ge_h = Eigen::Matrix::Zero(); + + for (unsigned int i = 0; i < nhits; ++i) { + hits_gp_h.data()->col(i) << points[i].x(), points[i].y(), points[i].z(); + + hits_ge_h.col(i) << errors[i].cxx(), errors[i].cyx(), errors[i].cyy(), errors[i].czx(), errors[i].czy(), + errors[i].czz(); + } + auto fittedTrack_d = cms::alpakatools::make_device_buffer(queue_); + auto workdiv = cms::alpakatools::make_workdiv(1, 1); + auto hits_gp_d = 
cms::alpakatools::make_device_buffer>(queue_); + auto hits_ge_d = cms::alpakatools::make_device_buffer>(queue_); + + alpaka::memcpy(queue_, hits_gp_d, hits_gp_h); + auto hits_ge_h_view = cms::alpakatools::make_host_view(hits_ge_h); + alpaka::memcpy(queue_, hits_ge_d, hits_ge_h_view); + + useRiemannFit_ ? alpaka::exec(queue_, + workdiv, + riemannFit::helixFit<4>{}, + hits_gp_d.data(), + hits_ge_d.data(), + nominalB_, + true, + fittedTrack_d.data()) + : alpaka::exec(queue_, + workdiv, + brokenline::helixFit<4>{}, + hits_gp_d.data(), + hits_ge_d.data(), + nominalB_, + fittedTrack_d.data()); + + auto fittedTrack_h = cms::alpakatools::make_host_buffer(queue_); + alpaka::memcpy(queue_, fittedTrack_h, fittedTrack_d); + int iCharge = fittedTrack_h.data()->qCharge; + + // parameters are: + // 0: phi + // 1: tip + // 2: curvature + // 3: cottheta + // 4: zip + float valPhi = fittedTrack_h.data()->par(0); + + float valTip = fittedTrack_h.data()->par(1); + + float valCotTheta = fittedTrack_h.data()->par(3); + + float valZip = fittedTrack_h.data()->par(4); + float valPt = fittedTrack_h.data()->par(2); + // + // PixelTrackErrorParam param(valEta, valPt); + float errValPhi = std::sqrt(fittedTrack_h.data()->cov(0, 0)); + float errValTip = std::sqrt(fittedTrack_h.data()->cov(1, 1)); + + float errValPt = std::sqrt(fittedTrack_h.data()->cov(2, 2)); + + float errValCotTheta = std::sqrt(fittedTrack_h.data()->cov(3, 3)); + float errValZip = std::sqrt(fittedTrack_h.data()->cov(4, 4)); + + float chi2 = fittedTrack_h.data()->chi2_line + fittedTrack_h.data()->chi2_circle; + + PixelTrackBuilder builder; + Measurement1D phi(valPhi, errValPhi); + Measurement1D tip(valTip, errValTip); + + Measurement1D pt(valPt, errValPt); + Measurement1D cotTheta(valCotTheta, errValCotTheta); + Measurement1D zip(valZip, errValZip); + + ret.reset(builder.build(pt, phi, cotTheta, tip, zip, chi2, iCharge, hits, field_, region.origin())); + return ret; + } +} // namespace ALPAKA_ACCELERATOR_NAMESPACE diff --git 
a/RecoTracker/PixelTrackFitting/plugins/alpaka/PixelNtupletsFitterProducerAlpaka.cc b/RecoTracker/PixelTrackFitting/plugins/alpaka/PixelNtupletsFitterProducerAlpaka.cc new file mode 100644 index 0000000000000..3276779705243 --- /dev/null +++ b/RecoTracker/PixelTrackFitting/plugins/alpaka/PixelNtupletsFitterProducerAlpaka.cc @@ -0,0 +1,50 @@ +#include +#include "FWCore/Framework/interface/Frameworkfwd.h" +#include "FWCore/ParameterSet/interface/ConfigurationDescriptions.h" +#include "FWCore/ParameterSet/interface/ParameterSet.h" +#include "FWCore/ParameterSet/interface/ParameterSetDescription.h" +#include "FWCore/Utilities/interface/StreamID.h" +#include "MagneticField/Engine/interface/MagneticField.h" +#include "MagneticField/Records/interface/IdealMagneticFieldRecord.h" +#include "RecoTracker/PixelTrackFitting/interface/PixelFitter.h" +#include "RecoTracker/PixelTrackFitting/interface/alpaka/PixelNtupletsFitter.h" +#include "RecoTracker/TkMSParametrization/interface/PixelRecoUtilities.h" +#include "HeterogeneousCore/AlpakaCore/interface/alpaka/Event.h" +#include "HeterogeneousCore/AlpakaCore/interface/alpaka/EventSetup.h" +#include "HeterogeneousCore/AlpakaCore/interface/alpaka/global/EDProducer.h" +#include "HeterogeneousCore/AlpakaInterface/interface/config.h" + +namespace ALPAKA_ACCELERATOR_NAMESPACE { + class PixelNtupletsFitterProducer : public global::EDProducer<> { + public: + explicit PixelNtupletsFitterProducer(const edm::ParameterSet& iConfig) + : useRiemannFit_(iConfig.getParameter("useRiemannFit")), + idealMagneticFieldToken_(esConsumes()), + fitterToken_(produces()) {} + ~PixelNtupletsFitterProducer() override {} + + static void fillDescriptions(edm::ConfigurationDescriptions& descriptions) { + edm::ParameterSetDescription desc; + desc.add("useRiemannFit", false)->setComment("true for Riemann, false for BrokenLine"); + descriptions.addWithDefaultLabel(desc); + } + + private: + bool useRiemannFit_; + const edm::ESGetToken idealMagneticFieldToken_; + 
const device::EDPutToken fitterToken_; + void produce(edm::StreamID, device::Event& iEvent, const device::EventSetup& iSetup) const override; + }; + + void PixelNtupletsFitterProducer::produce(edm::StreamID, + device::Event& iEvent, + const device::EventSetup& iSetup) const { + auto const& idealField = iSetup.getData(idealMagneticFieldToken_); + float bField = 1 / idealField.inverseBzAtOriginInGeV(); + auto impl = std::make_unique(iEvent.queue(), bField, &idealField, useRiemannFit_); + auto prod = std::make_unique(std::move(impl)); + iEvent.put(fitterToken_, std::move(prod)); + } +} // namespace ALPAKA_ACCELERATOR_NAMESPACE +#include "HeterogeneousCore/AlpakaCore/interface/alpaka/MakerMacros.h" +DEFINE_FWK_ALPAKA_MODULE(PixelNtupletsFitterProducer); \ No newline at end of file diff --git a/RecoTracker/PixelTrackFitting/python/PixelTracks_cff.py b/RecoTracker/PixelTrackFitting/python/PixelTracks_cff.py index 91eb380a33da9..538e388cacd32 100644 --- a/RecoTracker/PixelTrackFitting/python/PixelTracks_cff.py +++ b/RecoTracker/PixelTrackFitting/python/PixelTracks_cff.py @@ -203,3 +203,50 @@ (pixelNtupletFit & gpu & gpuValidationPixel).toModify(pixelTracksSoA.cpu, pixelRecHitSrc = "siPixelRecHitsPreSplittingSoA@cpu" ) + +###################################################################### + +### Alpaka Pixel Track Reco + +from Configuration.ProcessModifiers.alpaka_cff import alpaka + +from RecoTracker.PixelSeeding.caHitNtupletAlpakaPhase1_cfi import caHitNtupletAlpakaPhase1 as _pixelTracksAlpakaPhase1 +from RecoTracker.PixelSeeding.caHitNtupletAlpakaPhase2_cfi import caHitNtupletAlpakaPhase2 as _pixelTracksAlpakaPhase2 + +pixelTracksAlpaka = _pixelTracksAlpakaPhase1.clone() +phase2_tracker.toReplaceWith(pixelTracksAlpaka,_pixelTracksAlpakaPhase2.clone()) + +from RecoTracker.PixelTrackFitting.pixelTrackProducerFromSoAAlpakaPhase1_cfi import pixelTrackProducerFromSoAAlpakaPhase1 as _pixelTrackProducerFromSoAAlpakaPhase1 +from 
RecoTracker.PixelTrackFitting.pixelTrackProducerFromSoAAlpakaPhase2_cfi import pixelTrackProducerFromSoAAlpakaPhase2 as _pixelTrackProducerFromSoAAlpakaPhase2 + +(alpaka & ~phase2_tracker).toReplaceWith(pixelTracks, _pixelTrackProducerFromSoAAlpakaPhase1.clone( + pixelRecHitLegacySrc = "siPixelRecHitsPreSplitting", +)) + +(alpaka & phase2_tracker).toReplaceWith(pixelTracks, _pixelTrackProducerFromSoAAlpakaPhase2.clone( + pixelRecHitLegacySrc = "siPixelRecHitsPreSplitting", +)) + +alpaka.toReplaceWith(pixelTracksTask, cms.Task( + # Build the pixel ntuplets and the pixel tracks in SoA format on the Device + pixelTracksAlpaka, + # Convert the pixel tracks from SoA to legacy format + pixelTracks)) + +### Alpaka Device vs Host validation + +from Configuration.ProcessModifiers.alpakaValidationPixel_cff import alpakaValidationPixel + +# Hit SoA producer on serial backend +pixelTracksAlpakaSerial = pixelTracksAlpaka.clone( + pixelRecHitSrc = 'siPixelRecHitsPreSplittingAlpakaSerial', + alpaka = dict( backend = 'serial_sync' ) +) + +alpakaValidationPixel.toReplaceWith(pixelTracksTask, cms.Task( + # Reconstruct and convert the pixel tracks with alpaka on device + pixelTracksTask.copy(), + # SoA serial counterpart + pixelTracksAlpakaSerial)) + + diff --git a/RecoTracker/PixelTrackFitting/src/alpaka/classes_serial.h b/RecoTracker/PixelTrackFitting/src/alpaka/classes_serial.h new file mode 100644 index 0000000000000..78f2411078325 --- /dev/null +++ b/RecoTracker/PixelTrackFitting/src/alpaka/classes_serial.h @@ -0,0 +1,8 @@ +#ifndef RecoPixelVertexing_PixelTrackFitting_src_alpaka_classes_serial_h +#define RecoPixelVertexing_PixelTrackFitting_src_alpaka_classes_serial_h + +#include "RecoTracker/PixelTrackFitting/interface/PixelFitter.h" +#include "RecoTracker/PixelTrackFitting/interface/PixelTrackFilter.h" +#include "DataFormats/Common/interface/Wrapper.h" + +#endif \ No newline at end of file diff --git a/RecoTracker/PixelTrackFitting/src/alpaka/classes_serial_def.xml 
b/RecoTracker/PixelTrackFitting/src/alpaka/classes_serial_def.xml new file mode 100644 index 0000000000000..9f5d8260898e1 --- /dev/null +++ b/RecoTracker/PixelTrackFitting/src/alpaka/classes_serial_def.xml @@ -0,0 +1,4 @@ + + + + diff --git a/RecoTracker/PixelVertexFinding/plugins/BuildFile.xml b/RecoTracker/PixelVertexFinding/plugins/BuildFile.xml index d330676889f26..bbc7891dd93bd 100644 --- a/RecoTracker/PixelVertexFinding/plugins/BuildFile.xml +++ b/RecoTracker/PixelVertexFinding/plugins/BuildFile.xml @@ -1,5 +1,3 @@ - - @@ -10,25 +8,40 @@ + + + + + + + + + + + + + + + diff --git a/RecoTracker/PixelVertexFinding/plugins/PixelVertexProducerFromSoAAlpaka.cc b/RecoTracker/PixelVertexFinding/plugins/PixelVertexProducerFromSoAAlpaka.cc new file mode 100644 index 0000000000000..c932245d5fdcf --- /dev/null +++ b/RecoTracker/PixelVertexFinding/plugins/PixelVertexProducerFromSoAAlpaka.cc @@ -0,0 +1,179 @@ +#include "DataFormats/Vertex/interface/ZVertexSoAHost.h" +#include "DataFormats/BeamSpot/interface/BeamSpot.h" +#include "DataFormats/Common/interface/OrphanHandle.h" +#include "DataFormats/TrackReco/interface/Track.h" +#include "DataFormats/TrackReco/interface/TrackExtra.h" +#include "DataFormats/TrackReco/interface/TrackFwd.h" +#include "DataFormats/VertexReco/interface/Vertex.h" +#include "DataFormats/VertexReco/interface/VertexFwd.h" +#include "FWCore/Framework/interface/ConsumesCollector.h" +#include "FWCore/Framework/interface/ESHandle.h" +#include "FWCore/Framework/interface/Event.h" +#include "FWCore/Framework/interface/EventSetup.h" +#include "FWCore/Framework/interface/MakerMacros.h" +#include "FWCore/Framework/interface/global/EDProducer.h" +#include "FWCore/ParameterSet/interface/ConfigurationDescriptions.h" +#include "FWCore/ParameterSet/interface/ParameterSet.h" +#include "FWCore/ParameterSet/interface/ParameterSetDescription.h" +#include "FWCore/PluginManager/interface/ModuleDef.h" +#include "FWCore/Utilities/interface/EDGetToken.h" +#include 
"FWCore/Utilities/interface/InputTag.h" +#include "Geometry/Records/interface/TrackerTopologyRcd.h" +#include "MagneticField/Records/interface/IdealMagneticFieldRecord.h" + +#undef PIXVERTEX_DEBUG_PRODUCE + +class PixelVertexProducerFromSoAAlpaka : public edm::global::EDProducer<> { +public: + using IndToEdm = std::vector; + + explicit PixelVertexProducerFromSoAAlpaka(const edm::ParameterSet &iConfig); + ~PixelVertexProducerFromSoAAlpaka() override = default; + + static void fillDescriptions(edm::ConfigurationDescriptions &descriptions); + +private: + void produce(edm::StreamID streamID, edm::Event &iEvent, const edm::EventSetup &iSetup) const override; + + edm::EDGetTokenT tokenVertex_; + edm::EDGetTokenT tokenBeamSpot_; + edm::EDGetTokenT tokenTracks_; + edm::EDGetTokenT tokenIndToEdm_; +}; + +PixelVertexProducerFromSoAAlpaka::PixelVertexProducerFromSoAAlpaka(const edm::ParameterSet &conf) + : tokenVertex_(consumes(conf.getParameter("src"))), + tokenBeamSpot_(consumes(conf.getParameter("beamSpot"))), + tokenTracks_(consumes(conf.getParameter("TrackCollection"))), + tokenIndToEdm_(consumes(conf.getParameter("TrackCollection"))) { + produces(); +} + +void PixelVertexProducerFromSoAAlpaka::fillDescriptions(edm::ConfigurationDescriptions &descriptions) { + edm::ParameterSetDescription desc; + + desc.add("TrackCollection", edm::InputTag("pixelTracks")); + desc.add("beamSpot", edm::InputTag("offlineBeamSpot")); + desc.add("src", edm::InputTag("pixelVerticesAlpaka")); + + descriptions.add("pixelVertexFromSoAAlpaka", desc); +} + +void PixelVertexProducerFromSoAAlpaka::produce(edm::StreamID streamID, + edm::Event &iEvent, + const edm::EventSetup &) const { + auto vertexes = std::make_unique(); + + auto tracksHandle = iEvent.getHandle(tokenTracks_); + auto tracksSize = tracksHandle->size(); + auto const &indToEdm = iEvent.get(tokenIndToEdm_); + auto bsHandle = iEvent.getHandle(tokenBeamSpot_); + + float x0 = 0, y0 = 0, z0 = 0, dxdz = 0, dydz = 0; + std::vector itrk; + 
itrk.reserve(64); // avoid first relocations + if (!bsHandle.isValid()) { + edm::LogWarning("PixelVertexProducer") << "No beamspot found. returning vertexes with (0,0,Z) "; + } else { + const reco::BeamSpot &bs = *bsHandle; + x0 = bs.x0(); + y0 = bs.y0(); + z0 = bs.z0(); + dxdz = bs.dxdz(); + dydz = bs.dydz(); + } + + auto const &soa = iEvent.get(tokenVertex_); + + int nv = soa.view().nvFinal(); + +#ifdef PIXVERTEX_DEBUG_PRODUCE + std::cout << "converting " << nv << " vertices " + << " from " << indToEdm.size() << " tracks" << std::endl; +#endif // PIXVERTEX_DEBUG_PRODUCE + + std::set uind; // for verifing index consistency + for (int j = nv - 1; j >= 0; --j) { + auto i = soa.view()[j].sortInd(); // on gpu sorted in ascending order.... + assert(i < nv); + uind.insert(i); + assert(itrk.empty()); + auto z = soa.view()[i].zv(); + auto x = x0 + dxdz * z; + auto y = y0 + dydz * z; + z += z0; + reco::Vertex::Error err; + err(2, 2) = 1.f / soa.view()[i].wv(); + err(2, 2) *= 2.; // artifically inflate error + //Copy also the tracks (no intention to be efficient....) + for (auto k = 0U; k < indToEdm.size(); ++k) { + if (soa.view()[k].idv() == int16_t(i)) + itrk.push_back(k); + } + auto nt = itrk.size(); + if (nt == 0) { +#ifdef PIXVERTEX_DEBUG_PRODUCE + std::cout << "vertex " << i << " with no tracks..." 
<< std::endl; +#endif // PIXVERTEX_DEBUG_PRODUCE + continue; + } + if (nt < 2) { + itrk.clear(); + continue; + } // remove outliers + (*vertexes).emplace_back(reco::Vertex::Point(x, y, z), err, soa.view()[i].chi2(), soa.view()[i].ndof(), nt); + auto &v = (*vertexes).back(); + v.reserve(itrk.size()); + for (auto it : itrk) { + assert(it < int(indToEdm.size())); + auto k = indToEdm[it]; + if (k > tracksSize) { + edm::LogWarning("PixelVertexProducer") << "oops track " << it << " does not exists on CPU " << k; + continue; + } + auto tk = reco::TrackRef(tracksHandle, k); + v.add(tk); + } + itrk.clear(); + } + + LogDebug("PixelVertexProducer") << ": Found " << vertexes->size() << " vertexes\n"; + for (unsigned int i = 0; i < vertexes->size(); ++i) { + LogDebug("PixelVertexProducer") << "Vertex number " << i << " has " << (*vertexes)[i].tracksSize() + << " tracks with a position of " << (*vertexes)[i].z() << " +- " + << std::sqrt((*vertexes)[i].covariance(2, 2)); + } + + // legacy logic.... + if (vertexes->empty() && bsHandle.isValid()) { + const reco::BeamSpot &bs = *bsHandle; + + GlobalError bse(bs.rotatedCovariance3D()); + if ((bse.cxx() <= 0.) || (bse.cyy() <= 0.) || (bse.czz() <= 0.)) { + AlgebraicSymMatrix33 we; + we(0, 0) = 10000; + we(1, 1) = 10000; + we(2, 2) = 10000; + vertexes->push_back(reco::Vertex(bs.position(), we, 0., 0., 0)); + + edm::LogInfo("PixelVertexProducer") << "No vertices found. Beamspot with invalid errors " << bse.matrix() + << "\nWill put Vertex derived from dummy-fake BeamSpot into Event.\n" + << (*vertexes)[0].x() << "\n" + << (*vertexes)[0].y() << "\n" + << (*vertexes)[0].z() << "\n"; + } else { + vertexes->push_back(reco::Vertex(bs.position(), bs.rotatedCovariance3D(), 0., 0., 0)); + + edm::LogInfo("PixelVertexProducer") << "No vertices found. 
Will put Vertex derived from BeamSpot into Event:\n" + << (*vertexes)[0].x() << "\n" + << (*vertexes)[0].y() << "\n" + << (*vertexes)[0].z() << "\n"; + } + } else if (vertexes->empty() && !bsHandle.isValid()) { + edm::LogWarning("PixelVertexProducer") << "No beamspot and no vertex found. No vertex returned."; + } + + iEvent.put(std::move(vertexes)); +} + +DEFINE_FWK_MODULE(PixelVertexProducerFromSoAAlpaka); diff --git a/RecoTracker/PixelVertexFinding/plugins/PixelVertexWorkSpaceLayout.h b/RecoTracker/PixelVertexFinding/plugins/PixelVertexWorkSpaceLayout.h new file mode 100644 index 0000000000000..c64937fbc578b --- /dev/null +++ b/RecoTracker/PixelVertexFinding/plugins/PixelVertexWorkSpaceLayout.h @@ -0,0 +1,26 @@ +#ifndef RecoPixelVertexing_PixelVertexFinding_PixelVertexWorkSpaceLayout_h +#define RecoPixelVertexing_PixelVertexFinding_PixelVertexWorkSpaceLayout_h + +#include "DataFormats/SoATemplate/interface/SoALayout.h" + +// Intermediate data used in the vertex reco algos +// For internal use only +GENERATE_SOA_LAYOUT(PixelVertexWSSoALayout, + SOA_COLUMN(uint16_t, itrk), // index of original track + SOA_COLUMN(float, zt), // input track z at bs + SOA_COLUMN(float, ezt2), // input error^2 on the above + SOA_COLUMN(float, ptt2), // input pt^2 on the above + SOA_COLUMN(uint8_t, izt), // interized z-position of input tracks + SOA_COLUMN(int32_t, iv), // vertex index for each associated track + SOA_SCALAR(uint32_t, ntrks), // number of "selected tracks" + SOA_SCALAR(uint32_t, nvIntermediate)) // the number of vertices after splitting pruning etc. 
+ +namespace vertexFinder { + namespace workSpace { + using PixelVertexWorkSpaceSoALayout = PixelVertexWSSoALayout<>; + using PixelVertexWorkSpaceSoAView = PixelVertexWSSoALayout<>::View; + using PixelVertexWorkSpaceSoAConstView = PixelVertexWSSoALayout<>::ConstView; + } // namespace workSpace +} // namespace vertexFinder + +#endif diff --git a/RecoTracker/PixelVertexFinding/plugins/PixelVertexWorkSpaceSoAHostAlpaka.h b/RecoTracker/PixelVertexFinding/plugins/PixelVertexWorkSpaceSoAHostAlpaka.h new file mode 100644 index 0000000000000..ef1c17834ee19 --- /dev/null +++ b/RecoTracker/PixelVertexFinding/plugins/PixelVertexWorkSpaceSoAHostAlpaka.h @@ -0,0 +1,27 @@ +#ifndef RecoPixelVertexing_PixelVertexFinding_PixelVertexWorkSpaceSoAHost_h +#define RecoPixelVertexing_PixelVertexFinding_PixelVertexWorkSpaceSoAHost_h +#include +#include "HeterogeneousCore/AlpakaInterface/interface/config.h" +#include "DataFormats/Portable/interface/PortableHostCollection.h" +#include "DataFormats/Vertex/interface/alpaka/ZVertexUtilities.h" +#include "PixelVertexWorkSpaceLayout.h" + +template +class PixelVertexWorkSpaceSoAHost : public PortableHostCollection> { +public: + //explicit PixelVertexWorkSpaceSoAHost() : PortableHostCollection>(S) {} + + // Constructor which specifies the SoA size and Alpaka Queue + template + explicit PixelVertexWorkSpaceSoAHost(TQueue queue) : PortableHostCollection>(S, queue) {} + + explicit PixelVertexWorkSpaceSoAHost(alpaka_common::DevHost const& host) + : PortableHostCollection>(S, host) {} +}; + +namespace vertexFinder { + namespace workSpace { + using PixelVertexWorkSpaceSoAHost = PixelVertexWorkSpaceSoAHost; + } +} // namespace vertexFinder +#endif diff --git a/RecoTracker/PixelVertexFinding/plugins/alpaka/PixelVertexProducerAlpaka.cc b/RecoTracker/PixelVertexFinding/plugins/alpaka/PixelVertexProducerAlpaka.cc new file mode 100644 index 0000000000000..76cbc8eb1bff3 --- /dev/null +++ 
b/RecoTracker/PixelVertexFinding/plugins/alpaka/PixelVertexProducerAlpaka.cc @@ -0,0 +1,128 @@ +#include + +#include "Geometry/CommonTopologies/interface/SimplePixelTopology.h" +#include "FWCore/Framework/interface/Frameworkfwd.h" +#include "DataFormats/Common/interface/Handle.h" +#include "FWCore/Framework/interface/ESHandle.h" +#include "FWCore/Framework/interface/ConsumesCollector.h" +#include "FWCore/Utilities/interface/StreamID.h" +#include "FWCore/ParameterSet/interface/ConfigurationDescriptions.h" +#include "HeterogeneousCore/AlpakaCore/interface/module_backend_config.h" +#include "FWCore/ParameterSet/interface/ParameterSet.h" +#include "FWCore/ParameterSet/interface/ParameterSetDescription.h" +#include "FWCore/Utilities/interface/InputTag.h" +#include "HeterogeneousCore/AlpakaCore/interface/alpaka/EDPutToken.h" +#include "HeterogeneousCore/AlpakaCore/interface/alpaka/ESGetToken.h" +#include "HeterogeneousCore/AlpakaInterface/interface/config.h" +#include "HeterogeneousCore/AlpakaCore/interface/alpaka/Event.h" +#include "HeterogeneousCore/AlpakaCore/interface/alpaka/EventSetup.h" +#include "HeterogeneousCore/AlpakaCore/interface/alpaka/global/EDProducer.h" + +#include "DataFormats/TrackSoA/interface/alpaka/TrackSoACollection.h" +#include "DataFormats/TrackSoA/interface/TrackSoADevice.h" +#include "DataFormats/Vertex/interface/alpaka/ZVertexSoACollection.h" +#include "DataFormats/Vertex/interface/ZVertexSoADevice.h" +#include "HeterogeneousCore/AlpakaCore/interface/alpaka/MakerMacros.h" + +#include "vertexFinder.h" + +#undef PIXVERTEX_DEBUG_PRODUCE + +namespace ALPAKA_ACCELERATOR_NAMESPACE { + + using namespace cms::alpakatools; + + template + class PixelVertexProducerAlpaka : public global::EDProducer<> { + using TkSoADevice = TrackSoACollection; + using GPUAlgo = vertexFinder::Producer; + + public: + explicit PixelVertexProducerAlpaka(const edm::ParameterSet& iConfig); + ~PixelVertexProducerAlpaka() override = default; + + static void 
fillDescriptions(edm::ConfigurationDescriptions& descriptions); + + private: + void produceOnGPU(edm::StreamID streamID, // maybe even remove this and leave only produce? + device::Event& iEvent, + const device::EventSetup& iSetup) const; + void produce(edm::StreamID streamID, device::Event& iEvent, const device::EventSetup& iSetup) const override; + + device::EDGetToken tokenDeviceTrack_; + device::EDPutToken tokenDeviceVertex_; + + const GPUAlgo gpuAlgo_; + + // Tracking cuts before sending tracks to vertex algo + const float ptMin_; + const float ptMax_; + }; + + template + PixelVertexProducerAlpaka::PixelVertexProducerAlpaka(const edm::ParameterSet& conf) + : gpuAlgo_(conf.getParameter("oneKernel"), + conf.getParameter("useDensity"), + conf.getParameter("useDBSCAN"), + conf.getParameter("useIterative"), + conf.getParameter("doSplitting"), + conf.getParameter("minT"), + conf.getParameter("eps"), + conf.getParameter("errmax"), + conf.getParameter("chi2max")), + ptMin_(conf.getParameter("PtMin")), // 0.5 GeV + ptMax_(conf.getParameter("PtMax")) // 75. GeV + { + tokenDeviceTrack_ = consumes(conf.getParameter("pixelTrackSrc")); + tokenDeviceVertex_ = produces(); + } + + template + void PixelVertexProducerAlpaka::fillDescriptions(edm::ConfigurationDescriptions& descriptions) { + edm::ParameterSetDescription desc; + + // Only one of these three algos can be used at once. 
+ // Maybe this should become a Plugin Factory + desc.add("oneKernel", true); + desc.add("useDensity", true); + desc.add("useDBSCAN", false); + desc.add("useIterative", false); + desc.add("doSplitting", true); + + desc.add("minT", 2); // min number of neighbours to be "core" + desc.add("eps", 0.07); // max absolute distance to cluster + desc.add("errmax", 0.01); // max error to be "seed" + desc.add("chi2max", 9.); // max normalized distance to cluster + + desc.add("PtMin", 0.5); + desc.add("PtMax", 75.); + desc.add("pixelTrackSrc", edm::InputTag("pixelTracksAlpaka")); + + descriptions.addWithDefaultLabel(desc); + } + + template + void PixelVertexProducerAlpaka::produceOnGPU(edm::StreamID streamID, + device::Event& iEvent, + const device::EventSetup& iSetup) const { + auto const& hTracks = iEvent.get(tokenDeviceTrack_); + + iEvent.emplace(tokenDeviceVertex_, gpuAlgo_.makeAsync(iEvent.queue(), hTracks.view(), ptMin_, ptMax_)); + } + + template + void PixelVertexProducerAlpaka::produce(edm::StreamID streamID, + device::Event& iEvent, + const device::EventSetup& iSetup) const { + produceOnGPU(streamID, iEvent, iSetup); + } + + using PixelVertexProducerAlpakaPhase1 = PixelVertexProducerAlpaka; + using PixelVertexProducerAlpakaPhase2 = PixelVertexProducerAlpaka; + using PixelVertexProducerAlpakaHIonPhase1 = PixelVertexProducerAlpaka; + +} // namespace ALPAKA_ACCELERATOR_NAMESPACE + +DEFINE_FWK_ALPAKA_MODULE(PixelVertexProducerAlpakaPhase1); +DEFINE_FWK_ALPAKA_MODULE(PixelVertexProducerAlpakaPhase2); +DEFINE_FWK_ALPAKA_MODULE(PixelVertexProducerAlpakaHIonPhase1); diff --git a/RecoTracker/PixelVertexFinding/plugins/alpaka/PixelVertexWorkSpaceSoADeviceAlpaka.h b/RecoTracker/PixelVertexFinding/plugins/alpaka/PixelVertexWorkSpaceSoADeviceAlpaka.h new file mode 100644 index 0000000000000..b9f23b4bfcd8b --- /dev/null +++ b/RecoTracker/PixelVertexFinding/plugins/alpaka/PixelVertexWorkSpaceSoADeviceAlpaka.h @@ -0,0 +1,31 @@ +#ifndef 
RecoPixelVertexing_PixelVertexFinding_PixelVertexWorkSpaceSoADevice_h +#define RecoPixelVertexing_PixelVertexFinding_PixelVertexWorkSpaceSoADevice_h +#include +#include "HeterogeneousCore/AlpakaInterface/interface/config.h" +#include "DataFormats/Portable/interface/alpaka/PortableCollection.h" +#include "DataFormats/Vertex/interface/alpaka/ZVertexUtilities.h" +#include "DataFormats/Vertex/interface/ZVertexDefinitions.h" +#include "../PixelVertexWorkSpaceLayout.h" + +namespace ALPAKA_ACCELERATOR_NAMESPACE { + template + class PixelVertexWorkSpaceSoADevice : public PortableCollection> { + public: + PixelVertexWorkSpaceSoADevice() = default; + + // Constructor which specifies the SoA size and Alpaka Queue + explicit PixelVertexWorkSpaceSoADevice(Queue queue) : PortableCollection>(S, queue) {} + + // Constructor which specifies the SoA size and alpaka device + // TODO: Needed? + explicit PixelVertexWorkSpaceSoADevice(Device const& device) + : PortableCollection>(S, device) {} + }; + namespace vertexFinder { + namespace workSpace { + using PixelVertexWorkSpaceSoADevice = PixelVertexWorkSpaceSoADevice<::zVertex::MAXTRACKS>; + } // namespace workSpace + } // namespace vertexFinder +} // namespace ALPAKA_ACCELERATOR_NAMESPACE + +#endif diff --git a/RecoTracker/PixelVertexFinding/plugins/alpaka/PixelVertexWorkSpaceUtilitiesAlpaka.h b/RecoTracker/PixelVertexFinding/plugins/alpaka/PixelVertexWorkSpaceUtilitiesAlpaka.h new file mode 100644 index 0000000000000..7cfda5ef8eb68 --- /dev/null +++ b/RecoTracker/PixelVertexFinding/plugins/alpaka/PixelVertexWorkSpaceUtilitiesAlpaka.h @@ -0,0 +1,22 @@ +#ifndef RecoTracker_PixelVertexFinding_PixelVertexWorkSpaceUtilitiesAlpaka_h +#define RecoTracker_PixelVertexFinding_PixelVertexWorkSpaceUtilitiesAlpaka_h + +#include +#include "RecoTracker/PixelVertexFinding/plugins/PixelVertexWorkSpaceLayout.h" + +// Methods that operate on View and ConstView of the PixelVertexWorkSpaceSoALayout. 
+namespace ALPAKA_ACCELERATOR_NAMESPACE { + namespace vertexFinder { + namespace workSpace { + namespace utilities { + using namespace ::vertexFinder::workSpace; + + ALPAKA_FN_ACC ALPAKA_FN_INLINE void init(PixelVertexWorkSpaceSoAView &workspace_view) { + workspace_view.ntrks() = 0; + workspace_view.nvIntermediate() = 0; + } + } // namespace utilities + } // namespace workSpace + } // namespace vertexFinder +} // namespace ALPAKA_ACCELERATOR_NAMESPACE +#endif diff --git a/RecoTracker/PixelVertexFinding/plugins/alpaka/clusterTracksByDensity.h b/RecoTracker/PixelVertexFinding/plugins/alpaka/clusterTracksByDensity.h new file mode 100644 index 0000000000000..d82ff5bfe6c47 --- /dev/null +++ b/RecoTracker/PixelVertexFinding/plugins/alpaka/clusterTracksByDensity.h @@ -0,0 +1,244 @@ +#ifndef RecoPixelVertexing_PixelVertexFinding_clusterTracksByDensityAlpaka_h +#define RecoPixelVertexing_PixelVertexFinding_clusterTracksByDensityAlpaka_h + +#include +#include +#include +#include +#include "DataFormats/Vertex/interface/ZVertexLayout.h" +#include "HeterogeneousCore/AlpakaInterface/interface/workdivision.h" +#include "HeterogeneousCore/AlpakaInterface/interface/HistoContainer.h" +#include "../PixelVertexWorkSpaceLayout.h" +#include "vertexFinder.h" + +namespace ALPAKA_ACCELERATOR_NAMESPACE { + namespace vertexFinder { + using VtxSoAView = ::zVertex::ZVertexSoAView; + using WsSoAView = ::vertexFinder::workSpace::PixelVertexWorkSpaceSoAView; + // this algo does not really scale as it works in a single block... 
+ // enough for <10K tracks we have + // + // based on Rodriguez & Laio algo + // + template + ALPAKA_FN_ACC ALPAKA_FN_INLINE void __attribute__((always_inline)) + clusterTracksByDensity(const TAcc& acc, + VtxSoAView& pdata, + WsSoAView& pws, + int minT, // min number of neighbours to be "seed" + float eps, // max absolute distance to cluster + float errmax, // max error to be "seed" + float chi2max // max normalized distance to cluster + ) { + using namespace vertexFinder; + constexpr bool verbose = false; // in principle the compiler should optimize out if false + const uint32_t threadIdxLocal(alpaka::getIdx(acc)[0u]); + + if (verbose && 0 == threadIdxLocal) + printf("params %d %f %f %f\n", minT, eps, errmax, chi2max); + + auto er2mx = errmax * errmax; + + auto& __restrict__ data = pdata; + auto& __restrict__ ws = pws; + auto nt = ws.ntrks(); + float const* __restrict__ zt = ws.zt(); + float const* __restrict__ ezt2 = ws.ezt2(); + + uint32_t& nvFinal = data.nvFinal(); + uint32_t& nvIntermediate = ws.nvIntermediate(); + + uint8_t* __restrict__ izt = ws.izt(); + int32_t* __restrict__ nn = data.ndof(); + int32_t* __restrict__ iv = ws.iv(); + + ALPAKA_ASSERT_OFFLOAD(zt); + ALPAKA_ASSERT_OFFLOAD(ezt2); + ALPAKA_ASSERT_OFFLOAD(izt); + ALPAKA_ASSERT_OFFLOAD(nn); + ALPAKA_ASSERT_OFFLOAD(iv); + + using Hist = cms::alpakatools::HistoContainer; + auto& hist = alpaka::declareSharedVar(acc); + auto& hws = alpaka::declareSharedVar(acc); + + for (auto j : cms::alpakatools::elements_with_stride(acc, Hist::totbins())) { + hist.off[j] = 0; + } + alpaka::syncBlockThreads(acc); + + if (verbose && 0 == threadIdxLocal) + printf("booked hist with %d bins, size %d for %d tracks\n", hist.totbins(), hist.capacity(), nt); + + ALPAKA_ASSERT_OFFLOAD(nt <= hist.capacity()); + + // fill hist (bin shall be wider than "eps") + for (auto i : cms::alpakatools::elements_with_stride(acc, nt)) { + ALPAKA_ASSERT_OFFLOAD(i < ::zVertex::MAXTRACKS); + int iz = int(zt[i] * 10.); // valid if eps<=0.1 + // iz 
= std::clamp(iz, INT8_MIN, INT8_MAX); // sorry c++17 only + iz = std::min(std::max(iz, INT8_MIN), INT8_MAX); + izt[i] = iz - INT8_MIN; + ALPAKA_ASSERT_OFFLOAD(iz - INT8_MIN >= 0); + ALPAKA_ASSERT_OFFLOAD(iz - INT8_MIN < 256); + hist.countHist(acc, izt[i]); + iv[i] = i; + nn[i] = 0; + } + alpaka::syncBlockThreads(acc); + if (threadIdxLocal < 32) + hws[threadIdxLocal] = 0; // used by prefix scan... + alpaka::syncBlockThreads(acc); + hist.finalize(acc, hws); + alpaka::syncBlockThreads(acc); + ALPAKA_ASSERT_OFFLOAD(hist.size() == nt); + for (auto i : cms::alpakatools::elements_with_stride(acc, nt)) { + hist.fillHist(acc, izt[i], uint16_t(i)); + } + alpaka::syncBlockThreads(acc); + // count neighbours + for (auto i : cms::alpakatools::elements_with_stride(acc, nt)) { + if (ezt2[i] > er2mx) + continue; + auto loop = [&](uint32_t j) { + if (i == j) + return; + auto dist = std::abs(zt[i] - zt[j]); + if (dist > eps) + return; + if (dist * dist > chi2max * (ezt2[i] + ezt2[j])) + return; + nn[i]++; + }; + + cms::alpakatools::forEachInBins(hist, izt[i], 1, loop); + } + alpaka::syncBlockThreads(acc); + + // find closest above me .... (we ignore the possibility of two j at same distance from i) + for (auto i : cms::alpakatools::elements_with_stride(acc, nt)) { + float mdist = eps; + auto loop = [&](uint32_t j) { + if (nn[j] < nn[i]) + return; + if (nn[j] == nn[i] && zt[j] >= zt[i]) + return; // if equal use natural order... + auto dist = std::abs(zt[i] - zt[j]); + if (dist > mdist) + return; + if (dist * dist > chi2max * (ezt2[i] + ezt2[j])) + return; // (break natural order???) + mdist = dist; + iv[i] = j; // assign to cluster (better be unique??) 
+ }; + cms::alpakatools::forEachInBins(hist, izt[i], 1, loop); + } + alpaka::syncBlockThreads(acc); + +#ifdef GPU_DEBUG + // mini verification + for (auto i : cms::alpakatools::elements_with_stride(acc, nt)) { + if (iv[i] != int(i)) + ALPAKA_ASSERT_OFFLOAD(iv[iv[i]] != int(i)); + } + alpaka::syncBlockThreads(acc); +#endif + + // consolidate graph (percolate index of seed) + for (auto i : cms::alpakatools::elements_with_stride(acc, nt)) { + auto m = iv[i]; + while (m != iv[m]) + m = iv[m]; + iv[i] = m; + } + +#ifdef GPU_DEBUG + alpaka::syncBlockThreads(acc); + // mini verification + for (auto i : cms::alpakatools::elements_with_stride(acc, nt)) { + if (iv[i] != int(i)) + ALPAKA_ASSERT_OFFLOAD(iv[iv[i]] != int(i)); + } +#endif + +#ifdef GPU_DEBUG + // and verify that we did not split any cluster... + for (auto i : cms::alpakatools::elements_with_stride(acc, nt)) { + auto minJ = i; + auto mdist = eps; + auto loop = [&](uint32_t j) { + if (nn[j] < nn[i]) + return; + if (nn[j] == nn[i] && zt[j] >= zt[i]) + return; // if equal use natural order... + auto dist = std::abs(zt[i] - zt[j]); + if (dist > mdist) + return; + if (dist * dist > chi2max * (ezt2[i] + ezt2[j])) + return; + mdist = dist; + minJ = j; + }; + cms::alpakatools::forEachInBins(hist, izt[i], 1, loop); + // should belong to the same cluster... + ALPAKA_ASSERT_OFFLOAD(iv[i] == iv[minJ]); + ALPAKA_ASSERT_OFFLOAD(nn[i] <= nn[iv[i]]); + } + alpaka::syncBlockThreads(acc); +#endif + + auto& foundClusters = alpaka::declareSharedVar(acc); + foundClusters = 0; + alpaka::syncBlockThreads(acc); + + // find the number of different clusters, identified by a track with clus[i] == i and density larger than threshold; + // mark these tracks with a negative id. 
+ for (auto i : cms::alpakatools::elements_with_stride(acc, nt)) { + if (iv[i] == int(i)) { + if (nn[i] >= minT) { + auto old = alpaka::atomicInc(acc, &foundClusters, 0xffffffff, alpaka::hierarchy::Threads{}); + iv[i] = -(old + 1); + } else { // noise + iv[i] = -9998; + } + } + } + alpaka::syncBlockThreads(acc); + + ALPAKA_ASSERT_OFFLOAD(foundClusters < ::zVertex::MAXVTX); + + // propagate the negative id to all the tracks in the cluster. + for (auto i : cms::alpakatools::elements_with_stride(acc, nt)) { + if (iv[i] >= 0) { + // mark each track in a cluster with the same id as the first one + iv[i] = iv[iv[i]]; + } + } + alpaka::syncBlockThreads(acc); + + // adjust the cluster id to be a positive value starting from 0 + for (auto i : cms::alpakatools::elements_with_stride(acc, nt)) { + iv[i] = -iv[i] - 1; + } + + nvIntermediate = nvFinal = foundClusters; + if (verbose && 0 == threadIdxLocal) + printf("found %d proto vertices\n", foundClusters); + } + class clusterTracksByDensityKernel { + public: + template + ALPAKA_FN_ACC void operator()(const TAcc& acc, + VtxSoAView pdata, + WsSoAView pws, + int minT, // min number of neighbours to be "seed" + float eps, // max absolute distance to cluster + float errmax, // max error to be "seed" + float chi2max // max normalized distance to cluster + ) const { + clusterTracksByDensity(acc, pdata, pws, minT, eps, errmax, chi2max); + } + }; + } // namespace vertexFinder +} // namespace ALPAKA_ACCELERATOR_NAMESPACE +#endif // RecoPixelVertexing_PixelVertexFinding_clusterTracksByDensityAlpaka_h diff --git a/RecoTracker/PixelVertexFinding/plugins/alpaka/clusterTracksDBSCAN.h b/RecoTracker/PixelVertexFinding/plugins/alpaka/clusterTracksDBSCAN.h new file mode 100644 index 0000000000000..1c1f9a4081107 --- /dev/null +++ b/RecoTracker/PixelVertexFinding/plugins/alpaka/clusterTracksDBSCAN.h @@ -0,0 +1,250 @@ +#ifndef RecoPixelVertexing_PixelVertexFinding_gpuClusterTracksDBSCAN_h +#define 
RecoPixelVertexing_PixelVertexFinding_gpuClusterTracksDBSCAN_h + +#include +#include +#include +#include +#include "DataFormats/Vertex/interface/ZVertexLayout.h" +#include "HeterogeneousCore/AlpakaInterface/interface/workdivision.h" +#include "HeterogeneousCore/AlpakaInterface/interface/HistoContainer.h" +#include "../PixelVertexWorkSpaceLayout.h" +#include "vertexFinder.h" +namespace ALPAKA_ACCELERATOR_NAMESPACE { + namespace vertexFinder { + using VtxSoAView = ::zVertex::ZVertexSoAView; + using WsSoAView = ::vertexFinder::workSpace::PixelVertexWorkSpaceSoAView; + // this algo does not really scale as it works in a single block... + // enough for <10K tracks we have + class clusterTracksDBSCAN { + public: + template + ALPAKA_FN_ACC void operator()(const TAcc& acc, + VtxSoAView pdata, + WsSoAView pws, + int minT, // min number of neighbours to be "core" + float eps, // max absolute distance to cluster + float errmax, // max error to be "seed" + float chi2max // max normalized distance to cluster + ) const { + constexpr bool verbose = false; // in principle the compiler should optmize out if false + const uint32_t threadIdxLocal(alpaka::getIdx(acc)[0u]); + if (verbose && 0 == threadIdxLocal) + printf("params %d %f %f %f\n", minT, eps, errmax, chi2max); + + auto er2mx = errmax * errmax; + + auto& __restrict__ data = pdata; + auto& __restrict__ ws = pws; + auto nt = ws.ntrks(); + float const* __restrict__ zt = ws.zt(); + float const* __restrict__ ezt2 = ws.ezt2(); + + uint32_t& nvFinal = data.nvFinal(); + uint32_t& nvIntermediate = ws.nvIntermediate(); + + uint8_t* __restrict__ izt = ws.izt(); + int32_t* __restrict__ nn = data.ndof(); + int32_t* __restrict__ iv = ws.iv(); + + ALPAKA_ASSERT_OFFLOAD(zt); + ALPAKA_ASSERT_OFFLOAD(iv); + ALPAKA_ASSERT_OFFLOAD(nn); + ALPAKA_ASSERT_OFFLOAD(ezt2); + + using Hist = cms::alpakatools::HistoContainer; + auto& hist = alpaka::declareSharedVar(acc); + auto& hws = alpaka::declareSharedVar(acc); + + for (auto j : 
cms::alpakatools::elements_with_stride(acc, Hist::totbins())) { + hist.off[j] = 0; + } + alpaka::syncBlockThreads(acc); + + if (verbose && 0 == threadIdxLocal) + printf("booked hist with %d bins, size %d for %d tracks\n", hist.nbins(), hist.capacity(), nt); + + ALPAKA_ASSERT_OFFLOAD(nt <= hist.capacity()); + + // fill hist (bin shall be wider than "eps") + for (auto i : cms::alpakatools::elements_with_stride(acc, nt)) { + ALPAKA_ASSERT_OFFLOAD(i < ::zVertex::MAXTRACKS); + int iz = int(zt[i] * 10.); // valid if eps<=0.1 + iz = std::clamp(iz, INT8_MIN, INT8_MAX); + izt[i] = iz - INT8_MIN; + ALPAKA_ASSERT_OFFLOAD(iz - INT8_MIN >= 0); + ALPAKA_ASSERT_OFFLOAD(iz - INT8_MIN < 256); + hist.countHist(acc, izt[i]); + iv[i] = i; + nn[i] = 0; + } + alpaka::syncBlockThreads(acc); + if (threadIdxLocal < 32) + hws[threadIdxLocal] = 0; // used by prefix scan... + alpaka::syncBlockThreads(acc); + hist.finalize(acc, hws); + alpaka::syncBlockThreads(acc); + ALPAKA_ASSERT_OFFLOAD(hist.size() == nt); + for (auto i : cms::alpakatools::elements_with_stride(acc, nt)) { + hist.fillHist(acc, izt[i], uint32_t(i)); + } + alpaka::syncBlockThreads(acc); + + // count neighbours + for (auto i : cms::alpakatools::elements_with_stride(acc, nt)) { + if (ezt2[i] > er2mx) + continue; + auto loop = [&](uint32_t j) { + if (i == j) + return; + auto dist = std::abs(zt[i] - zt[j]); + if (dist > eps) + return; + // if (dist*dist>chi2max*(ezt2[i]+ezt2[j])) return; + nn[i]++; + }; + + cms::alpakatools::forEachInBins(hist, izt[i], 1, loop); + } + + alpaka::syncBlockThreads(acc); + + // find NN with smaller z... 
+ for (auto i : cms::alpakatools::elements_with_stride(acc, nt)) { + if (nn[i] < minT) + continue; // DBSCAN core rule + float mz = zt[i]; + auto loop = [&](uint32_t j) { + if (zt[j] >= mz) + return; + if (nn[j] < minT) + return; // DBSCAN core rule + auto dist = std::abs(zt[i] - zt[j]); + if (dist > eps) + return; + // if (dist*dist>chi2max*(ezt2[i]+ezt2[j])) return; + mz = zt[j]; + iv[i] = j; // assign to cluster (better be unique??) + }; + cms::alpakatools::forEachInBins(hist, izt[i], 1, loop); + } + + alpaka::syncBlockThreads(acc); + +#ifdef GPU_DEBUG + // mini verification + for (auto i : cms::alpakatools::elements_with_stride(acc, nt)) { + if (iv[i] != int(i)) + ALPAKA_ASSERT_OFFLOAD(iv[iv[i]] != int(i)); + } + alpaka::syncBlockThreads(acc); +#endif + + // consolidate graph (percolate index of seed) + for (auto i : cms::alpakatools::elements_with_stride(acc, nt)) { + auto m = iv[i]; + while (m != iv[m]) + m = iv[m]; + iv[i] = m; + } + + alpaka::syncBlockThreads(acc); + +#ifdef GPU_DEBUG + // mini verification + for (auto i : cms::alpakatools::elements_with_stride(acc, nt)) { + if (iv[i] != int(i)) + ALPAKA_ASSERT_OFFLOAD(iv[iv[i]] != int(i)); + } + alpaka::syncBlockThreads(acc); +#endif + +#ifdef GPU_DEBUG + // and verify that we did not spit any cluster... + for (auto i : cms::alpakatools::elements_with_stride(acc, nt)) { + if (nn[i] < minT) + continue; // DBSCAN core rule + ALPAKA_ASSERT_OFFLOAD(zt[iv[i]] <= zt[i]); + auto loop = [&](uint32_t j) { + if (nn[j] < minT) + return; // DBSCAN core rule + auto dist = std::abs(zt[i] - zt[j]); + if (dist > eps) + return; + // if (dist*dist>chi2max*(ezt2[i]+ezt2[j])) return; + // they should belong to the same cluster, isn't it? 
+ if (iv[i] != iv[j]) { + printf("ERROR %d %d %f %f %d\n", i, iv[i], zt[i], zt[iv[i]], iv[iv[i]]); + printf(" %d %d %f %f %d\n", j, iv[j], zt[j], zt[iv[j]], iv[iv[j]]); + ; + } + ALPAKA_ASSERT_OFFLOAD(iv[i] == iv[j]); + }; + cms::alpakatools::forEachInBins(hist, izt[i], 1, loop); + } + alpaka::syncBlockThreads(acc); +#endif + + // collect edges (assign to closest cluster of closest point??? here to closest point) + for (auto i : cms::alpakatools::elements_with_stride(acc, nt)) { + // if (nn[i]==0 || nn[i]>=minT) continue; // DBSCAN edge rule + if (nn[i] >= minT) + continue; // DBSCAN edge rule + float mdist = eps; + auto loop = [&](uint32_t j) { + if (nn[j] < minT) + return; // DBSCAN core rule + auto dist = std::abs(zt[i] - zt[j]); + if (dist > mdist) + return; + if (dist * dist > chi2max * (ezt2[i] + ezt2[j])) + return; // needed? + mdist = dist; + iv[i] = iv[j]; // assign to cluster (better be unique??) + }; + cms::alpakatools::forEachInBins(hist, izt[i], 1, loop); + } + + auto& foundClusters = alpaka::declareSharedVar(acc); + foundClusters = 0; + alpaka::syncBlockThreads(acc); + + // find the number of different clusters, identified by a tracks with clus[i] == i; + // mark these tracks with a negative id. + for (auto i : cms::alpakatools::elements_with_stride(acc, nt)) { + if (iv[i] == int(i)) { + if (nn[i] >= minT) { + auto old = alpaka::atomicInc(acc, &foundClusters, 0xffffffff, alpaka::hierarchy::Threads{}); + iv[i] = -(old + 1); + } else { // noise + iv[i] = -9998; + } + } + } + alpaka::syncBlockThreads(acc); + + ALPAKA_ASSERT_OFFLOAD(foundClusters < ::zVertex::MAXVTX); + + // propagate the negative id to all the tracks in the cluster. 
+ for (auto i : cms::alpakatools::elements_with_stride(acc, nt)) { + if (iv[i] >= 0) { + // mark each track in a cluster with the same id as the first one + iv[i] = iv[iv[i]]; + } + } + alpaka::syncBlockThreads(acc); + + // adjust the cluster id to be a positive value starting from 0 + for (auto i : cms::alpakatools::elements_with_stride(acc, nt)) { + iv[i] = -iv[i] - 1; + } + + nvIntermediate = nvFinal = foundClusters; + + if (verbose && 0 == threadIdxLocal) + printf("found %d proto vertices\n", foundClusters); + } + }; + } // namespace vertexFinder +} // namespace ALPAKA_ACCELERATOR_NAMESPACE +#endif // RecoPixelVertexing_PixelVertexFinding_gpuClusterTracksDBSCAN_h diff --git a/RecoTracker/PixelVertexFinding/plugins/alpaka/clusterTracksIterative.h b/RecoTracker/PixelVertexFinding/plugins/alpaka/clusterTracksIterative.h new file mode 100644 index 0000000000000..dedb5d9f0c572 --- /dev/null +++ b/RecoTracker/PixelVertexFinding/plugins/alpaka/clusterTracksIterative.h @@ -0,0 +1,225 @@ +#ifndef RecoTracker_PixelVertexFinding_clusterTracksIterativeAlpaka_h +#define RecoTracker_PixelVertexFinding_clusterTracksIterativeAlpaka_h + +#include +#include +#include +#include +#include "DataFormats/Vertex/interface/alpaka/ZVertexUtilities.h" +#include "DataFormats/Vertex/interface/ZVertexDefinitions.h" +#include "HeterogeneousCore/AlpakaInterface/interface/workdivision.h" +#include "HeterogeneousCore/AlpakaInterface/interface/HistoContainer.h" +#include "RecoTracker/PixelVertexFinding/plugins/PixelVertexWorkSpaceLayout.h" +#include "vertexFinder.h" + +namespace ALPAKA_ACCELERATOR_NAMESPACE { + namespace vertexFinder { + + // this algo does not really scale as it works in a single block... 
+ // enough for <10K tracks we have + class clusterTracksIterative { + public: + template + ALPAKA_FN_ACC void operator()(const TAcc& acc, + VtxSoAView pdata, + WsSoAView pws, + int minT, // min number of neighbours to be "core" + float eps, // max absolute distance to cluster + float errmax, // max error to be "seed" + float chi2max // max normalized distance to cluster + ) const { + constexpr bool verbose = false; // in principle the compiler should optmize out if false + const uint32_t threadIdxLocal(alpaka::getIdx(acc)[0u]); + if (verbose && 0 == threadIdxLocal) + printf("params %d %f %f %f\n", minT, eps, errmax, chi2max); + + auto er2mx = errmax * errmax; + + auto& __restrict__ data = pdata; + auto& __restrict__ ws = pws; + auto nt = ws.ntrks(); + float const* __restrict__ zt = ws.zt(); + float const* __restrict__ ezt2 = ws.ezt2(); + + uint32_t& nvFinal = data.nvFinal(); + uint32_t& nvIntermediate = ws.nvIntermediate(); + + uint8_t* __restrict__ izt = ws.izt(); + int32_t* __restrict__ nn = data.ndof(); + int32_t* __restrict__ iv = ws.iv(); + + ALPAKA_ASSERT_OFFLOAD(zt); + ALPAKA_ASSERT_OFFLOAD(nn); + ALPAKA_ASSERT_OFFLOAD(iv); + ALPAKA_ASSERT_OFFLOAD(ezt2); + + using Hist = cms::alpakatools::HistoContainer; + auto& hist = alpaka::declareSharedVar(acc); + auto& hws = alpaka::declareSharedVar(acc); + + for (auto j : cms::alpakatools::elements_with_stride(acc, Hist::totbins())) { + hist.off[j] = 0; + } + alpaka::syncBlockThreads(acc); + + if (verbose && 0 == threadIdxLocal) + printf("booked hist with %d bins, size %d for %d tracks\n", hist.nbins(), hist.capacity(), nt); + + ALPAKA_ASSERT_OFFLOAD(nt <= hist.capacity()); + + // fill hist (bin shall be wider than "eps") + for (auto i : cms::alpakatools::elements_with_stride(acc, nt)) { + ALPAKA_ASSERT_OFFLOAD(i < ::zVertex::MAXTRACKS); + int iz = int(zt[i] * 10.); // valid if eps<=0.1 + iz = std::clamp(iz, INT8_MIN, INT8_MAX); + izt[i] = iz - INT8_MIN; + ALPAKA_ASSERT_OFFLOAD(iz - INT8_MIN >= 0); + 
ALPAKA_ASSERT_OFFLOAD(iz - INT8_MIN < 256); + hist.countHist(acc, izt[i]); + iv[i] = i; + nn[i] = 0; + } + alpaka::syncBlockThreads(acc); + + if (threadIdxLocal < 32) + hws[threadIdxLocal] = 0; // used by prefix scan... + alpaka::syncBlockThreads(acc); + + hist.finalize(acc, hws); + alpaka::syncBlockThreads(acc); + + ALPAKA_ASSERT_OFFLOAD(hist.size() == nt); + for (auto i : cms::alpakatools::elements_with_stride(acc, nt)) { + hist.fillHist(acc, izt[i], uint16_t(i)); + } + alpaka::syncBlockThreads(acc); + + // count neighbours + for (auto i : cms::alpakatools::elements_with_stride(acc, nt)) { + if (ezt2[i] > er2mx) + continue; + auto loop = [&](uint32_t j) { + if (i == j) + return; + auto dist = std::abs(zt[i] - zt[j]); + if (dist > eps) + return; + if (dist * dist > chi2max * (ezt2[i] + ezt2[j])) + return; + nn[i]++; + }; + + cms::alpakatools::forEachInBins(hist, izt[i], 1, loop); + } + + auto& nloops = alpaka::declareSharedVar(acc); + nloops = 0; + + alpaka::syncBlockThreads(acc); + + // cluster seeds only + bool more = true; + while (alpaka::syncBlockThreadsPredicate(acc, more)) { + if (1 == nloops % 2) { + for (auto i : cms::alpakatools::elements_with_stride(acc, nt)) { + auto m = iv[i]; + while (m != iv[m]) + m = iv[m]; + iv[i] = m; + } + } else { + more = false; + for (auto k : cms::alpakatools::elements_with_stride(acc, hist.size())) { + auto p = hist.begin() + k; + auto i = (*p); + auto be = std::min(Hist::bin(izt[i]) + 1, int(hist.nbins() - 1)); + if (nn[i] < minT) + continue; // DBSCAN core rule + auto loop = [&](uint32_t j) { + ALPAKA_ASSERT_OFFLOAD(i != j); + if (nn[j] < minT) + return; // DBSCAN core rule + auto dist = std::abs(zt[i] - zt[j]); + if (dist > eps) + return; + if (dist * dist > chi2max * (ezt2[i] + ezt2[j])) + return; + auto old = alpaka::atomicMin(acc, &iv[j], iv[i], alpaka::hierarchy::Blocks{}); + if (old != iv[i]) { + // end the loop only if no changes were applied + more = true; + } + alpaka::atomicMin(acc, &iv[i], old, 
alpaka::hierarchy::Blocks{}); + }; + ++p; + for (; p < hist.end(be); ++p) + loop(*p); + } // for i + } + if (threadIdxLocal == 0) + ++nloops; + } // while + + // collect edges (assign to closest cluster of closest point??? here to closest point) + for (auto i : cms::alpakatools::elements_with_stride(acc, nt)) { + // if (nn[i]==0 || nn[i]>=minT) continue; // DBSCAN edge rule + if (nn[i] >= minT) + continue; // DBSCAN edge rule + float mdist = eps; + auto loop = [&](int j) { + if (nn[j] < minT) + return; // DBSCAN core rule + auto dist = std::abs(zt[i] - zt[j]); + if (dist > mdist) + return; + if (dist * dist > chi2max * (ezt2[i] + ezt2[j])) + return; // needed? + mdist = dist; + iv[i] = iv[j]; // assign to cluster (better be unique??) + }; + cms::alpakatools::forEachInBins(hist, izt[i], 1, loop); + } + + auto& foundClusters = alpaka::declareSharedVar(acc); + foundClusters = 0; + alpaka::syncBlockThreads(acc); + + // find the number of different clusters, each identified by a track with iv[i] == i; + // mark these tracks with a negative id. + for (auto i : cms::alpakatools::elements_with_stride(acc, nt)) { + if (iv[i] == int(i)) { + if (nn[i] >= minT) { + auto old = alpaka::atomicInc(acc, &foundClusters, 0xffffffff, alpaka::hierarchy::Threads{}); + iv[i] = -(old + 1); + } else { // noise + iv[i] = -9998; + } + } + } + alpaka::syncBlockThreads(acc); + + ALPAKA_ASSERT_OFFLOAD(foundClusters < ::zVertex::MAXVTX); + + // propagate the negative id to all the tracks in the cluster.
+ for (auto i : cms::alpakatools::elements_with_stride(acc, nt)) { + if (iv[i] >= 0) { + // mark each track in a cluster with the same id as the first one + iv[i] = iv[iv[i]]; + } + } + alpaka::syncBlockThreads(acc); + + // adjust the cluster id to be a positive value starting from 0 + for (auto i : cms::alpakatools::elements_with_stride(acc, nt)) { + iv[i] = -iv[i] - 1; + } + + nvIntermediate = nvFinal = foundClusters; + + if (verbose && 0 == threadIdxLocal) + printf("found %d proto vertices\n", foundClusters); + } + }; + } // namespace vertexFinder +} // namespace ALPAKA_ACCELERATOR_NAMESPACE +#endif // RecoTracker_PixelVertexFinding_plugins_clusterTracksIterativeAlpaka_h diff --git a/RecoTracker/PixelVertexFinding/plugins/alpaka/fitVertices.h b/RecoTracker/PixelVertexFinding/plugins/alpaka/fitVertices.h new file mode 100644 index 0000000000000..5ffe83f1ea3e3 --- /dev/null +++ b/RecoTracker/PixelVertexFinding/plugins/alpaka/fitVertices.h @@ -0,0 +1,121 @@ +#ifndef RecoPixelVertexing_PixelVertexFinding_gpuFitVertices_h +#define RecoPixelVertexing_PixelVertexFinding_gpuFitVertices_h + +#include +#include +#include +#include +#include "HeterogeneousCore/AlpakaInterface/interface/workdivision.h" +#include "HeterogeneousCore/AlpakaInterface/interface/HistoContainer.h" + +#include "vertexFinder.h" + +namespace ALPAKA_ACCELERATOR_NAMESPACE { + namespace vertexFinder { + template + ALPAKA_FN_ACC ALPAKA_FN_INLINE __attribute__((always_inline)) void fitVertices( + const TAcc& acc, + VtxSoAView& pdata, + WsSoAView& pws, + float chi2Max // for outlier rejection + ) { + constexpr bool verbose = false; // in principle the compiler should optimize it out if false + + auto& __restrict__ data = pdata; + auto& __restrict__ ws = pws; + auto nt = ws.ntrks(); + float const* __restrict__ zt = ws.zt(); + float const* __restrict__ ezt2 = ws.ezt2(); + float* __restrict__ zv = data.zv(); + float* __restrict__ wv = data.wv(); + float* __restrict__ chi2 = data.chi2(); + uint32_t& nvFinal =
data.nvFinal(); + uint32_t& nvIntermediate = ws.nvIntermediate(); + + int32_t* __restrict__ nn = data.ndof(); + int32_t* __restrict__ iv = ws.iv(); + + ALPAKA_ASSERT_OFFLOAD(nvFinal <= nvIntermediate); + nvFinal = nvIntermediate; + auto foundClusters = nvFinal; + + // zero + for (auto i : cms::alpakatools::elements_with_stride(acc, foundClusters)) { + zv[i] = 0; + wv[i] = 0; + chi2[i] = 0; + } + + // only for test + auto& noise = alpaka::declareSharedVar(acc); + const uint32_t threadIdxLocal(alpaka::getIdx(acc)[0u]); + if (verbose && 0 == threadIdxLocal) + noise = 0; + + alpaka::syncBlockThreads(acc); + + // compute cluster location + for (auto i : cms::alpakatools::elements_with_stride(acc, nt)) { + if (iv[i] > 9990) { + if (verbose) + alpaka::atomicAdd(acc, &noise, 1, alpaka::hierarchy::Threads{}); + continue; + } + ALPAKA_ASSERT_OFFLOAD(iv[i] >= 0); + ALPAKA_ASSERT_OFFLOAD(iv[i] < int(foundClusters)); + auto w = 1.f / ezt2[i]; + alpaka::atomicAdd(acc, &zv[iv[i]], zt[i] * w, alpaka::hierarchy::Threads{}); + alpaka::atomicAdd(acc, &wv[iv[i]], w, alpaka::hierarchy::Threads{}); + } + + alpaka::syncBlockThreads(acc); + // reuse nn + for (auto i : cms::alpakatools::elements_with_stride(acc, foundClusters)) { + ALPAKA_ASSERT_OFFLOAD(wv[i] > 0.f); + zv[i] /= wv[i]; + nn[i] = -1; // ndof + } + alpaka::syncBlockThreads(acc); + + // compute chi2 + for (auto i : cms::alpakatools::elements_with_stride(acc, nt)) { + if (iv[i] > 9990) + continue; + + auto c2 = zv[iv[i]] - zt[i]; + c2 *= c2 / ezt2[i]; + if (c2 > chi2Max) { + iv[i] = 9999; + continue; + } + alpaka::atomicAdd(acc, &chi2[iv[i]], c2, alpaka::hierarchy::Blocks{}); + alpaka::atomicAdd(acc, &nn[iv[i]], 1, alpaka::hierarchy::Blocks{}); + } + alpaka::syncBlockThreads(acc); + + for (auto i : cms::alpakatools::elements_with_stride(acc, foundClusters)) { + if (nn[i] > 0) { + wv[i] *= float(nn[i]) / chi2[i]; + } + } + + if (verbose && 0 == threadIdxLocal) + printf("found %d proto clusters ", foundClusters); + if (verbose && 
0 == threadIdxLocal) + printf("and %d noise\n", noise); + } + + class fitVerticesKernel { + public: + template + ALPAKA_FN_ACC void operator()(const TAcc& acc, + VtxSoAView pdata, + WsSoAView pws, + float chi2Max // for outlier rejection + ) const { + fitVertices(acc, pdata, pws, chi2Max); + } + }; + } // namespace vertexFinder +} // namespace ALPAKA_ACCELERATOR_NAMESPACE +#endif // RecoPixelVertexing_PixelVertexFinding_plugins_gpuFitVertices_h diff --git a/RecoTracker/PixelVertexFinding/plugins/alpaka/sortByPt2.h b/RecoTracker/PixelVertexFinding/plugins/alpaka/sortByPt2.h new file mode 100644 index 0000000000000..8ce44a2f49591 --- /dev/null +++ b/RecoTracker/PixelVertexFinding/plugins/alpaka/sortByPt2.h @@ -0,0 +1,79 @@ +#ifndef RecoPixelVertexing_PixelVertexFinding_sortByPt2_h +#define RecoPixelVertexing_PixelVertexFinding_sortByPt2_h + +#include +#include +#include +#include +#include +#include "HeterogeneousCore/AlpakaInterface/interface/workdivision.h" +#include "HeterogeneousCore/AlpakaInterface/interface/HistoContainer.h" +#include "HeterogeneousCore/AlpakaInterface/interface/radixSort.h" +#include "DataFormats/Vertex/interface/ZVertexLayout.h" +#include "RecoTracker/PixelVertexFinding/plugins/PixelVertexWorkSpaceLayout.h" + +#include "vertexFinder.h" + +namespace ALPAKA_ACCELERATOR_NAMESPACE { + namespace vertexFinder { + using VtxSoAView = ::zVertex::ZVertexSoAView; + using WsSoAView = ::vertexFinder::workSpace::PixelVertexWorkSpaceSoAView; + + template + ALPAKA_FN_ACC ALPAKA_FN_INLINE void sortByPt2(const TAcc& acc, VtxSoAView& data, WsSoAView& ws) { + auto nt = ws.ntrks(); + float const* __restrict__ ptt2 = ws.ptt2(); + uint32_t const& nvFinal = data.nvFinal(); + + int32_t const* __restrict__ iv = ws.iv(); + float* __restrict__ ptv2 = data.ptv2(); + uint16_t* __restrict__ sortInd = data.sortInd(); + + if (nvFinal < 1) + return; + + // fill indexing + for (auto i : cms::alpakatools::elements_with_stride(acc, nt)) { + data.idv()[ws.itrk()[i]] = iv[i]; + }; 
+ + // can be done asynchronously at the end of previous event + for (auto i : cms::alpakatools::elements_with_stride(acc, nvFinal)) { + ptv2[i] = 0; + }; + alpaka::syncBlockThreads(acc); + + for (auto i : cms::alpakatools::elements_with_stride(acc, nt)) { + if (iv[i] <= 9990) { + alpaka::atomicAdd(acc, &ptv2[iv[i]], ptt2[i], alpaka::hierarchy::Blocks{}); + } + }; + alpaka::syncBlockThreads(acc); + + const uint32_t threadIdxLocal(alpaka::getIdx(acc)[0u]); + if (1 == nvFinal) { + if (threadIdxLocal == 0) + sortInd[0] = 0; + return; + } +#ifndef ALPAKA_ACC_CPU_B_SEQ_T_SEQ_ENABLED + auto& sws = alpaka::declareSharedVar(acc); + // sort using only 16 bits + cms::alpakatools::radixSort(acc, ptv2, sortInd, sws, nvFinal); +#else + for (uint16_t i = 0; i < nvFinal; ++i) + sortInd[i] = i; + std::sort(sortInd, sortInd + nvFinal, [&](auto i, auto j) { return ptv2[i] < ptv2[j]; }); +#endif + } + + class sortByPt2Kernel { + public: + template + ALPAKA_FN_ACC void operator()(const TAcc& acc, VtxSoAView pdata, WsSoAView pws) const { + sortByPt2(acc, pdata, pws); + } + }; + } // namespace vertexFinder +} // namespace ALPAKA_ACCELERATOR_NAMESPACE +#endif // RecoPixelVertexing_PixelVertexFinding_sortByPt2_h diff --git a/RecoTracker/PixelVertexFinding/plugins/alpaka/splitVertices.h b/RecoTracker/PixelVertexFinding/plugins/alpaka/splitVertices.h new file mode 100644 index 0000000000000..f393af552e2dc --- /dev/null +++ b/RecoTracker/PixelVertexFinding/plugins/alpaka/splitVertices.h @@ -0,0 +1,169 @@ +#ifndef RecoPixelVertexing_PixelVertexFinding_splitVertices_h +#define RecoPixelVertexing_PixelVertexFinding_splitVertices_h + +#include +#include +#include +#include +#include "HeterogeneousCore/AlpakaInterface/interface/workdivision.h" +#include "HeterogeneousCore/AlpakaInterface/interface/HistoContainer.h" + +#include "vertexFinder.h" + +namespace ALPAKA_ACCELERATOR_NAMESPACE { + namespace vertexFinder { + using VtxSoAView = ::zVertex::ZVertexSoAView; + using WsSoAView = 
::vertexFinder::workSpace::PixelVertexWorkSpaceSoAView; + template + ALPAKA_FN_ACC ALPAKA_FN_INLINE __attribute__((always_inline)) void splitVertices(const TAcc& acc, + VtxSoAView& pdata, + WsSoAView& pws, + float maxChi2) { + constexpr bool verbose = false; // in principle the compiler should optimize it out if false + const uint32_t threadIdxLocal(alpaka::getIdx(acc)[0u]); + + auto& __restrict__ data = pdata; + auto& __restrict__ ws = pws; + auto nt = ws.ntrks(); + float const* __restrict__ zt = ws.zt(); + float const* __restrict__ ezt2 = ws.ezt2(); + float* __restrict__ zv = data.zv(); + float* __restrict__ wv = data.wv(); + float const* __restrict__ chi2 = data.chi2(); + uint32_t& nvFinal = data.nvFinal(); + + int32_t const* __restrict__ nn = data.ndof(); + int32_t* __restrict__ iv = ws.iv(); + + ALPAKA_ASSERT_OFFLOAD(zt); + ALPAKA_ASSERT_OFFLOAD(wv); + ALPAKA_ASSERT_OFFLOAD(chi2); + ALPAKA_ASSERT_OFFLOAD(nn); + + constexpr uint32_t MAXTK = 512; + +#ifndef ALPAKA_ACC_CPU_B_SEQ_T_SEQ_ENABLED + auto& it = alpaka::declareSharedVar(acc); // track index + auto& zz = alpaka::declareSharedVar(acc); // z pos + auto& newV = alpaka::declareSharedVar(acc); // 0 or 1 + auto& ww = alpaka::declareSharedVar(acc); // z weight +#else + uint32_t it[MAXTK]; + float zz[MAXTK]; + uint8_t newV[MAXTK]; + float ww[MAXTK]; +#endif + auto& nq = alpaka::declareSharedVar(acc); // number of tracks for this vertex + + const uint32_t blockIdx(alpaka::getIdx(acc)[0u]); + const uint32_t gridDimension(alpaka::getWorkDiv(acc)[0u]); + + // one vertex per block + for (auto kv = blockIdx; kv < nvFinal; kv += gridDimension) { + if (nn[kv] < 4) + continue; + if (chi2[kv] < maxChi2 * float(nn[kv])) + continue; + + ALPAKA_ASSERT_OFFLOAD(nn[kv] < int32_t(MAXTK)); + + if ((uint32_t)nn[kv] >= MAXTK) + continue; // too bad FIXME + + nq = 0u; + alpaka::syncBlockThreads(acc); + + // copy to local + for (auto k : cms::alpakatools::elements_with_stride(acc, nt)) { + if (iv[k] == int(kv)) { + auto old =
alpaka::atomicInc(acc, &nq, MAXTK, alpaka::hierarchy::Threads{}); + zz[old] = zt[k] - zv[kv]; + newV[old] = zz[old] < 0 ? 0 : 1; + ww[old] = 1.f / ezt2[k]; + it[old] = k; + } + } + + // the new vertices + auto& znew = alpaka::declareSharedVar(acc); + auto& wnew = alpaka::declareSharedVar(acc); + alpaka::syncBlockThreads(acc); + + ALPAKA_ASSERT_OFFLOAD(int(nq) == nn[kv] + 1); + + int maxiter = 20; + // kt-min.... + bool more = true; + while (alpaka::syncBlockThreadsPredicate(acc, more)) { + more = false; + if (0 == threadIdxLocal) { + znew[0] = 0; + znew[1] = 0; + wnew[0] = 0; + wnew[1] = 0; + } + alpaka::syncBlockThreads(acc); + + for (auto k : cms::alpakatools::elements_with_stride(acc, nq)) { + auto i = newV[k]; + alpaka::atomicAdd(acc, &znew[i], zz[k] * ww[k], alpaka::hierarchy::Threads{}); + alpaka::atomicAdd(acc, &wnew[i], ww[k], alpaka::hierarchy::Threads{}); + } + alpaka::syncBlockThreads(acc); + + if (0 == threadIdxLocal) { + znew[0] /= wnew[0]; + znew[1] /= wnew[1]; + } + alpaka::syncBlockThreads(acc); + + for (auto k : cms::alpakatools::elements_with_stride(acc, nq)) { + auto d0 = fabs(zz[k] - znew[0]); + auto d1 = fabs(zz[k] - znew[1]); + auto newer = d0 < d1 ? 
0 : 1; + more |= newer != newV[k]; + newV[k] = newer; + } + --maxiter; + if (maxiter <= 0) + more = false; + } + + // avoid empty vertices + if (0 == wnew[0] || 0 == wnew[1]) + continue; + + // quality cut + auto dist2 = (znew[0] - znew[1]) * (znew[0] - znew[1]); + + auto chi2Dist = dist2 / (1.f / wnew[0] + 1.f / wnew[1]); + + if (verbose && 0 == threadIdxLocal) + printf("inter %d %f %f\n", 20 - maxiter, chi2Dist, dist2 * wv[kv]); + + if (chi2Dist < 4) + continue; + + // get a new global vertex + auto& igv = alpaka::declareSharedVar(acc); + if (0 == threadIdxLocal) + igv = alpaka::atomicAdd(acc, &ws.nvIntermediate(), 1u, alpaka::hierarchy::Blocks{}); + alpaka::syncBlockThreads(acc); + for (auto k : cms::alpakatools::elements_with_stride(acc, nq)) { + if (1 == newV[k]) + iv[it[k]] = igv; + } + + } // loop on vertices + } + + class splitVerticesKernel { + public: + template + ALPAKA_FN_ACC void operator()(const TAcc& acc, VtxSoAView pdata, WsSoAView pws, float maxChi2) const { + splitVertices(acc, pdata, pws, maxChi2); + } + }; + } // namespace vertexFinder +} // namespace ALPAKA_ACCELERATOR_NAMESPACE +#endif // RecoPixelVertexing_PixelVertexFinding_plugins_splitVertices.h diff --git a/RecoTracker/PixelVertexFinding/plugins/alpaka/vertexFinder.dev.cc b/RecoTracker/PixelVertexFinding/plugins/alpaka/vertexFinder.dev.cc new file mode 100644 index 0000000000000..9303b04586deb --- /dev/null +++ b/RecoTracker/PixelVertexFinding/plugins/alpaka/vertexFinder.dev.cc @@ -0,0 +1,204 @@ +#include +#include "DataFormats/TrackSoA/interface/alpaka/TrackUtilities.h" +#include "DataFormats/Vertex/interface/alpaka/ZVertexUtilities.h" +#include "HeterogeneousCore/AlpakaInterface/interface/workdivision.h" +#include "HeterogeneousCore/AlpakaInterface/interface/traits.h" +#include "PixelVertexWorkSpaceUtilitiesAlpaka.h" +#include "PixelVertexWorkSpaceSoADeviceAlpaka.h" + +#include "vertexFinder.h" +#include "vertexFinder.h" +#include "clusterTracksDBSCAN.h" +#include 
"clusterTracksIterative.h" +#include "clusterTracksByDensity.h" +#include "fitVertices.h" +#include "sortByPt2.h" +#include "splitVertices.h" + +#undef PIXVERTEX_DEBUG_PRODUCE +namespace ALPAKA_ACCELERATOR_NAMESPACE { + namespace vertexFinder { + using namespace cms::alpakatools; + // reject outlier tracks that contribute more than this to the chi2 of the vertex fit + constexpr float maxChi2ForFirstFit = 50.f; + constexpr float maxChi2ForFinalFit = 5000.f; + + // split vertices with a chi2/NDoF greater than this + constexpr float maxChi2ForSplit = 9.f; + + template + class loadTracks { + public: + template >> + ALPAKA_FN_ACC void operator()(const TAcc& acc, + TrackSoAConstView tracks_view, + VtxSoAView soa, + WsSoAView pws, + float ptMin, + float ptMax) const { + auto const* quality = tracks_view.quality(); + using helper = TracksUtilities; + + for (auto idx : cms::alpakatools::elements_with_stride(acc, tracks_view.nTracks())) { + // TODO: since nHits is not used anywhere else it gives of an unused variable warning. Check! + // auto nHits = helper::nHits(tracks_view, idx); + // ALPAKA_ASSERT_OFFLOAD(nHits >= 3); + + // initialize soa... 
+ soa[idx].idv() = -1; + + if (helper::isTriplet(tracks_view, idx)) + continue; // no triplets + if (quality[idx] < ::pixelTrack::Quality::highPurity) + continue; + + auto pt = tracks_view[idx].pt(); + + if (pt < ptMin) + continue; + + // clamp pt + pt = std::min(pt, ptMax); + + auto& data = pws; + auto it = alpaka::atomicAdd(acc, &data.ntrks(), 1u, alpaka::hierarchy::Blocks{}); + data[it].itrk() = idx; + data[it].zt() = helper::zip(tracks_view, idx); + data[it].ezt2() = tracks_view[idx].covariance()(14); + data[it].ptt2() = pt * pt; + } + } + }; +// #define THREE_KERNELS +#ifndef THREE_KERNELS + class vertexFinderOneKernel { //FIXME: CUDA do not have the split here + public: + template >> + ALPAKA_FN_ACC void operator()(const TAcc& acc, + VtxSoAView pdata, + WsSoAView pws, + bool doSplit, + int minT, // min number of neighbours to be "seed" + float eps, // max absolute distance to cluster + float errmax, // max error to be "seed" + float chi2max // max normalized distance to cluster, + ) const { + clusterTracksByDensity(acc, pdata, pws, minT, eps, errmax, chi2max); + alpaka::syncBlockThreads(acc); + fitVertices(acc, pdata, pws, maxChi2ForFirstFit); + alpaka::syncBlockThreads(acc); + if (doSplit) + { + splitVertices(acc, pdata, pws, maxChi2ForSplit); + alpaka::syncBlockThreads(acc); + fitVertices(acc, pdata, pws, maxChi2ForFinalFit); + alpaka::syncBlockThreads(acc); + } + sortByPt2(acc, pdata, pws); + } + }; +#else + class vertexFinderOneKernel1 { + public: + template >> + ALPAKA_FN_ACC void operator()(const TAcc& acc, + VtxSoAView pdata, + WsSoAView pws, + int minT, // min number of neighbours to be "seed" + float eps, // max absolute distance to cluster + float errmax, // max error to be "seed" + float chi2max // max normalized distance to cluster, + ) const { + clusterTracksByDensity(pdata, pws, minT, eps, errmax, chi2max); + alpaka::syncBlockThreads(acc); + fitVertices(pdata, pws, maxChi2ForFirstFit); + } + }; + class vertexFinderKernel2 { + public: + template 
>> + ALPAKA_FN_ACC void operator()(const TAcc& acc, VtxSoAView pdata, WsSoAView pws) const { + fitVertices(pdata, pws, maxChi2ForFinalFit); + alpaka::syncBlockThreads(acc); + sortByPt2(pdata, pws); + } + }; +#endif + + template + ZVertexCollection Producer::makeAsync(Queue& queue, + const TrackSoAConstView& tracks_view, + float ptMin, + float ptMax) const { +#ifdef PIXVERTEX_DEBUG_PRODUCE + std::cout << "producing Vertices on GPU" << std::endl; +#endif // PIXVERTEX_DEBUG_PRODUCE + ZVertexCollection vertices(queue); + + auto soa = vertices.view(); + + // ALPAKA_ASSERT_OFFLOAD(vertices.buffer()); + + auto ws_d = workSpace::PixelVertexWorkSpaceSoADevice(queue); + + // Initialize + const auto initWorkDiv = cms::alpakatools::make_workdiv(1, 1); + alpaka::exec(queue, initWorkDiv, init{}, soa, ws_d.view()); + + // Load Tracks + const uint32_t blockSize = 128; + const uint32_t numberOfBlocks = + cms::alpakatools::divide_up_by(tracks_view.metadata().size() + blockSize - 1, blockSize); + const auto loadTracksWorkDiv = cms::alpakatools::make_workdiv(numberOfBlocks, blockSize); + alpaka::exec( + queue, loadTracksWorkDiv, loadTracks{}, tracks_view, soa, ws_d.view(), ptMin, ptMax); + + // Running too many threads leads to problems when printf is enabled. + const auto finderSorterWorkDiv = cms::alpakatools::make_workdiv(1, 1024 - 128); + const auto splitterFitterWorkDiv = cms::alpakatools::make_workdiv(1024, 128); + + if (oneKernel_) { + // implemented only for density clusters +#ifndef THREE_KERNELS + alpaka::exec( + queue, finderSorterWorkDiv, vertexFinderOneKernel{}, soa, ws_d.view(), doSplitting_, minT, eps, errmax, chi2max); +#else + alpaka::exec( + queue, finderSorterWorkDiv, vertexFinderOneKernel{}, soa, ws_d.view(), minT, eps, errmax, chi2max); + + // one block per vertex...
+ if (doSplitting_) //FIXME: CUDA doesn't have this + alpaka::exec(queue, splitterFitterWorkDiv, splitVerticesKernel{}, soa, ws_d.view(), maxChi2ForSplit); + alpaka::exec(queue, finderSorterWorkDiv{}, soa, ws_d.view()); +#endif + } else { // five kernels + if (useDensity_) { + alpaka::exec( + queue, finderSorterWorkDiv, clusterTracksByDensityKernel{}, soa, ws_d.view(), minT, eps, errmax, chi2max); + + } else if (useDBSCAN_) { + alpaka::exec( + queue, finderSorterWorkDiv, clusterTracksDBSCAN{}, soa, ws_d.view(), minT, eps, errmax, chi2max); + } else if (useIterative_) { + alpaka::exec( + queue, finderSorterWorkDiv, clusterTracksIterative{}, soa, ws_d.view(), minT, eps, errmax, chi2max); + } + alpaka::exec(queue, finderSorterWorkDiv, fitVerticesKernel{}, soa, ws_d.view(), maxChi2ForFirstFit); + + // one block per vertex... + if (doSplitting_) { + alpaka::exec(queue, splitterFitterWorkDiv, splitVerticesKernel{}, soa, ws_d.view(), maxChi2ForSplit); + + alpaka::exec(queue, finderSorterWorkDiv, fitVerticesKernel{}, soa, ws_d.view(), maxChi2ForFinalFit); + } + alpaka::exec(queue, finderSorterWorkDiv, sortByPt2Kernel{}, soa, ws_d.view()); + } + + return vertices; + } + + template class Producer; + template class Producer; + template class Producer; + } // namespace vertexFinder +} // namespace ALPAKA_ACCELERATOR_NAMESPACE diff --git a/RecoTracker/PixelVertexFinding/plugins/alpaka/vertexFinder.h b/RecoTracker/PixelVertexFinding/plugins/alpaka/vertexFinder.h new file mode 100644 index 0000000000000..a546225410099 --- /dev/null +++ b/RecoTracker/PixelVertexFinding/plugins/alpaka/vertexFinder.h @@ -0,0 +1,77 @@ +#ifndef RecoPixelVertexing_PixelVertexFinding_vertexFinder_h +#define RecoPixelVertexing_PixelVertexFinding_vertexFinder_h + +#include +#include +#include +#include "DataFormats/TrackSoA/interface/alpaka/TrackUtilities.h" +#include "DataFormats/Vertex/interface/ZVertexSoAHost.h" +#include "DataFormats/Vertex/interface/ZVertexLayout.h" +#include 
"DataFormats/Vertex/interface/alpaka/ZVertexSoACollection.h" +#include "DataFormats/Vertex/interface/ZVertexSoADevice.h" +#include "DataFormats/Vertex/interface/alpaka/ZVertexUtilities.h" +#include "../PixelVertexWorkSpaceLayout.h" +#include "PixelVertexWorkSpaceUtilitiesAlpaka.h" +#include "PixelVertexWorkSpaceSoADeviceAlpaka.h" +#include "HeterogeneousCore/AlpakaInterface/interface/config.h" + +namespace ALPAKA_ACCELERATOR_NAMESPACE { + namespace vertexFinder { + using namespace cms::alpakatools; + using VtxSoAView = ::zVertex::ZVertexSoAView; + using WsSoAView = ::vertexFinder::workSpace::PixelVertexWorkSpaceSoAView; + + class init { + public: + template >> + ALPAKA_FN_ACC void operator()(const TAcc &acc, VtxSoAView pdata, WsSoAView pws) const { + pdata.nvFinal() = 0; // initialization + vertexFinder::workSpace::utilities::init(pws); + } + }; + + template + class Producer { + using TkSoAConstView = TrackSoAConstView; + + public: + Producer(bool oneKernel, + bool useDensity, + bool useDBSCAN, + bool useIterative, + bool doSplitting, + int iminT, // min number of neighbours to be "core" + float ieps, // max absolute distance to cluster + float ierrmax, // max error to be "seed" + float ichi2max // max normalized distance to cluster + ) + : oneKernel_(oneKernel && !(useDBSCAN || useIterative)), + useDensity_(useDensity), + useDBSCAN_(useDBSCAN), + useIterative_(useIterative), + doSplitting_(doSplitting), + minT(iminT), + eps(ieps), + errmax(ierrmax), + chi2max(ichi2max) {} + + ~Producer() = default; + + ZVertexCollection makeAsync(Queue &queue, const TkSoAConstView &tracks_view, float ptMin, float ptMax) const; + + private: + const bool oneKernel_; // run everything (cluster,fit,split,sort) in one kernel. 
Uses only density clusterizer + const bool useDensity_; // use density clusterizer + const bool useDBSCAN_; // use DBScan clusterizer + const bool useIterative_; // use iterative clusterizer + const bool doSplitting_; //run vertex splitting + + int minT; // min number of neighbours to be "core" + float eps; // max absolute distance to cluster + float errmax; // max error to be "seed" + float chi2max; // max normalized distance to cluster + }; + + } // namespace vertexFinder +} // namespace ALPAKA_ACCELERATOR_NAMESPACE +#endif diff --git a/RecoTracker/PixelVertexFinding/test/BuildFile.xml b/RecoTracker/PixelVertexFinding/test/BuildFile.xml index 9343f00f9a027..1f20bf69e6300 100644 --- a/RecoTracker/PixelVertexFinding/test/BuildFile.xml +++ b/RecoTracker/PixelVertexFinding/test/BuildFile.xml @@ -21,24 +21,57 @@ - + - + - + + - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/RecoTracker/PixelVertexFinding/test/alpaka/VertexFinder_t.cc b/RecoTracker/PixelVertexFinding/test/alpaka/VertexFinder_t.cc new file mode 100644 index 0000000000000..d039d24878947 --- /dev/null +++ b/RecoTracker/PixelVertexFinding/test/alpaka/VertexFinder_t.cc @@ -0,0 +1,34 @@ +#include +#include "HeterogeneousCore/AlpakaInterface/interface/devices.h" +#include "HeterogeneousCore/AlpakaInterface/interface/host.h" +#include "HeterogeneousCore/AlpakaInterface/interface/memory.h" +#include "HeterogeneousCore/AlpakaInterface/interface/config.h" + +#include "DataFormats/Vertex/interface/alpaka/ZVertexUtilities.h" +#include "DataFormats/Vertex/interface/ZVertexSoAHost.h" +#include "DataFormats/Vertex/interface/alpaka/ZVertexSoACollection.h" +#include "DataFormats/Vertex/interface/ZVertexSoADevice.h" + +#include "RecoTracker/PixelVertexFinding/plugins/PixelVertexWorkSpaceLayout.h" +#include "RecoTracker/PixelVertexFinding/plugins/PixelVertexWorkSpaceSoAHostAlpaka.h" +#include "RecoTracker/PixelVertexFinding/plugins/alpaka/PixelVertexWorkSpaceSoADeviceAlpaka.h" + +using 
namespace std; +using namespace ALPAKA_ACCELERATOR_NAMESPACE; + +namespace ALPAKA_ACCELERATOR_NAMESPACE { + + namespace vertexfinder_t { + void runKernels(Queue& queue); + } + +}; // namespace ALPAKA_ACCELERATOR_NAMESPACE + +int main() { + const auto host = cms::alpakatools::host(); + const auto device = cms::alpakatools::devices()[0]; + Queue queue(device); + + vertexfinder_t::runKernels(queue); + return 0; +} diff --git a/RecoTracker/PixelVertexFinding/test/alpaka/VertexFinder_t.dev.cc b/RecoTracker/PixelVertexFinding/test/alpaka/VertexFinder_t.dev.cc new file mode 100644 index 0000000000000..b2f3a32c8a936 --- /dev/null +++ b/RecoTracker/PixelVertexFinding/test/alpaka/VertexFinder_t.dev.cc @@ -0,0 +1,284 @@ +#include +#include +#include +#include +#include +#include +#include "HeterogeneousCore/AlpakaInterface/interface/workdivision.h" +#include "HeterogeneousCore/AlpakaInterface/interface/memory.h" +// TrackUtilities only included in order to compile SoALayout with Eigen columns +#include "DataFormats/TrackSoA/interface/alpaka/TrackUtilities.h" +#ifdef USE_DBSCAN +#include "RecoTracker/PixelVertexFinding/plugins/alpaka/clusterTracksDBSCAN.h" +#define CLUSTERIZE ALPAKA_ACCELERATOR_NAMESPACE::vertexFinder::clusterTracksDBSCAN +#elif USE_ITERATIVE +#include "RecoTracker/PixelVertexFinding/plugins/alpaka/clusterTracksIterativeAlpaka.h" +#define CLUSTERIZE ALPAKA_ACCELERATOR_NAMESPACE::vertexFinder::clusterTracksIterative +#else +#include "RecoTracker/PixelVertexFinding/plugins/alpaka/clusterTracksByDensity.h" +#define CLUSTERIZE ALPAKA_ACCELERATOR_NAMESPACE::vertexFinder::clusterTracksByDensityKernel +#endif +#include "RecoTracker/PixelVertexFinding/plugins/alpaka/PixelVertexWorkSpaceUtilitiesAlpaka.h" +#include "RecoTracker/PixelVertexFinding/plugins/PixelVertexWorkSpaceLayout.h" +#include "RecoTracker/PixelVertexFinding/plugins/PixelVertexWorkSpaceSoAHostAlpaka.h" +#include "RecoTracker/PixelVertexFinding/plugins/alpaka/PixelVertexWorkSpaceSoADeviceAlpaka.h" + 
+#include "RecoTracker/PixelVertexFinding/plugins/alpaka/fitVertices.h" +#include "RecoTracker/PixelVertexFinding/plugins/alpaka/sortByPt2.h" +#include "RecoTracker/PixelVertexFinding/plugins/alpaka/splitVertices.h" +#include "RecoTracker/PixelVertexFinding/plugins/alpaka/vertexFinder.h" + +namespace ALPAKA_ACCELERATOR_NAMESPACE { + using namespace cms::alpakatools; + + using WSSoAHost = ::vertexFinder::workSpace::PixelVertexWorkSpaceSoAHost; + + struct ClusterGenerator { + explicit ClusterGenerator(float nvert, float ntrack) + : rgen(-13., 13), errgen(0.005, 0.025), clusGen(nvert), trackGen(ntrack), gauss(0., 1.), ptGen(1.) {} + + void operator()(WSSoAHost& pwsh, ZVertexHost& vtxh) { + int nclus = clusGen(reng); + for (int zint = 0; zint < vtxh.view().metadata().size(); ++zint) { + vtxh.view().zv()[zint] = 3.5f * gauss(reng); + } + + int aux = 0; + for (int iv = 0; iv < nclus; ++iv) { + auto nt = trackGen(reng); + pwsh.view().itrk()[iv] = nt; + for (int it = 0; it < nt; ++it) { + auto err = errgen(reng); // reality is not flat.... + pwsh.view().zt()[aux] = vtxh.view().zv()[iv] + err * gauss(reng); + pwsh.view().ezt2()[aux] = err * err; + pwsh.view().iv()[aux] = iv; + pwsh.view().ptt2()[aux] = (iv == 5 ? 
1.f : 0.5f) + ptGen(reng); + pwsh.view().ptt2()[aux] *= pwsh.view().ptt2()[aux]; + ++aux; + } + } + pwsh.view().ntrks() = aux; + // add noise + auto nt = 2 * trackGen(reng); + for (int it = 0; it < nt; ++it) { + auto err = 0.03f; + pwsh.view().zt()[it] = rgen(reng); + pwsh.view().ezt2()[it] = err * err; + pwsh.view().iv()[it] = 9999; + pwsh.view().ptt2()[it] = 0.5f + ptGen(reng); + pwsh.view().ptt2()[it] *= pwsh.view().ptt2()[it]; + } + } + + std::mt19937 reng; + std::uniform_real_distribution rgen; + std::uniform_real_distribution errgen; + std::poisson_distribution clusGen; + std::poisson_distribution trackGen; + std::normal_distribution gauss; + std::exponential_distribution ptGen; + }; + + namespace vertexfinder_t { +#ifdef ONE_KERNEL + class vertexFinderOneKernel { + public: + template + ALPAKA_FN_ACC void operator()(const TAcc& acc, + vertexFinder::VtxSoAView pdata, + vertexFinder::WsSoAView pws, + int minT, // min number of neighbours to be "seed" + float eps, // max absolute distance to cluster + float errmax, // max error to be "seed" + float chi2max // max normalized distance to cluster, + ) const { + vertexFinder::clusterTracksByDensity(acc, pdata, pws, minT, eps, errmax, chi2max); + alpaka::syncBlockThreads(acc); + vertexFinder::fitVertices(acc, pdata, pws, 50.); + alpaka::syncBlockThreads(acc); + vertexFinder::splitVertices(acc, pdata, pws, 9.f); + alpaka::syncBlockThreads(acc); + vertexFinder::fitVertices(acc, pdata, pws, 5000.); + alpaka::syncBlockThreads(acc); + vertexFinder::sortByPt2(acc, pdata, pws); + alpaka::syncBlockThreads(acc); + } + }; +#endif + + class kernel_print { + public: + template + ALPAKA_FN_ACC void operator()(const TAcc& acc, + vertexFinder::VtxSoAView pdata, + vertexFinder::WsSoAView pws) const { + printf("nt,nv %d %d,%d\n", pws.ntrks(), pdata.nvFinal(), pws.nvIntermediate()); + } + }; + + void runKernels(Queue& queue) { + vertexFinder::workSpace::PixelVertexWorkSpaceSoADevice ws_d(queue); + 
::vertexFinder::workSpace::PixelVertexWorkSpaceSoAHost ws_h(queue); + ZVertexHost vertices_h(queue); + ZVertexCollection vertices_d(queue); + + float eps = 0.1f; + std::array par{{eps, 0.01f, 9.0f}}; + for (int nav = 30; nav < 80; nav += 20) { + ClusterGenerator gen(nav, 10); + + for (int i = 8; i < 20; ++i) { + auto kk = i / 4; // M param + + gen(ws_h, vertices_h); + auto workDiv1D = make_workdiv(1, 1); + alpaka::exec(queue, workDiv1D, vertexFinder::init{}, vertices_d.view(), ws_d.view()); + // std::cout << "v,t size " << ws_h.view().zt()[0] << ' ' << vertices_h.view().zv()[0] << std::endl; + alpaka::memcpy(queue, ws_d.buffer(), ws_h.buffer()); + alpaka::wait(queue); + + std::cout << "M eps, pset " << kk << ' ' << eps << ' ' << (i % 4) << std::endl; + + if ((i % 4) == 0) + par = {{eps, 0.02f, 12.0f}}; + if ((i % 4) == 1) + par = {{eps, 0.02f, 9.0f}}; + if ((i % 4) == 2) + par = {{eps, 0.01f, 9.0f}}; + if ((i % 4) == 3) + par = {{0.7f * eps, 0.01f, 9.0f}}; + + alpaka::exec(queue, workDiv1D, kernel_print{}, vertices_d.view(), ws_d.view()); + + auto workDivClusterizer = make_workdiv(1, 512 + 256); +#ifdef ONE_KERNEL + alpaka::exec(queue, + workDivClusterizer, + vertexFinderOneKernel{}, + vertices_d.view(), + ws_d.view(), + kk, + par[0], + par[1], + par[2]); +#else + alpaka::exec( + queue, workDivClusterizer, CLUSTERIZE{}, vertices_d.view(), ws_d.view(), kk, par[0], par[1], par[2]); +#endif + alpaka::wait(queue); + alpaka::exec(queue, workDiv1D, kernel_print{}, vertices_d.view(), ws_d.view()); + alpaka::wait(queue); + + auto workDivFitter = make_workdiv(1, 1024 - 256); + + alpaka::exec( + queue, workDivFitter, vertexFinder::fitVerticesKernel{}, vertices_d.view(), ws_d.view(), 50.f); + + alpaka::memcpy(queue, vertices_h.buffer(), vertices_d.buffer()); + alpaka::wait(queue); + + if (vertices_h.view().nvFinal() == 0) { + std::cout << "NO VERTICES???" 
<< std::endl; + continue; + } + + for (auto j = 0U; j < vertices_h.view().nvFinal(); ++j) + if (vertices_h.view().ndof()[j] > 0) + vertices_h.view().chi2()[j] /= float(vertices_h.view().ndof()[j]); + { + auto mx = + std::minmax_element(vertices_h.view().chi2(), vertices_h.view().chi2() + vertices_h.view().nvFinal()); + std::cout << "after fit nv, min max chi2 " << vertices_h.view().nvFinal() << " " << *mx.first << ' ' + << *mx.second << std::endl; + } + + alpaka::exec( + queue, workDivFitter, vertexFinder::fitVerticesKernel{}, vertices_d.view(), ws_d.view(), 50.f); + alpaka::memcpy(queue, vertices_h.buffer(), vertices_d.buffer()); + alpaka::wait(queue); + + for (auto j = 0U; j < vertices_h.view().nvFinal(); ++j) + if (vertices_h.view().ndof()[j] > 0) + vertices_h.view().chi2()[j] /= float(vertices_h.view().ndof()[j]); + { + auto mx = + std::minmax_element(vertices_h.view().chi2(), vertices_h.view().chi2() + vertices_h.view().nvFinal()); + std::cout << "before splitting nv, min max chi2 " << vertices_h.view().nvFinal() << " " << *mx.first << ' ' + << *mx.second << std::endl; + } + + auto workDivSplitter = make_workdiv(1024, 64); + + // one vertex per block!!! + alpaka::exec( + queue, workDivSplitter, vertexFinder::splitVerticesKernel{}, vertices_d.view(), ws_d.view(), 9.f); + alpaka::memcpy(queue, ws_h.buffer(), ws_d.buffer()); + alpaka::wait(queue); + std::cout << "after split " << ws_h.view().nvIntermediate() << std::endl; + + alpaka::exec( + queue, workDivFitter, vertexFinder::fitVerticesKernel{}, vertices_d.view(), ws_d.view(), 5000.f); + + auto workDivSorter = make_workdiv(1, 256); + alpaka::exec(queue, workDivSorter, vertexFinder::sortByPt2Kernel{}, vertices_d.view(), ws_d.view()); + alpaka::memcpy(queue, vertices_h.buffer(), vertices_d.buffer()); + alpaka::wait(queue); + + if (vertices_h.view().nvFinal() == 0) { + std::cout << "NO VERTICES???" 
<< std::endl; + continue; + } + + for (auto j = 0U; j < vertices_h.view().nvFinal(); ++j) + if (vertices_h.view().ndof()[j] > 0) + vertices_h.view().chi2()[j] /= float(vertices_h.view().ndof()[j]); + { + auto mx = + std::minmax_element(vertices_h.view().chi2(), vertices_h.view().chi2() + vertices_h.view().nvFinal()); + std::cout << "nv, min max chi2 " << vertices_h.view().nvFinal() << " " << *mx.first << ' ' << *mx.second + << std::endl; + } + + { + auto mx = std::minmax_element(vertices_h.view().wv(), vertices_h.view().wv() + vertices_h.view().nvFinal()); + std::cout << "min max error " << 1. / std::sqrt(*mx.first) << ' ' << 1. / std::sqrt(*mx.second) + << std::endl; + } + + { + auto mx = + std::minmax_element(vertices_h.view().ptv2(), vertices_h.view().ptv2() + vertices_h.view().nvFinal()); + std::cout << "min max ptv2 " << *mx.first << ' ' << *mx.second << std::endl; + std::cout << "min max ptv2 " << vertices_h.view().ptv2()[vertices_h.view().sortInd()[0]] << ' ' + << vertices_h.view().ptv2()[vertices_h.view().sortInd()[vertices_h.view().nvFinal() - 1]] + << " at " << vertices_h.view().sortInd()[0] << ' ' + << vertices_h.view().sortInd()[vertices_h.view().nvFinal() - 1] << std::endl; + } + + float dd[vertices_h.view().nvFinal()]; + for (auto kv = 0U; kv < vertices_h.view().nvFinal(); ++kv) { + auto zr = vertices_h.view().zv()[kv]; + auto md = 500.0f; + for (int zint = 0; zint < ws_h.view().metadata().size(); ++zint) { + auto d = std::abs(zr - ws_h.view().zt()[zint]); + md = std::min(d, md); + } + dd[kv] = md; + } + if (i == 6) { + for (auto d : dd) + std::cout << d << ' '; + std::cout << std::endl; + } + auto mx = std::minmax_element(dd, dd + vertices_h.view().nvFinal()); + float rms = 0; + for (auto d : dd) + rms += d * d; + rms = std::sqrt(rms) / (vertices_h.view().nvFinal() - 1); + std::cout << "min max rms " << *mx.first << ' ' << *mx.second << ' ' << rms << std::endl; + + } // loop on events + } // lopp on ave vert + } + } // namespace vertexfinder_t +} // 
namespace ALPAKA_ACCELERATOR_NAMESPACE diff --git a/RecoVertex/BeamSpotProducer/plugins/BuildFile.xml b/RecoVertex/BeamSpotProducer/plugins/BuildFile.xml index dec839e2af6cc..318ef5848183d 100644 --- a/RecoVertex/BeamSpotProducer/plugins/BuildFile.xml +++ b/RecoVertex/BeamSpotProducer/plugins/BuildFile.xml @@ -12,33 +12,42 @@ + + + + + + + + + @@ -48,3 +57,11 @@ + + + + + + + + diff --git a/RecoVertex/BeamSpotProducer/plugins/alpaka/BeamSpotDeviceProducer.cc b/RecoVertex/BeamSpotProducer/plugins/alpaka/BeamSpotDeviceProducer.cc new file mode 100644 index 0000000000000..42ae05b668b54 --- /dev/null +++ b/RecoVertex/BeamSpotProducer/plugins/alpaka/BeamSpotDeviceProducer.cc @@ -0,0 +1,60 @@ +#include "DataFormats/BeamSpot/interface/BeamSpot.h" +#include "DataFormats/BeamSpot/interface/BeamSpotHostProduct.h" +#include "DataFormats/BeamSpot/interface/BeamSpotPOD.h" +#include "DataFormats/BeamSpot/interface/alpaka/BeamSpotDeviceProduct.h" +#include "FWCore/ParameterSet/interface/ConfigurationDescriptions.h" +#include "FWCore/ParameterSet/interface/ParameterSet.h" +#include "FWCore/ParameterSet/interface/ParameterSetDescription.h" +#include "FWCore/Utilities/interface/InputTag.h" +#include "HeterogeneousCore/AlpakaCore/interface/alpaka/EDPutToken.h" +#include "HeterogeneousCore/AlpakaCore/interface/alpaka/global/EDProducer.h" +#include "HeterogeneousCore/AlpakaInterface/interface/config.h" + +namespace ALPAKA_ACCELERATOR_NAMESPACE { + + class BeamSpotDeviceProducer : public global::EDProducer<> { + public: + BeamSpotDeviceProducer(edm::ParameterSet const& config) + : legacyToken_{consumes(config.getParameter("src"))}, + deviceToken_{produces()} {} + + void produce(edm::StreamID, device::Event& event, device::EventSetup const& setup) const override { + reco::BeamSpot const& beamspot = event.get(legacyToken_); + + BeamSpotHostProduct hostProduct{event.queue()}; + hostProduct->x = beamspot.x0(); + hostProduct->y = beamspot.y0(); + hostProduct->z = beamspot.z0(); + 
hostProduct->sigmaZ = beamspot.sigmaZ(); + hostProduct->beamWidthX = beamspot.BeamWidthX(); + hostProduct->beamWidthY = beamspot.BeamWidthY(); + hostProduct->dxdz = beamspot.dxdz(); + hostProduct->dydz = beamspot.dydz(); + hostProduct->emittanceX = beamspot.emittanceX(); + hostProduct->emittanceY = beamspot.emittanceY(); + hostProduct->betaStar = beamspot.betaStar(); + + if constexpr(std::is_same_v) { + event.emplace(deviceToken_, std::move(hostProduct)); + } else { + BeamSpotDeviceProduct deviceProduct{event.queue()}; + alpaka::memcpy(event.queue(), deviceProduct.buffer(), hostProduct.const_buffer()); + event.emplace(deviceToken_, std::move(deviceProduct)); + } + } + + static void fillDescriptions(edm::ConfigurationDescriptions& descriptions) { + edm::ParameterSetDescription desc; + desc.add("src", edm::InputTag{}); + descriptions.addWithDefaultLabel(desc); + } + + private: + const edm::EDGetTokenT legacyToken_; + const device::EDPutToken deviceToken_; + }; + +} // namespace ALPAKA_ACCELERATOR_NAMESPACE + +#include "HeterogeneousCore/AlpakaCore/interface/alpaka/MakerMacros.h" +DEFINE_FWK_ALPAKA_MODULE(BeamSpotDeviceProducer); diff --git a/RecoVertex/BeamSpotProducer/python/BeamSpot_cff.py b/RecoVertex/BeamSpotProducer/python/BeamSpot_cff.py index 7cc651af22106..77a71291ccd88 100644 --- a/RecoVertex/BeamSpotProducer/python/BeamSpot_cff.py +++ b/RecoVertex/BeamSpotProducer/python/BeamSpot_cff.py @@ -2,6 +2,7 @@ from RecoVertex.BeamSpotProducer.BeamSpot_cfi import * from RecoVertex.BeamSpotProducer.offlineBeamSpotToCUDA_cfi import offlineBeamSpotToCUDA +from RecoVertex.BeamSpotProducer.beamSpotDeviceProducer_cfi import beamSpotDeviceProducer as _beamSpotDeviceProducer offlineBeamSpotTask = cms.Task(offlineBeamSpot) @@ -9,3 +10,11 @@ _offlineBeamSpotTask_gpu = offlineBeamSpotTask.copy() _offlineBeamSpotTask_gpu.add(offlineBeamSpotToCUDA) gpu.toReplaceWith(offlineBeamSpotTask, _offlineBeamSpotTask_gpu) + +from Configuration.ProcessModifiers.alpaka_cff import alpaka 
+_offlineBeamSpotTask_alpaka = offlineBeamSpotTask.copy() +offlineBeamSpotDevice = _beamSpotDeviceProducer.clone(src = cms.InputTag('offlineBeamSpot')) +_offlineBeamSpotTask_alpaka.add(offlineBeamSpotDevice) +alpaka.toReplaceWith(offlineBeamSpotTask, _offlineBeamSpotTask_alpaka) + +