From f4650a55fec14b4723702c5479293fe7fbfff0b7 Mon Sep 17 00:00:00 2001 From: Long Date: Mon, 3 Jul 2023 10:48:54 +0200 Subject: [PATCH] Add HcalMLTask to hcal client for anomaly detection ML using ONNX interference --- DQM/HcalTasks/BuildFile.xml | 1 + DQM/HcalTasks/plugins/HcalMLTask.cc | 284 ++++++++++++++++ .../plugins/OnlineDQMDigiAD_cmssw.cc | 320 ++++++++++++++++++ DQM/HcalTasks/plugins/OnlineDQMDigiAD_cmssw.h | 183 ++++++++++ .../clients/hcal_dqm_sourceclient-live_cfg.py | 2 + 5 files changed, 790 insertions(+) create mode 100644 DQM/HcalTasks/plugins/HcalMLTask.cc create mode 100644 DQM/HcalTasks/plugins/OnlineDQMDigiAD_cmssw.cc create mode 100644 DQM/HcalTasks/plugins/OnlineDQMDigiAD_cmssw.h diff --git a/DQM/HcalTasks/BuildFile.xml b/DQM/HcalTasks/BuildFile.xml index 6c13fedf0ef6e..551e329144a67 100644 --- a/DQM/HcalTasks/BuildFile.xml +++ b/DQM/HcalTasks/BuildFile.xml @@ -1,6 +1,7 @@ + diff --git a/DQM/HcalTasks/plugins/HcalMLTask.cc b/DQM/HcalTasks/plugins/HcalMLTask.cc new file mode 100644 index 0000000000000..8e88761eafe50 --- /dev/null +++ b/DQM/HcalTasks/plugins/HcalMLTask.cc @@ -0,0 +1,284 @@ +// -*- C++ -*- +// Long Wang (UMD) +// plugin to run ML4DQM ONNX module and plot number of flagged bad channel counts vs LS +// + +#include "DQM/HcalCommon/interface/DQTask.h" +#include "DQM/HcalCommon/interface/Utilities.h" +#include "DQM/HcalCommon/interface/HashFilter.h" +#include "DQM/HcalCommon/interface/Container1D.h" +#include "DQM/HcalCommon/interface/Container2D.h" +#include "DQM/HcalCommon/interface/ContainerProf1D.h" +#include "DQM/HcalCommon/interface/ContainerProf2D.h" +#include "DQM/HcalCommon/interface/ContainerSingle1D.h" +#include "DQM/HcalCommon/interface/ContainerSingle2D.h" +#include "DQM/HcalCommon/interface/ContainerSingleProf2D.h" +#include "DQM/HcalCommon/interface/ElectronicsMap.h" + +#include "PhysicsTools/ONNXRuntime/interface/ONNXRuntime.h" +#include "FWCore/ParameterSet/interface/FileInPath.h" +#include 
"HeterogeneousCore/CUDAUtilities/interface/requireDevices.h" + +#include "DQM/HcalTasks/plugins/OnlineDQMDigiAD_cmssw.h" + +#include +#include +#include + +using namespace cms::Ort; +using namespace hcaldqm; +using namespace hcaldqm::constants; +using namespace hcaldqm::filter; + +class HcalMLTask : public hcaldqm::DQTask { +public: + HcalMLTask(edm::ParameterSet const&); + ~HcalMLTask() override = default; + + void dqmBeginRun(edm::Run const&, edm::EventSetup const&) override; + void bookHistograms(DQMStore::IBooker&, edm::Run const&, edm::EventSetup const&) override; + std::shared_ptr globalBeginLuminosityBlock(edm::LuminosityBlock const&, + edm::EventSetup const&) const override; + void globalEndLuminosityBlock(edm::LuminosityBlock const&, edm::EventSetup const&) override; + static void fillDescriptions(edm::ConfigurationDescriptions& descriptions); + +private: + void _process(edm::Event const&, edm::EventSetup const&) override; + void _resetMonitors(hcaldqm::UpdateFreq) override; + + std::string onnx_model_path_HB, onnx_model_path_HE; + double flagDecisionThr; + edm::InputTag tagQIE11; + edm::InputTag tagHO; + edm::InputTag tagQIE10; + edm::EDGetTokenT tokQIE11; + edm::EDGetTokenT tokHO; + edm::EDGetTokenT tokQIE10; + edm::ESGetToken hcalDbServiceToken_; + + hcaldqm::ContainerXXX Occupancy1LS; + hcaldqm::Container1D MLFlagvsLS_Subdet; + + std::unique_ptr dqmadObj_HB = nullptr; + std::unique_ptr dqmadObj_HE = nullptr; + + std::vector> digiHcal2DHist_depth_1{ + std::vector>(64, std::vector(72, 0))}; + std::vector> digiHcal2DHist_depth_2{ + std::vector>(64, std::vector(72, 0))}; + std::vector> digiHcal2DHist_depth_3{ + std::vector>(64, std::vector(72, 0))}; + std::vector> digiHcal2DHist_depth_4{ + std::vector>(64, std::vector(72, 0))}; + std::vector> digiHcal2DHist_depth_5{ + std::vector>(64, std::vector(72, 0))}; + std::vector> digiHcal2DHist_depth_6{ + std::vector>(64, std::vector(72, 0))}; + std::vector> digiHcal2DHist_depth_7{ + std::vector>(64, 
std::vector(72, 0))}; +}; + +HcalMLTask::HcalMLTask(edm::ParameterSet const& ps) + : DQTask(ps), hcalDbServiceToken_(esConsumes()) { + onnx_model_path_HB = ps.getUntrackedParameter( + "onnx_model_path_HB", + "DQM/HcalTasks/data/HB_2022/" + "CGAE_MultiDim_SPATIAL_vONNX_RCLv22_PIXEL_BT_BN_RIN_IPHI_MED_5218_v06_02_2023_21h01_stateful.onnx"); + onnx_model_path_HE = ps.getUntrackedParameter( + "onnx_model_path_HE", + "DQM/HcalTasks/data/HE_2022/" + "CGAE_MultiDim_SPATIAL_vONNX_RCLv22_PIXEL_BT_BN_RIN_IPHI_MED_7763_v06_02_2023_22h55_stateful.onnx"); + flagDecisionThr = ps.getUntrackedParameter("flagDecisionThr", 20.); + tagQIE11 = ps.getUntrackedParameter("tagHBHE", edm::InputTag("hcalDigis")); + tagHO = ps.getUntrackedParameter("tagHO", edm::InputTag("hcalDigis")); + tagQIE10 = ps.getUntrackedParameter("tagHF", edm::InputTag("hcalDigis")); + + tokQIE11 = consumes(tagQIE11); + tokHO = consumes(tagHO); + tokQIE10 = consumes(tagQIE10); + + auto dqmadObj_HB_ = std::make_unique("hb", onnx_model_path_HB, Backend::cpu); + auto dqmadObj_HE_ = std::make_unique("he", onnx_model_path_HE, Backend::cpu); + dqmadObj_HB = std::move(dqmadObj_HB_); + dqmadObj_HE = std::move(dqmadObj_HE_); +} + +void HcalMLTask::dqmBeginRun(edm::Run const& r, edm::EventSetup const& es) { DQTask::dqmBeginRun(r, es); } + +void HcalMLTask::bookHistograms(DQMStore::IBooker& ib, edm::Run const& r, edm::EventSetup const& es) { + DQTask::bookHistograms(ib, r, es); + + // GET WHAT YOU NEED + edm::ESHandle dbs = es.getHandle(hcalDbServiceToken_); + _emap = dbs->getHcalMapping(); + + // Book monitoring elements + Occupancy1LS.initialize(hcaldqm::hashfunctions::fDChannel); + + MLFlagvsLS_Subdet.initialize(_name, + "MLBadFlagedChannelsvsLS", + hcaldqm::hashfunctions::fSubdet, + new hcaldqm::quantity::LumiSection(_maxLS), + new hcaldqm::quantity::ValueQuantity(hcaldqm::quantity::fN), + 0); + + Occupancy1LS.book(_emap); + MLFlagvsLS_Subdet.book(ib, _emap, _subsystem); +} + +void 
HcalMLTask::_resetMonitors(hcaldqm::UpdateFreq uf) { DQTask::_resetMonitors(uf); } + +void HcalMLTask::_process(edm::Event const& e, edm::EventSetup const&) { + if (_ptype != fOnline) + return; + + auto const chbhe = e.getHandle(tokQIE11); + + if (not(chbhe.isValid())) { + edm::LogWarning("HcalMLTask") << "QIE11 Collection is unavailable, will not fill this event."; + return; + } + + auto lumiCache = luminosityBlockCache(e.getLuminosityBlock().index()); + _currentLS = lumiCache->currentLS; + + for (QIE11DigiCollection::const_iterator it = chbhe->begin(); it != chbhe->end(); ++it) { + const QIE11DataFrame digi = static_cast(*it); + + HcalDetId const& did = digi.detid(); + if (did.subdet() != HcalEndcap && did.subdet() != HcalBarrel) + continue; + + Occupancy1LS.get(did)++; + } +} + +std::shared_ptr HcalMLTask::globalBeginLuminosityBlock(edm::LuminosityBlock const& lb, + edm::EventSetup const& es) const { + return DQTask::globalBeginLuminosityBlock(lb, es); +} + +void HcalMLTask::globalEndLuminosityBlock(edm::LuminosityBlock const& lb, edm::EventSetup const& es) { + auto lumiCache = luminosityBlockCache(lb.index()); + _currentLS = lumiCache->currentLS; + _xQuality.reset(); + _xQuality = lumiCache->xQuality; + + for (auto& HistElement : digiHcal2DHist_depth_1) + std::fill(HistElement.begin(), HistElement.end(), 0); + for (auto& HistElement : digiHcal2DHist_depth_2) + std::fill(HistElement.begin(), HistElement.end(), 0); + for (auto& HistElement : digiHcal2DHist_depth_3) + std::fill(HistElement.begin(), HistElement.end(), 0); + for (auto& HistElement : digiHcal2DHist_depth_4) + std::fill(HistElement.begin(), HistElement.end(), 0); + for (auto& HistElement : digiHcal2DHist_depth_5) + std::fill(HistElement.begin(), HistElement.end(), 0); + for (auto& HistElement : digiHcal2DHist_depth_6) + std::fill(HistElement.begin(), HistElement.end(), 0); + for (auto& HistElement : digiHcal2DHist_depth_7) + std::fill(HistElement.begin(), HistElement.end(), 0); + float LS_numEvents = 
(float)_evsPerLS; + + std::vector dids = _emap->allPrecisionId(); + for (std::vector::const_iterator it = dids.begin(); it != dids.end(); ++it) { + if (!it->isHcalDetId()) + continue; + if (_xQuality.exists(HcalDetId(*it))) { + HcalChannelStatus cs(it->rawId(), _xQuality.get(HcalDetId(*it))); + if (cs.isBitSet(HcalChannelStatus::HcalCellMask) || cs.isBitSet(HcalChannelStatus::HcalCellDead)) + continue; + } + + HcalDetId did = HcalDetId(it->rawId()); + if (did.subdet() != HcalEndcap && did.subdet() != HcalBarrel) + continue; + + if (did.depth() == 1) + digiHcal2DHist_depth_1.at(did.ieta() < 0 ? did.ieta() + 32 : did.ieta() + 31).at(did.iphi() - 1) = + Occupancy1LS.get(did); + if (did.depth() == 2) + digiHcal2DHist_depth_2.at(did.ieta() < 0 ? did.ieta() + 32 : did.ieta() + 31).at(did.iphi() - 1) = + Occupancy1LS.get(did); + if (did.depth() == 3) + digiHcal2DHist_depth_3.at(did.ieta() < 0 ? did.ieta() + 32 : did.ieta() + 31).at(did.iphi() - 1) = + Occupancy1LS.get(did); + if (did.depth() == 4) + digiHcal2DHist_depth_4.at(did.ieta() < 0 ? did.ieta() + 32 : did.ieta() + 31).at(did.iphi() - 1) = + Occupancy1LS.get(did); + if (did.depth() == 5) + digiHcal2DHist_depth_5.at(did.ieta() < 0 ? did.ieta() + 32 : did.ieta() + 31).at(did.iphi() - 1) = + Occupancy1LS.get(did); + if (did.depth() == 6) + digiHcal2DHist_depth_6.at(did.ieta() < 0 ? did.ieta() + 32 : did.ieta() + 31).at(did.iphi() - 1) = + Occupancy1LS.get(did); + if (did.depth() == 7) + digiHcal2DHist_depth_7.at(did.ieta() < 0 ? 
did.ieta() + 32 : did.ieta() + 31).at(did.iphi() - 1) = + Occupancy1LS.get(did); + } + + std::vector> ad_HBmodel_output_vectors = dqmadObj_HB->Inference_CMSSW(digiHcal2DHist_depth_1, + digiHcal2DHist_depth_2, + digiHcal2DHist_depth_3, + digiHcal2DHist_depth_4, + digiHcal2DHist_depth_5, + digiHcal2DHist_depth_6, + digiHcal2DHist_depth_7, + LS_numEvents, + (float)flagDecisionThr); + + std::vector> ad_HEmodel_output_vectors = dqmadObj_HE->Inference_CMSSW(digiHcal2DHist_depth_1, + digiHcal2DHist_depth_2, + digiHcal2DHist_depth_3, + digiHcal2DHist_depth_4, + digiHcal2DHist_depth_5, + digiHcal2DHist_depth_6, + digiHcal2DHist_depth_7, + LS_numEvents, + (float)flagDecisionThr); + + std::vector>> digiHcal3DHist_ANOMALY_FLAG_HB = + dqmadObj_HB->ONNXOutputToDQMHistMap(ad_HBmodel_output_vectors, 7); + std::vector>> digiHcal3DHist_ANOMALY_FLAG_HE = + dqmadObj_HE->ONNXOutputToDQMHistMap(ad_HEmodel_output_vectors, 7); + + int NHB_MLbadflags_ = 0, NHE_MLbadflags_ = 0; + for (const auto& plane : digiHcal3DHist_ANOMALY_FLAG_HB) + for (const auto& row : plane) + NHB_MLbadflags_ += std::count(row.begin(), row.end(), 1); + for (const auto& plane : digiHcal3DHist_ANOMALY_FLAG_HE) + for (const auto& row : plane) + NHE_MLbadflags_ += std::count(row.begin(), row.end(), 1); + + MLFlagvsLS_Subdet.fill(HcalDetId(HcalBarrel, 1, 1, 1), _currentLS, NHB_MLbadflags_); + MLFlagvsLS_Subdet.fill(HcalDetId(HcalEndcap, 17, 1, 1), _currentLS, NHE_MLbadflags_); + + Occupancy1LS.reset(); + DQTask::globalEndLuminosityBlock(lb, es); +} + +void HcalMLTask::fillDescriptions(edm::ConfigurationDescriptions& descriptions) { + edm::ParameterSetDescription desc; + desc.addUntracked("name", "HcalMLTask"); + desc.addUntracked( + "onnx_model_path_HB", + "DQM/HcalTasks/data/HB_2022/" + "CGAE_MultiDim_SPATIAL_vONNX_RCLv22_PIXEL_BT_BN_RIN_IPHI_MED_5218_v06_02_2023_21h01_stateful.onnx"); + desc.addUntracked( + "onnx_model_path_HE", + "DQM/HcalTasks/data/HE_2022/" + 
"CGAE_MultiDim_SPATIAL_vONNX_RCLv22_PIXEL_BT_BN_RIN_IPHI_MED_7763_v06_02_2023_22h55_stateful.onnx"); + desc.addUntracked("flagDecisionThr", 20.); + desc.addUntracked("debug", 0); + desc.addUntracked("runkeyVal", 0); + desc.addUntracked("runkeyName", "pp_run"); + desc.addUntracked("ptype", 1); + desc.addUntracked("mtype", true); + desc.addUntracked("subsystem", "Hcal"); + desc.addUntracked("tagHBHE", edm::InputTag("hcalDigis")); + desc.addUntracked("tagHO", edm::InputTag("hcalDigis")); + desc.addUntracked("tagHF", edm::InputTag("hcalDigis")); + descriptions.addWithDefaultLabel(desc); +} + +DEFINE_FWK_MODULE(HcalMLTask); diff --git a/DQM/HcalTasks/plugins/OnlineDQMDigiAD_cmssw.cc b/DQM/HcalTasks/plugins/OnlineDQMDigiAD_cmssw.cc new file mode 100644 index 0000000000000..7cfbb0f7d07af --- /dev/null +++ b/DQM/HcalTasks/plugins/OnlineDQMDigiAD_cmssw.cc @@ -0,0 +1,320 @@ +/* + * OnlineDQMDigiAD_cmssw.cpp + * + * Created on: Jun 10, 2023 + * Author: Mulugeta W.Asres, UiA, Norway + * + * The implementation follows https://github.com/cms-sw/cmssw/tree/master/PhysicsTools/ONNXRuntime + */ + +// #include "FWCore/Utilities/interface/Exception.h" +// #include "FWCore/Utilities/interface/thread_safety_macros.h" +// #include "FWCore/Framework/interface/Event.h" +// #include "FWCore/Framework/interface/EDAnalyzer.h" +#include "PhysicsTools/ONNXRuntime/interface/ONNXRuntime.h" +#include "FWCore/ParameterSet/interface/FileInPath.h" +#include "HeterogeneousCore/CUDAUtilities/interface/requireDevices.h" + +#include +#include +#include +#include +#include +#include +#include + +#include "DQM/HcalTasks/plugins/OnlineDQMDigiAD_cmssw.h" + +// using namespace std; +using namespace cms::Ort; + +// Constructor +OnlineDQMDigiAD::OnlineDQMDigiAD(const std::string model_system_name, + const std::string &modelFilepath, + Backend backend) { + std::string instanceName{"DESMOD Digioccupancy Map AD inference"}; + + /**************** Initailize Model Memory States ******************/ + 
InitializeState(); // initailize model memory states to zero + + /**************** Create ORT session ******************/ + // Set up options for session + auto session_options = ONNXRuntime::defaultSessionOptions(backend); + // Create session by loading the onnx model + model_path = edm::FileInPath(modelFilepath).fullPath(); + auto uOrtSession = std::make_unique(model_path, &session_options); + ort_mSession = std::move(uOrtSession); + + // check model availability + hcal_subsystem_name = model_system_name; + + IsModelExist(hcal_subsystem_name); // assert model integration for the given hcal system name + + if (hcal_subsystem_name == "he") { + std::vector> input_shapes_ = { + {batch_size, 64, 72, 7, 1}, + {batch_size, 1}, + {1, 1}, + {batch_size, model_state_inner_dim, model_state_layer_dims[0][0]}, + {batch_size, model_state_inner_dim, model_state_layer_dims[0][0]}, + {batch_size, model_state_inner_dim, model_state_layer_dims[0][1]}, + {batch_size, model_state_inner_dim, model_state_layer_dims[0][1]}, + {batch_size, model_state_inner_dim, model_state_layer_dims[1][0]}, + {batch_size, model_state_inner_dim, model_state_layer_dims[1][0]}, + {batch_size, model_state_inner_dim, model_state_layer_dims[1][1]}, + {batch_size, model_state_inner_dim, model_state_layer_dims[1][1]}}; // input dims + input_shapes = input_shapes_; + } + + else if (hcal_subsystem_name == "hb") { + std::vector> input_shapes_ = { + {batch_size, 64, 72, 4, 1}, + {batch_size, 1}, + {1, 1}, + {batch_size, model_state_inner_dim, model_state_layer_dims[0][0]}, + {batch_size, model_state_inner_dim, model_state_layer_dims[0][0]}, + {batch_size, model_state_inner_dim, model_state_layer_dims[0][1]}, + {batch_size, model_state_inner_dim, model_state_layer_dims[0][1]}, + {batch_size, model_state_inner_dim, model_state_layer_dims[1][0]}, + {batch_size, model_state_inner_dim, model_state_layer_dims[1][0]}, + {batch_size, model_state_inner_dim, model_state_layer_dims[1][1]}, + {batch_size, 
model_state_inner_dim, model_state_layer_dims[1][1]}}; // input dims + input_shapes = input_shapes_; + } +} + +void OnlineDQMDigiAD::IsModelExist(std::string hcal_subsystem_name) { + if (std::find(hcal_modeled_systems.begin(), hcal_modeled_systems.end(), hcal_subsystem_name) == + hcal_modeled_systems.end()) { + std::string err = + "ML for OnlineDQM is not currently supported for the selected " + hcal_subsystem_name + " system!\n"; + throw std::invalid_argument(err); + } +} + +void OnlineDQMDigiAD::InitializeState() { + // model memory states vectors init, only when the runs starts or for the first LS + std::fill(input_model_state_memory_e_0_0.begin(), + input_model_state_memory_e_0_0.end(), + float(0.0)); // init model memory states-encoder_layer_0_state_0 to zero + std::fill(input_model_state_memory_e_0_1.begin(), + input_model_state_memory_e_0_1.end(), + float(0.0)); // init model memory states-encoder_layer_0_state_1 to zero + std::fill(input_model_state_memory_e_1_0.begin(), + input_model_state_memory_e_1_0.end(), + float(0.0)); // init model memory states-encoder_layer_1_state_0 to zero + std::fill(input_model_state_memory_e_1_1.begin(), + input_model_state_memory_e_1_1.end(), + float(0.0)); // init model memory states-encoder_layer_1_state_1 to zero + std::fill(input_model_state_memory_d_0_0.begin(), + input_model_state_memory_d_0_0.end(), + float(0.0)); // init model memory states-decoder_layer_0_state_0 to zero + std::fill(input_model_state_memory_d_0_1.begin(), + input_model_state_memory_d_0_1.end(), + float(0.0)); // init model memory states-decoder_layer_0_state_1 to zero + std::fill(input_model_state_memory_d_1_0.begin(), + input_model_state_memory_d_1_0.end(), + float(0.0)); // init model memory states-decoder_layer_1_state_0 to zero + std::fill(input_model_state_memory_d_1_1.begin(), + input_model_state_memory_d_1_1.end(), + float(0.0)); // init model memory states-decoder_layer_1_state_1 to zero + + // model_state_refresh_counter = 15; // counter set 
due to onnx double datatype handling limitation that might cause precision error to propagate. + model_state_refresh_counter = + 1; // DQM multithread returns non-sequential LS. Hence, the model will not keep states (experimental) +} + +std::vector OnlineDQMDigiAD::Serialize2DVector(const std::vector> &input_2d_vec) { + std::vector output; + for (const auto &row : input_2d_vec) { + for (const auto &element : row) { + output.push_back(element); + } + } + return output; +} + +std::vector> OnlineDQMDigiAD::Map1DTo2DVector(const std::vector &input_1d_vec, + const int numSplits) { + // chunk length; 0 when numSplits is not positive (or exceeds the input size), so the loop below emits nothing + std::size_t const splitted_size = numSplits > 0 ? input_1d_vec.size() / numSplits : 0; + std::vector> output_2d_vec; + // advance one whole chunk per step and stop before a chunk would run past the end; a trailing remainder is dropped + for (size_t i = 0; splitted_size > 0 && i + splitted_size <= input_1d_vec.size(); i += splitted_size) { + std::vector chunch_vec(input_1d_vec.begin() + i, input_1d_vec.begin() + i + splitted_size); + output_2d_vec.push_back(chunch_vec); + } + return output_2d_vec; +} + +std::vector OnlineDQMDigiAD::PrepareONNXDQMMapVectors( + std::vector>> &digiHcal2DHist_depth_all) { + std::vector digi3DHistVector_serialized; + + for (const std::vector> &digiHcal2DHist_depth : digiHcal2DHist_depth_all) { + std::vector digiHcalDHist_serialized_depth = Serialize2DVector(digiHcal2DHist_depth); + digi3DHistVector_serialized.insert(digi3DHistVector_serialized.end(), + digiHcalDHist_serialized_depth.begin(), + digiHcalDHist_serialized_depth.end()); + } + + return digi3DHistVector_serialized; +} + +std::vector>> OnlineDQMDigiAD::ONNXOutputToDQMHistMap( + const std::vector> &ad_model_output_vectors, const int selOutputIdx) { + // each output_vector is a serialized 3d hist map + const unsigned short numDepth = 7; + const unsigned short numDIeta = 64; + + const std::vector &output_vector = ad_model_output_vectors[selOutputIdx]; + std::vector> output_2d_vec = Map1DTo2DVector(output_vector, numDepth); + + std::vector>> digiHcal3DHist; + for (const std::vector &output_vector_depth : output_2d_vec) { + 
std::vector> digiHcal2DHist_depth = Map1DTo2DVector(output_vector_depth, numDIeta); + digiHcal3DHist.push_back(digiHcal2DHist_depth); + } + + return digiHcal3DHist; +} + +// Perform inference for a given dqm map +std::vector> OnlineDQMDigiAD::Inference(std::vector &digiHcalMapTW, + std::vector &numEvents, + std::vector &adThr, + std::vector &input_model_state_memory_e_0_0, + std::vector &input_model_state_memory_e_0_1, + std::vector &input_model_state_memory_e_1_0, + std::vector &input_model_state_memory_e_1_1, + std::vector &input_model_state_memory_d_0_0, + std::vector &input_model_state_memory_d_0_1, + std::vector &input_model_state_memory_d_1_0, + std::vector &input_model_state_memory_d_1_1) { + /**************** Preprocessing ******************/ + // Create input tensor (including size and value) from the loaded inputs + // Compute the product of all input dimension + // Assign memory for input tensor + // inputTensors will be used by the Session Run for inference + + input_values.clear(); + input_values.emplace_back(digiHcalMapTW); + input_values.emplace_back(numEvents); + input_values.emplace_back(adThr); + input_values.emplace_back(input_model_state_memory_e_0_0); + input_values.emplace_back(input_model_state_memory_e_0_1); + input_values.emplace_back(input_model_state_memory_e_1_0); + input_values.emplace_back(input_model_state_memory_e_1_1); + input_values.emplace_back(input_model_state_memory_d_0_0); + input_values.emplace_back(input_model_state_memory_d_0_1); + input_values.emplace_back(input_model_state_memory_d_1_0); + input_values.emplace_back(input_model_state_memory_d_1_1); + + /**************** Inference ******************/ + + output_values = ort_mSession->run(input_names, input_values, input_shapes, output_names, batch_size); + + return output_values; +} + +// AD method to be called by the CMS system +std::vector> OnlineDQMDigiAD::Inference_CMSSW( + const std::vector> &digiHcal2DHist_depth_1, + const std::vector> &digiHcal2DHist_depth_2, + const 
std::vector> &digiHcal2DHist_depth_3, + const std::vector> &digiHcal2DHist_depth_4, + const std::vector> &digiHcal2DHist_depth_5, + const std::vector> &digiHcal2DHist_depth_6, + const std::vector> &digiHcal2DHist_depth_7, + const float LS_numEvents, + const float flagDecisionThr) + +{ + /**************** Prepare data ******************/ + // merging all 2d hist into one 3d depth[ieta[iphi]] + std::vector>> digiHcal2DHist_depth_all; + + if (hcal_subsystem_name == "he") { + digiHcal2DHist_depth_all.push_back(digiHcal2DHist_depth_1); + digiHcal2DHist_depth_all.push_back(digiHcal2DHist_depth_2); + digiHcal2DHist_depth_all.push_back(digiHcal2DHist_depth_3); + digiHcal2DHist_depth_all.push_back(digiHcal2DHist_depth_4); + digiHcal2DHist_depth_all.push_back(digiHcal2DHist_depth_5); + digiHcal2DHist_depth_all.push_back(digiHcal2DHist_depth_6); + digiHcal2DHist_depth_all.push_back(digiHcal2DHist_depth_7); + } + + else if (hcal_subsystem_name == "hb") { + digiHcal2DHist_depth_all.push_back(digiHcal2DHist_depth_1); + digiHcal2DHist_depth_all.push_back(digiHcal2DHist_depth_2); + digiHcal2DHist_depth_all.push_back(digiHcal2DHist_depth_3); + digiHcal2DHist_depth_all.push_back(digiHcal2DHist_depth_4); + } + + // convert the 3d depth[ieta[iphi]] vector into 1d and commbined + std::vector digiHcalMapTW = PrepareONNXDQMMapVectors(digiHcal2DHist_depth_all); + + std::vector adThr{flagDecisionThr}; // AD decision threshold, increase to reduce sensitivity + std::vector numEvents{LS_numEvents}; + + // call model inference + /**************** Inference ******************/ + std::vector> output_tensors = Inference(digiHcalMapTW, + numEvents, + adThr, + input_model_state_memory_e_0_0, + input_model_state_memory_e_0_1, + input_model_state_memory_e_1_0, + input_model_state_memory_e_1_1, + input_model_state_memory_d_0_0, + input_model_state_memory_d_0_1, + input_model_state_memory_d_1_0, + input_model_state_memory_d_1_1); + + // auto output_tensors = Inference(digiHcalMapTW, numEvents, adThr); 
+ //std::cout << "******* model inference is success *******" << std::endl; + + /**************** Output post processing ******************/ + // split outputs into ad output vectors and state_memory vectors + std::string state_output_name_tag = "rnn_hidden"; + std::vector> ad_model_output_vectors, ad_model_state_vectors; + for (size_t i = 0; i < output_tensors.size(); i++) { + std::string output_names_startstr = output_names[i].substr( + 2, state_output_name_tag.length()); // skip the 2-character prefix, then take as many characters as the state tag has + if (output_names_startstr == state_output_name_tag) { + ad_model_state_vectors.emplace_back(output_tensors[i]); + } else { + ad_model_output_vectors.emplace_back(output_tensors[i]); + } + } + // the state vectors are indexed 0..7 below, so it is their count (not the AD output count) that must match + if (ad_model_state_vectors.size() == num_state_vectors) { + input_model_state_memory_e_0_0 = ad_model_state_vectors[0]; + input_model_state_memory_e_0_1 = ad_model_state_vectors[1]; + input_model_state_memory_e_1_0 = ad_model_state_vectors[2]; + input_model_state_memory_e_1_1 = ad_model_state_vectors[3]; + input_model_state_memory_d_0_0 = ad_model_state_vectors[4]; + input_model_state_memory_d_0_1 = ad_model_state_vectors[5]; + input_model_state_memory_d_1_0 = ad_model_state_vectors[6]; + input_model_state_memory_d_1_1 = ad_model_state_vectors[7]; + } else { + std::cout << "Warning: the number of output state vectors does NOT equals to expected!. The states are set to " + "default values." + << std::endl; + InitializeState(); + } + + // # if onnx is returning serialized 1d vectors instead of vector of 3d vectors + // aml score and flag are at index 5 and 7 of the vector ad_model_output_vectors: anomaly score: ad_model_output_vectors[5], anomaly flags: ad_model_output_vectors[7] + /* + selOutputIdx: index to select of the onnx output. e.g. 
5 is the anomaly score and 7 is the anomaly flag (1 is with anomaly, 0 is healthy) + std::vector>> digiHcal3DHist_ANOMALY_FLAG = ONNXOutputToDQMHistMap(ad_model_output_vectors, 7) + std::vector>> digiHcal3DHist_ANOMALY_SCORE = ONNXOutputToDQMHistMap(ad_model_output_vectors, 5) + */ + + // reduce counter for each ls call. due to onnx double datatype handling limitation that might cause precision error to propagate. + if (--model_state_refresh_counter == 0) + InitializeState(); + + return ad_model_output_vectors; +} diff --git a/DQM/HcalTasks/plugins/OnlineDQMDigiAD_cmssw.h b/DQM/HcalTasks/plugins/OnlineDQMDigiAD_cmssw.h new file mode 100644 index 0000000000000..72a9403723c31 --- /dev/null +++ b/DQM/HcalTasks/plugins/OnlineDQMDigiAD_cmssw.h @@ -0,0 +1,183 @@ +/* + * OnlineDQMDigiAD_cmssw.cpp + * + * Created on: Jun 10, 2023 + * Author: Mulugeta W.Asres, UiA, Norway + * + * The implementation follows https://github.com/cms-sw/cmssw/tree/master/PhysicsTools/ONNXRuntime + */ +#ifndef OnlineDQMDigiAD_cmssw_H_ +#define OnlineDQMDigiAD_cmssw_H_ + +#include "PhysicsTools/ONNXRuntime/interface/ONNXRuntime.h" +#include "FWCore/Utilities/interface/Exception.h" +#include "FWCore/Utilities/interface/thread_safety_macros.h" + +#include +#include +#include +#include +#include +#include + +// Declare OnlineDQMDigiAD class +class OnlineDQMDigiAD { +public: + /** + * @brief Constructor + * @param modelFilepath: path to the .onnx file + * @param Backend: backend selection cpu or gpu + */ + OnlineDQMDigiAD(const std::string model_system_name, + const std::string &modelFilepath, + cms::Ort::Backend backend = cms::Ort::Backend::cpu); + + /** + * @brief check whether onnx model integration is added for the selected hcal system + */ + void IsModelExist(std::string hcal_subsystem_name); + + /** + * @brief Resets ml model memory states to default and function needs to be called when new collision run starts + */ + void InitializeState(); + + /** + * @brief Perform inference on a single image 
+ * @param digiHcalMapTW: The input digioccupancy maps in time window + * @param numEvents: The input number of events for map renormalization in time window + * @param adThr: The anomaly detection decision threshold + * @param input_model_state_memory_: The model memory states + * @param output_tensors: output arrays + * @return the list of multidimensional arrays + */ + std::vector> Inference(std::vector &digiHcalMapTW, + std::vector &numEvents, + std::vector &adThr, + std::vector &input_model_state_memory_e_0_0, + std::vector &input_model_state_memory_e_0_1, + std::vector &input_model_state_memory_e_1_0, + std::vector &input_model_state_memory_e_1_1, + std::vector &input_model_state_memory_d_0_0, + std::vector &input_model_state_memory_d_0_1, + std::vector &input_model_state_memory_d_1_0, + std::vector &input_model_state_memory_d_1_1); + /** + * @brief Perform inference on a single image + * @param digiHcal2DHist_depth_1: 2D histogram digioccupancy of the 1st depth of the hcal-hehb + * @param digiHcal2DHist_depth_2: 2D histogram digioccupancy of the 2nd depth of the hcal-hehb + * @param digiHcal2DHist_depth_3: 2D histogram digioccupancy of the 3rd depth of the hcal-hehb + * @param digiHcal2DHist_depth_4: 2D histogram digioccupancy of the 4th depth of the hcal-hehb + * @param digiHcal2DHist_depth_5: 2D histogram digioccupancy of the 5th depth of the hcal-hehb + * @param digiHcal2DHist_depth_6: 2D histogram digioccupancy of the 6th depth of the hcal-hehb + * @param digiHcal2DHist_depth_7: 2D histogram digioccupancy of the 7th depth of the hcal-hehb + * @param LS_numEvents: The input number of events for digioccupancy map renormalization + * @param flagDecisionThr: The anomaly detection decision threshold, decrease to increase sensitivity + * @return ad_model_output_vectors: the vectors of multidimensional arrays: output_data_0, output_data_1, ... 
+ */ + std::vector> Inference_CMSSW(const std::vector> &digiHcal2DHist_depth_1, + const std::vector> &digiHcal2DHist_depth_2, + const std::vector> &digiHcal2DHist_depth_3, + const std::vector> &digiHcal2DHist_depth_4, + const std::vector> &digiHcal2DHist_depth_5, + const std::vector> &digiHcal2DHist_depth_6, + const std::vector> &digiHcal2DHist_depth_7, + const float LS_numEvents, + const float flagDecisionThr = 20); + + /** + @brief Converts 1D serialized vector output of the onnx into 3d hcal-hehp vector + @param ad_model_output_vectors: vector of 3D histogram maps the hcal-hehb, each vector output from the onnx. e.g 3d map of anomaly score and 3d map of anomaly flag or label + @param selOutputIdx: index to select of the onnx output. e.g. 5 is the anomaly score and 7 is the anomaly flag (1 is with anomaly, 0 is healthy) + @return ad_model_output_vectors: the vectors of multidimensional arrays: output_data_0, output_data_1, ... + */ + std::vector>> ONNXOutputToDQMHistMap( + const std::vector> &ad_model_output_vectors, const int selOutputIdx = 7); + +private: + // onnx session + const std::vector hcal_modeled_systems = {"he", "hb"}; + std::string hcal_subsystem_name; + std::unique_ptr ort_mSession = nullptr; + std::string model_path; // onnx model path + + // names of onnx model input vectors; do not change + const std::vector input_names = { + "input_data", + "input_data_exo", + "anomaly_std_th", + "e_rnn_hidden__layer_0_state_0", + "e_rnn_hidden__layer_0_state_1", + "e_rnn_hidden__layer_1_state_0", + "e_rnn_hidden__layer_1_state_1", + "d_rnn_hidden__layer_0_state_0", + "d_rnn_hidden__layer_0_state_1", + "d_rnn_hidden__layer_1_state_0", + "d_rnn_hidden__layer_1_state_1", + }; + + // names of onnx model outputs vectors; do not change + const std::vector output_names = { + "target_data", + "pred_data", + "pred_err_spatial", + "pred_err_window_spatial", + "pred_err_spatial_scaled", + "pred_err_window_spatial_scaled", + "pred_err_spatial_scaled_aml", + 
"pred_err_window_spatial_scaled_aml", + "e_rnn_hidden__layer_0_state_0_o", + "e_rnn_hidden__layer_0_state_1_o", + "e_rnn_hidden__layer_1_state_0_o", + "e_rnn_hidden__layer_1_state_1_o", + "d_rnn_hidden__layer_0_state_0_o", + "d_rnn_hidden__layer_0_state_1_o", + "d_rnn_hidden__layer_1_state_0_o", + "d_rnn_hidden__layer_1_state_1_o", + }; + + // model state network config declaration : encoder and decoder have each two lstm layers(each hold two state vectors, h0, c0) + const size_t num_state_vectors = 8; + const unsigned int model_state_inner_dim = 2; // do not change + const std::vector> model_state_layer_dims = { + {128, 32}, {128, 640}}; // do not change, encoder[layer_0, layer_1] and decoder [layer_0, layer_1] + const std::vector> model_state_layer_serialized_dims = { + {256, 64}, + {256, + 1280}}; // do not change, model_state_inner_dim*encoder[layer_0, layer_1] and model_state_inner_dim*decoder [layer_0, layer_1] + // unsigned model_state_refresh_counter = 15; // do not change for now. set due to onnx double datatype handling limitation that might cause precision error to propagate. + unsigned model_state_refresh_counter = + 1; // DQM multithread returns non-sequential LS. 
Hence, the model will not keep states (experimental) + + std::vector input_model_state_memory_e_0_0{std::vector(model_state_layer_serialized_dims[0][0])}; + std::vector input_model_state_memory_e_0_1{std::vector(model_state_layer_serialized_dims[0][0])}; + std::vector input_model_state_memory_e_1_0{std::vector(model_state_layer_serialized_dims[0][1])}; + std::vector input_model_state_memory_e_1_1{std::vector(model_state_layer_serialized_dims[0][1])}; + std::vector input_model_state_memory_d_0_0{std::vector(model_state_layer_serialized_dims[1][0])}; + std::vector input_model_state_memory_d_0_1{std::vector(model_state_layer_serialized_dims[1][0])}; + std::vector input_model_state_memory_d_1_0{std::vector(model_state_layer_serialized_dims[1][1])}; + std::vector input_model_state_memory_d_1_1{std::vector(model_state_layer_serialized_dims[1][1])}; + + // input and outputs + int64_t batch_size = 1; // number maps to be evaluated at once, a single time-window + std::vector> input_values, output_values; + std::vector> input_shapes; + + /** + * @brief Serializes 2d vectors into 1d + */ + std::vector Serialize2DVector(const std::vector> &input_2d_vec); + + /** + * @brief Converts serialized 1d vectors into 2d + */ + std::vector> Map1DTo2DVector(const std::vector &input_1d_vec, const int numSplits); + + /** + * @brief Prepares model input serialized dqm histogram from 2D histogram inputs from the cmssw + * @param digiHcal2DHist_depth_all: 3D vector (depth[ieta[iphi]]) of combined 2D histogram digioccupancy of the any depth of the hcal + */ + std::vector PrepareONNXDQMMapVectors(std::vector>> &digiHcal2DHist_depth_all); +}; + +#endif // OnlineDQMDigiAD_cmssw_H_ diff --git a/DQM/Integration/python/clients/hcal_dqm_sourceclient-live_cfg.py b/DQM/Integration/python/clients/hcal_dqm_sourceclient-live_cfg.py index 78670f84c42a0..19e5cb43ebb70 100644 --- a/DQM/Integration/python/clients/hcal_dqm_sourceclient-live_cfg.py +++ 
b/DQM/Integration/python/clients/hcal_dqm_sourceclient-live_cfg.py @@ -151,6 +151,7 @@ #process.load('DQM.HcalTasks.QIE11Task') # 2018: integrate QIE11Task into DigiTask process.load('DQM.HcalTasks.HcalOnlineHarvesting') process.load('DQM.HcalTasks.HcalQualityTests') +process.load('DQM.HcalTasks.hcalMLTask_cfi') #------------------------------------- # For Debugginb @@ -192,6 +193,7 @@ #+process.qie11Task #ZDC to be removed after 2018 PbPb run +process.zdcQIE10Task + +process.hcalMLTask ) if isHeavyIon: