From 3987153b53188870c193d558f23e833634940a08 Mon Sep 17 00:00:00 2001
From: Joosep Pata <joosep.pata@gmail.com>
Date: Thu, 23 Sep 2021 15:39:10 +0300
Subject: [PATCH] mlpf v2

---
 .../RecoParticleFlow_EventContent_cff.py      |   5 -
 .../python/RecoParticleFlow_cff.py            |  14 +-
 RecoParticleFlow/PFProducer/BuildFile.xml     |   1 +
 .../PFProducer/interface/MLPFModel.h          |  44 +++--
 .../PFProducer/plugins/BuildFile.xml          |   2 +-
 .../PFProducer/plugins/MLPFProducer.cc        | 164 +++++++++++-------
 .../python/mlpf_EventContent_cff.py           |   8 -
 RecoParticleFlow/PFProducer/src/MLPFModel.cc  | 113 ++++++++++--
 .../plugins/PFAnalysisNtuplizer.cc            |  73 +++++++-
 .../test/pfanalysis_ntuple.py                 |   2 +-
 10 files changed, 300 insertions(+), 126 deletions(-)
 delete mode 100644 RecoParticleFlow/PFProducer/python/mlpf_EventContent_cff.py
diff --git a/RecoParticleFlow/Configuration/python/RecoParticleFlow_EventContent_cff.py b/RecoParticleFlow/Configuration/python/RecoParticleFlow_EventContent_cff.py
index 2bd2008a44be8..0c43c82cc25a6 100644
--- a/RecoParticleFlow/Configuration/python/RecoParticleFlow_EventContent_cff.py
+++ b/RecoParticleFlow/Configuration/python/RecoParticleFlow_EventContent_cff.py
@@ -81,8 +81,3 @@
                                                             'keep recoPFRecHits_particleFlowRecHitHGC__*',
                                                             'keep *_simPFProducer_*_*'])
 
-from Configuration.ProcessModifiers.mlpf_cff import mlpf
-from RecoParticleFlow.PFProducer.mlpf_EventContent_cff import MLPF_RECO
-
-mlpf.toModify(RecoParticleFlowRECO,
-    outputCommands = RecoParticleFlowRECO.outputCommands + MLPF_RECO.outputCommands)
diff --git a/RecoParticleFlow/Configuration/python/RecoParticleFlow_cff.py b/RecoParticleFlow/Configuration/python/RecoParticleFlow_cff.py
index 7047df0704ae5..9331eeecdd3c4 100644
--- a/RecoParticleFlow/Configuration/python/RecoParticleFlow_cff.py
+++ b/RecoParticleFlow/Configuration/python/RecoParticleFlow_cff.py
@@ -102,15 +102,6 @@
     e.toModify(pfNoPileUp, enable = False)
     e.toModify(pfPileUp, enable = False)
 
-#
-# for MLPF
-from Configuration.ProcessModifiers.mlpf_cff import mlpf
-from RecoParticleFlow.PFProducer.mlpfProducer_cfi import mlpfProducer
-
-_mlpfTask = cms.Task(mlpfProducer, particleFlowRecoTask.copy())
-
-mlpf.toReplaceWith(particleFlowRecoTask, _mlpfTask)
-
 #
 # switch from pfTICL to simPF
 def _findIndicesByModule(process,name):
@@ -144,3 +135,8 @@ def replaceTICLwithSimPF(process):
     )
 
     return process
+
+# for MLPF
+from Configuration.ProcessModifiers.mlpf_cff import mlpf
+from RecoParticleFlow.PFProducer.mlpfProducer_cfi import mlpfProducer
+mlpf.toReplaceWith(particleFlowTmp, mlpfProducer)
diff --git a/RecoParticleFlow/PFProducer/BuildFile.xml b/RecoParticleFlow/PFProducer/BuildFile.xml
index 2eeaf4a6a12b8..98351dbc8143f 100644
--- a/RecoParticleFlow/PFProducer/BuildFile.xml
+++ b/RecoParticleFlow/PFProducer/BuildFile.xml
@@ -16,6 +16,7 @@
 <use name="boost"/>
 <use name="clhep"/>
 <use name="rootmath"/>
+<use name="onnxruntime"/>
 <export>
   <lib name="1"/>
 </export>
diff --git a/RecoParticleFlow/PFProducer/interface/MLPFModel.h b/RecoParticleFlow/PFProducer/interface/MLPFModel.h
index 9c850859981ef..f1105cc1ef4fc 100644
--- a/RecoParticleFlow/PFProducer/interface/MLPFModel.h
+++ b/RecoParticleFlow/PFProducer/interface/MLPFModel.h
@@ -7,31 +7,37 @@
 
 namespace reco::mlpf {
   //The model takes the following number of features for each input PFElement
-  static constexpr unsigned int NUM_ELEMENT_FEATURES = 15;
+  static constexpr unsigned int NUM_ELEMENT_FEATURES = 18;
+  static constexpr unsigned int NUM_OUTPUT_FEATURES = 14;
 
   //these are defined at model creation time and set the random LSH codebook size
-  static constexpr int NUM_MAX_ELEMENTS_BATCH = 20000;
-  static constexpr int LSH_BIN_SIZE = 100;
+  static constexpr int LSH_BIN_SIZE = 160;
+  static constexpr int NUM_MAX_ELEMENTS_BATCH = 200 * LSH_BIN_SIZE;
 
   //In CPU mode, we only want to evaluate each event separately
   static constexpr int BATCH_SIZE = 1;
 
   //The model has 12 outputs for each particle:
   // out[0-7]: particle classification logits
-  // out[8]: regressed eta
-  // out[9]: regressed phi
-  // out[10]: regressed energy
-  // out[11]: regressed charge logit
-  static constexpr unsigned int NUM_OUTPUTS = 12;
-  static constexpr unsigned int NUM_CLASS = 7;
-  static constexpr unsigned int IDX_ETA = 8;
-  static constexpr unsigned int IDX_PHI = 9;
-  static constexpr unsigned int IDX_ENERGY = 10;
-  static constexpr unsigned int IDX_CHARGE = 11;
+  // out[8]: regressed charge
+  // out[9]: regressed pt
+  // out[10]: regressed eta
+  // out[11]: regressed sin phi
+  // out[12]: regressed cos phi
+  // out[13]: regressed energy
+  static constexpr unsigned int IDX_CLASS = 7;
+
+  static constexpr unsigned int IDX_CHARGE = 8;
+
+  static constexpr unsigned int IDX_PT = 9;
+  static constexpr unsigned int IDX_ETA = 10;
+  static constexpr unsigned int IDX_SIN_PHI = 11;
+  static constexpr unsigned int IDX_COS_PHI = 12;
+  static constexpr unsigned int IDX_ENERGY = 13;
 
   //index [0, N_pdgids) -> PDGID
   //this maps the absolute values of the predicted PDGIDs to an array of ascending indices
-  static const std::vector<int> pdgid_encoding = {0, 1, 2, 11, 13, 22, 130, 211};
+  static const std::vector<int> pdgid_encoding = {0, 211, 130, 1, 2, 22, 11, 13};
 
   //PFElement::type -> index [0, N_types)
   //this maps the type of the PFElement to an ascending index that is used by the model to distinguish between different elements
@@ -55,7 +61,13 @@ namespace reco::mlpf {
 
   int argMax(std::vector<float> const& vec);
 
-  reco::PFCandidate makeCandidate(int pred_pid, int pred_charge, float pred_e, float pred_eta, float pred_phi);
+  reco::PFCandidate makeCandidate(int pred_pid,
+                                  int pred_charge,
+                                  float pred_pt,
+                                  float pred_eta,
+                                  float pred_sin_phi,
+                                  float pred_cos_phi,
+                                  float pred_e);
 
   const std::vector<const reco::PFBlockElement*> getPFElements(const reco::PFBlockCollection& blocks);
 
@@ -64,4 +76,4 @@ namespace reco::mlpf {
                         size_t ielem_originator);
 };  // namespace reco::mlpf
 
-#endif
\ No newline at end of file
+#endif
diff --git a/RecoParticleFlow/PFProducer/plugins/BuildFile.xml b/RecoParticleFlow/PFProducer/plugins/BuildFile.xml
index ddf9c80ffe168..6c4c7c4a9bcb8 100644
--- a/RecoParticleFlow/PFProducer/plugins/BuildFile.xml
+++ b/RecoParticleFlow/PFProducer/plugins/BuildFile.xml
@@ -80,6 +80,6 @@
   <use name="RecoParticleFlow/PFProducer"/>
   <use name="RecoParticleFlow/PFTracking"/>
   <use name="RecoEcal/EgammaCoreTools"/>
-  <use name="PhysicsTools/TensorFlow"/>
+  <use name="PhysicsTools/ONNXRuntime"/>
   <flags EDM_PLUGIN="1"/>
 </library>
diff --git a/RecoParticleFlow/PFProducer/plugins/MLPFProducer.cc b/RecoParticleFlow/PFProducer/plugins/MLPFProducer.cc
index add3f1e27ddd1..0a926a06c61df 100644
--- a/RecoParticleFlow/PFProducer/plugins/MLPFProducer.cc
+++ b/RecoParticleFlow/PFProducer/plugins/MLPFProducer.cc
@@ -4,36 +4,35 @@
 #include "FWCore/Framework/interface/MakerMacros.h"
 
 #include "DataFormats/ParticleFlowCandidate/interface/PFCandidate.h"
-#include "PhysicsTools/TensorFlow/interface/TensorFlow.h"
+#include "PhysicsTools/ONNXRuntime/interface/ONNXRuntime.h"
 #include "RecoParticleFlow/PFProducer/interface/MLPFModel.h"
 
-struct MLPFCache {
-  const tensorflow::GraphDef* graph_def;
-};
+#include "DataFormats/ParticleFlowReco/interface/PFBlockElementTrack.h"
+
+using namespace cms::Ort;
+
+//use this to switch on detailed print statements in MLPF
+//#define MLPF_DEBUG
 
-class MLPFProducer : public edm::stream::EDProducer<edm::GlobalCache<MLPFCache> > {
+class MLPFProducer : public edm::stream::EDProducer<edm::GlobalCache<ONNXRuntime>> {
 public:
-  explicit MLPFProducer(const edm::ParameterSet&, const MLPFCache*);
+  explicit MLPFProducer(const edm::ParameterSet&, const ONNXRuntime*);
+
   void produce(edm::Event& event, const edm::EventSetup& setup) override;
   static void fillDescriptions(edm::ConfigurationDescriptions& descriptions);
 
   // static methods for handling the global cache
-  static std::unique_ptr<MLPFCache> initializeGlobalCache(const edm::ParameterSet&);
-  static void globalEndJob(MLPFCache*);
+  static std::unique_ptr<ONNXRuntime> initializeGlobalCache(const edm::ParameterSet&);
+  static void globalEndJob(const ONNXRuntime*);
 
 private:
   const edm::EDPutTokenT<reco::PFCandidateCollection> pfCandidatesPutToken_;
   const edm::EDGetTokenT<reco::PFBlockCollection> inputTagBlocks_;
-  const std::string model_path_;
-  tensorflow::Session* session_;
 };
 
-MLPFProducer::MLPFProducer(const edm::ParameterSet& cfg, const MLPFCache* cache)
+MLPFProducer::MLPFProducer(const edm::ParameterSet& cfg, const ONNXRuntime* cache)
     : pfCandidatesPutToken_{produces<reco::PFCandidateCollection>()},
-      inputTagBlocks_(consumes<reco::PFBlockCollection>(cfg.getParameter<edm::InputTag>("src"))),
-      model_path_(cfg.getParameter<std::string>("model_path")) {
-  session_ = tensorflow::createSession(cache->graph_def);
-}
+      inputTagBlocks_(consumes<reco::PFBlockCollection>(cfg.getParameter<edm::InputTag>("src"))) {}
 
 void MLPFProducer::produce(edm::Event& event, const edm::EventSetup& setup) {
   using namespace reco::mlpf;
@@ -41,20 +40,34 @@ void MLPFProducer::produce(edm::Event& event, const edm::EventSetup& setup) {
   const auto& blocks = event.get(inputTagBlocks_);
   const auto& all_elements = getPFElements(blocks);
 
-  const long long int num_elements_total = all_elements.size();
+  std::vector<const reco::PFBlockElement*> selected_elements;
+  unsigned int num_elements_total = 0;
+  for (const auto* pelem : all_elements) {
+    if (pelem->type() == reco::PFBlockElement::PS1 || pelem->type() == reco::PFBlockElement::PS2) {
+      continue;
+    }
+    num_elements_total += 1;
+    selected_elements.push_back(pelem);
+  }
+  assert(num_elements_total < NUM_MAX_ELEMENTS_BATCH);
 
   //tensor size must be a multiple of the bin size and larger than the number of elements
-  const auto tensor_size = LSH_BIN_SIZE * (num_elements_total / LSH_BIN_SIZE + 1);
+  const auto tensor_size = LSH_BIN_SIZE * std::max(2u, (num_elements_total / LSH_BIN_SIZE + 1));
   assert(tensor_size <= NUM_MAX_ELEMENTS_BATCH);
+  assert(tensor_size % LSH_BIN_SIZE == 0);
 
-  //Create the input tensor
-  tensorflow::TensorShape shape({BATCH_SIZE, tensor_size, NUM_ELEMENT_FEATURES});
-  tensorflow::Tensor input(tensorflow::DT_FLOAT, shape);
-  input.flat<float>().setZero();
+#ifdef MLPF_DEBUG
+  std::cout << "tensor_size=" << tensor_size << std::endl;
+#endif
 
-  //Fill the input tensor
+  //Fill the input tensor (batch, elems, features) = (1, tensor_size, NUM_ELEMENT_FEATURES)
+  std::vector<std::vector<float>> inputs(1, std::vector<float>(NUM_ELEMENT_FEATURES * tensor_size, 0.0));
   unsigned int ielem = 0;
-  for (const auto* pelem : all_elements) {
+  for (const auto* pelem : selected_elements) {
+    if (ielem > tensor_size) {
+      continue;
+    }
+
     const auto& elem = *pelem;
 
     //prepare the input array from the PFElement
@@ -62,77 +75,94 @@ void MLPFProducer::produce(edm::Event& event, const edm::EventSetup& setup) {
 
     //copy features to the input array
     for (unsigned int iprop = 0; iprop < NUM_ELEMENT_FEATURES; iprop++) {
-      input.tensor<float, 3>()(0, ielem, iprop) = normalize(props[iprop]);
+      inputs[0][ielem * NUM_ELEMENT_FEATURES + iprop] = normalize(props[iprop]);
     }
     ielem += 1;
   }
 
-  //TF model input and output tensor names
-  const tensorflow::NamedTensorList input_list = {{"x:0", input}};
-  const std::vector<std::string> output_names = {"Identity:0"};
-
-  //Prepare the output tensor
-  std::vector<tensorflow::Tensor> outputs;
-
   //run the GNN inference, given the inputs and the output.
-  //Note that the GNN enables information transfer between the input PFElements,
-  //such that the output ML-PFCandidates are in general combinations of the input PFElements, in the form of
-  //y_out = Adj.x_in, where x_in is input matrix (num_elem, NUM_ELEMENT_FEATURES), y_out is the output matrix (num_elem, NUM_OUTPUT_FEATURES)
-  //and Adj is an adjacency matrix between the elements that is constructed on the fly during model inference.
-  tensorflow::run(session_, input_list, output_names, &outputs);
-
-  //process the output tensor to ML-PFCandidates.
-  //The output can contain up to num_elem particles, with predicted PDGID=0 corresponding to no particles predicted.
-  const auto out_arr = outputs[0].tensor<float, 3>();
+  const auto& outputs = globalCache()->run({"x:0"}, inputs, {{1, tensor_size, NUM_ELEMENT_FEATURES}});
+  const auto& output = outputs[0];
+  assert(output.size() == tensor_size * NUM_OUTPUT_FEATURES);
 
   std::vector<reco::PFCandidate> pOutputCandidateCollection;
-  for (unsigned int ielem = 0; ielem < all_elements.size(); ielem++) {
-    //get the coefficients in the output corresponding to the class probabilities (raw logits)
-    std::vector<float> pred_id_logits;
-    for (unsigned int idx_id = 0; idx_id <= NUM_CLASS; idx_id++) {
-      pred_id_logits.push_back(out_arr(0, ielem, idx_id));
+  for (size_t ielem = 0; ielem < num_elements_total; ielem++) {
+    std::vector<float> pred_id_probas(IDX_CLASS + 1, 0.0);
+    const reco::PFBlockElement* elem = selected_elements[ielem];
+
+    for (unsigned int idx_id = 0; idx_id <= IDX_CLASS; idx_id++) {
+      auto pred_proba = output[ielem * NUM_OUTPUT_FEATURES + idx_id];
+      assert(!std::isnan(pred_proba));
+      pred_id_probas[idx_id] = pred_proba;
     }
 
+    auto imax = argMax(pred_id_probas);
+
     //get the most probable class PDGID
-    int pred_pid = pdgid_encoding[argMax(pred_id_logits)];
+    int pred_pid = pdgid_encoding[imax];
 
-    //get the predicted momentum components
-    float pred_eta = out_arr(0, ielem, IDX_ETA);
-    float pred_phi = out_arr(0, ielem, IDX_PHI);
-    float pred_charge = out_arr(0, ielem, IDX_CHARGE);
-    float pred_e = out_arr(0, ielem, IDX_ENERGY);
+#ifdef MLPF_DEBUG
+    std::cout << "ielem=" << ielem << " inputs:";
+    for (unsigned int iprop = 0; iprop < NUM_ELEMENT_FEATURES; iprop++) {
+      std::cout << iprop << "=" << inputs[0][ielem * NUM_ELEMENT_FEATURES + iprop] << " ";
+    }
+    std::cout << std::endl;
+    std::cout << "ielem=" << ielem << " pred: pid=" << pred_pid << std::endl;
+#endif
 
     //a particle was predicted for this PFElement, otherwise it was a spectator
     if (pred_pid != 0) {
-      auto cand = makeCandidate(pred_pid, pred_charge, pred_e, pred_eta, pred_phi);
-      setCandidateRefs(cand, all_elements, ielem);
+      //muons and charged hadrons should only come from tracks, otherwise we won't have track references to pass downstream
+      if (((pred_pid == 13) || (pred_pid == 211)) && elem->type() != reco::PFBlockElement::TRACK) {
+        pred_pid = 130;
+      }
+
+      if (elem->type() == reco::PFBlockElement::TRACK) {
+        const auto* eltTrack = dynamic_cast<const reco::PFBlockElementTrack*>(elem);
+
+        //a track with no muon ref should not produce a muon candidate, instead we interpret it as a charged hadron
+        if (pred_pid == 13 && eltTrack->muonRef().isNull()) {
+          pred_pid = 211;
+        }
+
+        //tracks from displaced vertices need reference debugging downstream as well, so we just treat them as neutrals for the moment
+        if ((pred_pid == 211) && (eltTrack->isLinkedToDisplacedVertex())) {
+          pred_pid = 130;
+        }
+      }
+
+      //get the predicted momentum components
+      float pred_pt = output[ielem * NUM_OUTPUT_FEATURES + IDX_PT];
+      float pred_eta = output[ielem * NUM_OUTPUT_FEATURES + IDX_ETA];
+      float pred_sin_phi = output[ielem * NUM_OUTPUT_FEATURES + IDX_SIN_PHI];
+      float pred_cos_phi = output[ielem * NUM_OUTPUT_FEATURES + IDX_COS_PHI];
+      float pred_e = output[ielem * NUM_OUTPUT_FEATURES + IDX_ENERGY];
+      float pred_charge = output[ielem * NUM_OUTPUT_FEATURES + IDX_CHARGE];
+
+      auto cand = makeCandidate(pred_pid, pred_charge, pred_pt, pred_eta, pred_sin_phi, pred_cos_phi, pred_e);
+      setCandidateRefs(cand, selected_elements, ielem);
       pOutputCandidateCollection.push_back(cand);
+
+#ifdef MLPF_DEBUG
+      std::cout << "ielem=" << ielem << " cand: pid=" << cand.pdgId() << " E=" << cand.energy() << " pt=" << cand.pt()
+                << " eta=" << cand.eta() << " phi=" << cand.phi() << " charge=" << cand.charge() << std::endl;
+#endif
     }
   }  //loop over PFElements
 
   event.emplace(pfCandidatesPutToken_, pOutputCandidateCollection);
 }
 
-std::unique_ptr<MLPFCache> MLPFProducer::initializeGlobalCache(const edm::ParameterSet& params) {
-  // this method is supposed to create, initialize and return a MLPFCache instance
-  std::unique_ptr<MLPFCache> cache = std::make_unique<MLPFCache>();
-
-  //load the frozen TF graph of the GNN model
-  std::string path = params.getParameter<std::string>("model_path");
-  auto fullPath = edm::FileInPath(path).fullPath();
-  LogDebug("MLPFProducer") << "Initializing MLPF model from " << fullPath;
-
-  cache->graph_def = tensorflow::loadGraphDef(fullPath);
-
-  return cache;
+std::unique_ptr<ONNXRuntime> MLPFProducer::initializeGlobalCache(const edm::ParameterSet& params) {
+  return std::make_unique<ONNXRuntime>(params.getParameter<edm::FileInPath>("model_path").fullPath());
 }
 
-void MLPFProducer::globalEndJob(MLPFCache* cache) { delete cache->graph_def; }
+void MLPFProducer::globalEndJob(const ONNXRuntime* cache) {}
 
 void MLPFProducer::fillDescriptions(edm::ConfigurationDescriptions& descriptions) {
   edm::ParameterSetDescription desc;
   desc.add<edm::InputTag>("src", edm::InputTag("particleFlowBlock"));
-  desc.add<std::string>("model_path", "RecoParticleFlow/PFProducer/data/mlpf/mlpf_2020_11_04.pb");
+  desc.add<edm::FileInPath>("model_path", edm::FileInPath("RecoParticleFlow/PFProducer/data/mlpf/mlpf_2021_09_20.onnx"));
   descriptions.addWithDefaultLabel(desc);
 }
 
diff --git a/RecoParticleFlow/PFProducer/python/mlpf_EventContent_cff.py b/RecoParticleFlow/PFProducer/python/mlpf_EventContent_cff.py
deleted file mode 100644
index 0e91687fa40a6..0000000000000
--- a/RecoParticleFlow/PFProducer/python/mlpf_EventContent_cff.py
+++ /dev/null
@@ -1,8 +0,0 @@
-import FWCore.ParameterSet.Config as cms
-
-MLPF_RECO = cms.PSet(
-    outputCommands = cms.untracked.vstring(
-        'keep recoPFCandidates_mlpfProducer_*_*',
-        )
-)
-
diff --git a/RecoParticleFlow/PFProducer/src/MLPFModel.cc b/RecoParticleFlow/PFProducer/src/MLPFModel.cc
index 6ca1b61ff0a69..8f61d25b9a29a 100644
--- a/RecoParticleFlow/PFProducer/src/MLPFModel.cc
+++ b/RecoParticleFlow/PFProducer/src/MLPFModel.cc
@@ -8,6 +8,8 @@
 #include "DataFormats/ParticleFlowReco/interface/PFBlockElementTrack.h"
 #include "DataFormats/ParticleFlowReco/interface/PFBlockElementBrem.h"
 #include "DataFormats/ParticleFlowReco/interface/PFBlockElementCluster.h"
+#include "DataFormats/EgammaReco/interface/SuperCluster.h"
+#include "DataFormats/EgammaReco/interface/ElectronSeed.h"
 
 namespace reco::mlpf {
 
@@ -34,6 +36,9 @@ namespace reco::mlpf {
     float depth = 0;
     float muon_dt_hits = 0.0;
     float muon_csc_hits = 0.0;
+    float muon_type = 0.0;
+    float gsf_brem_sc_energy = 0.0;
+    float num_hits = 0.0;
 
     if (type == reco::PFBlockElement::TRACK) {
       const auto& matched_pftrack = orig.trackRefPF();
@@ -58,6 +63,7 @@ namespace reco::mlpf {
       phi = ref->phi();
       energy = ref->p();
       charge = ref->charge();
+      num_hits = ref->recHitsSize();
 
       reco::MuonRef muonRef = orig.muonRef();
       if (muonRef.isNonnull()) {
@@ -66,6 +72,7 @@ namespace reco::mlpf {
           muon_dt_hits = standAloneMu->hitPattern().numberOfValidMuonDTHits();
           muon_csc_hits = standAloneMu->hitPattern().numberOfValidMuonCSCHits();
         }
+        muon_type = muonRef->type();
       }
 
     } else if (type == reco::PFBlockElement::BREM) {
@@ -84,6 +91,18 @@ namespace reco::mlpf {
         trajpoint = orig2->indTrajPoint();
         charge = ref->charge();
       }
+
+      const auto& gsfextraref = ref->extra();
+      if (gsfextraref.isAvailable() && gsfextraref->seedRef().isAvailable()) {
+        reco::ElectronSeedRef seedref = gsfextraref->seedRef().castTo<reco::ElectronSeedRef>();
+        if (seedref.isAvailable() && seedref->isEcalDriven()) {
+          reco::SuperClusterRef scref = seedref->caloCluster().castTo<reco::SuperClusterRef>();
+          if (scref.isNonnull()) {
+            gsf_brem_sc_energy = scref->energy();
+          }
+        }
+      };
+
     } else if (type == reco::PFBlockElement::GSF) {
       //requires to keep GsfPFRecTracks
       const auto* orig2 = (const reco::PFBlockElementGsfTrack*)&orig;
@@ -95,9 +114,29 @@ namespace reco::mlpf {
       eta = vec.eta();
       phi = vec.phi();
       energy = vec.energy();
+
+      const auto& vec2 = orig2->Pout();
+      eta_ecal = vec2.eta();
+      phi_ecal = vec2.phi();
+
       if (!orig2->GsftrackRefPF().isNull()) {
         charge = orig2->GsftrackRefPF()->charge();
+        num_hits = orig2->GsftrackRefPF()->PFRecBrem().size();
       }
+
+      const auto& ref = orig2->GsftrackRef();
+
+      const auto& gsfextraref = ref->extra();
+      if (gsfextraref.isAvailable() && gsfextraref->seedRef().isAvailable()) {
+        reco::ElectronSeedRef seedref = gsfextraref->seedRef().castTo<reco::ElectronSeedRef>();
+        if (seedref.isAvailable() && seedref->isEcalDriven()) {
+          reco::SuperClusterRef scref = seedref->caloCluster().castTo<reco::SuperClusterRef>();
+          if (scref.isNonnull()) {
+            gsf_brem_sc_energy = scref->energy();
+          }
+        }
+      };
+
     } else if (type == reco::PFBlockElement::ECAL || type == reco::PFBlockElement::PS1 ||
                type == reco::PFBlockElement::PS2 || type == reco::PFBlockElement::HCAL ||
                type == reco::PFBlockElement::HO || type == reco::PFBlockElement::HFHAD ||
@@ -112,6 +151,7 @@ namespace reco::mlpf {
         energy = ref->energy();
         layer = ref->layer();
         depth = ref->depth();
+        num_hits = ref->recHitFractions().size();
       }
     } else if (type == reco::PFBlockElement::SC) {
       const auto& clref = ((const reco::PFBlockElementSuperCluster*)&orig)->superClusterRef();
@@ -122,6 +162,7 @@ namespace reco::mlpf {
         py = clref->position().y();
         pz = clref->position().z();
         energy = clref->energy();
+        num_hits = clref->clustersSize();
       }
     }
 
@@ -142,7 +183,10 @@ namespace reco::mlpf {
                                                      eta_hcal,
                                                      phi_hcal,
                                                      muon_dt_hits,
-                                                     muon_csc_hits}});
+                                                     muon_csc_hits,
+                                                     muon_type,
+                                                     gsf_brem_sc_energy,
+                                                     num_hits}});
   }
 
   //to make sure DNN inputs are within numerical bounds, use the same in training
@@ -159,12 +203,14 @@ namespace reco::mlpf {
     return static_cast<int>(std::distance(vec.begin(), max_element(vec.begin(), vec.end())));
   }
 
-  reco::PFCandidate makeCandidate(int pred_pid, int pred_charge, float pred_e, float pred_eta, float pred_phi) {
-    pred_phi = angle0to2pi::make0To2pi(pred_phi);
-
-    //currently, set the pT from a massless approximation.
-    //later versions of the model may predict predict both the energy and pT of the particle
-    float pred_pt = pred_e / cosh(pred_eta);
+  reco::PFCandidate makeCandidate(int pred_pid,
+                                  int pred_charge,
+                                  float pred_pt,
+                                  float pred_eta,
+                                  float pred_sin_phi,
+                                  float pred_cos_phi,
+                                  float pred_e) {
+    float pred_phi = std::atan2(pred_sin_phi, pred_cos_phi);
 
     //set the charge to +1 or -1 for PFCandidates that are charged, according to the sign of the predicted charge
     reco::PFCandidate::Charge charge = 0;
@@ -174,10 +220,25 @@ namespace reco::mlpf {
 
     math::PtEtaPhiELorentzVectorD p4(pred_pt, pred_eta, pred_phi, pred_e);
 
-    reco::PFCandidate cand(
-        0, math::XYZTLorentzVector(p4.X(), p4.Y(), p4.Z(), p4.E()), reco::PFCandidate::ParticleType(0));
-    cand.setPdgId(pred_pid);
-    cand.setCharge(charge);
+    reco::PFCandidate::ParticleType particleType(reco::PFCandidate::X);
+    if (pred_pid == 211)
+      particleType = reco::PFCandidate::h;
+    else if (pred_pid == 130)
+      particleType = reco::PFCandidate::h0;
+    else if (pred_pid == 22)
+      particleType = reco::PFCandidate::gamma;
+    else if (pred_pid == 11)
+      particleType = reco::PFCandidate::e;
+    else if (pred_pid == 13)
+      particleType = reco::PFCandidate::mu;
+    else if (pred_pid == 1)
+      particleType = reco::PFCandidate::h_HF;
+    else if (pred_pid == 2)
+      particleType = reco::PFCandidate::egamma_HF;
+
+    reco::PFCandidate cand(charge, math::XYZTLorentzVector(p4.X(), p4.Y(), p4.Z(), p4.E()), particleType);
+    //cand.setPdgId(pred_pid);
+    //cand.setCharge(charge);
 
     return cand;
   }
@@ -201,19 +262,35 @@ namespace reco::mlpf {
     return all_elements;
   }
 
+  //   [4] Calling method for module JetTracksAssociatorExplicit/'ak4JetTracksAssociatorExplicitAll' -> Ref is inconsistent with RefVectorid = (3:3546) should be (3:3559)
+  //   [6] Calling method for module MuonProducer/'muons' -> muon::isTightMuon
   void setCandidateRefs(reco::PFCandidate& cand,
                         const std::vector<const reco::PFBlockElement*> elems,
                         size_t ielem_originator) {
     const reco::PFBlockElement* elem = elems[ielem_originator];
+
     //set the track ref in case the originating element was a track
-    if (elem->type() == reco::PFBlockElement::TRACK && cand.charge() != 0 && elem->trackRef().isNonnull()) {
-      cand.setTrackRef(elem->trackRef());
+    if (std::abs(cand.pdgId()) == 211 && elem->type() == reco::PFBlockElement::TRACK && elem->trackRef().isNonnull()) {
+      const auto* eltTrack = dynamic_cast<const reco::PFBlockElementTrack*>(elem);
+      cand.setTrackRef(eltTrack->trackRef());
+      cand.setVertex(eltTrack->trackRef()->vertex());
+      cand.setPositionAtECALEntrance(eltTrack->positionAtECALEntrance());
+    }
 
-      //set the muon ref in case the originator was a muon
-      const auto& muonref = elem->muonRef();
-      if (muonref.isNonnull()) {
-        cand.setMuonRef(muonref);
-      }
+    //set the muon ref
+    if (std::abs(cand.pdgId()) == 13) {
+      const auto* eltTrack = dynamic_cast<const reco::PFBlockElementTrack*>(elem);
+      const auto& muonRef = eltTrack->muonRef();
+      cand.setTrackRef(muonRef->track());
+      cand.setMuonTrackType(muonRef->muonBestTrackType());
+      cand.setVertex(muonRef->track()->vertex());
+      cand.setMuonRef(muonRef);
+    }
+
+    if (std::abs(cand.pdgId()) == 11 && elem->type() == reco::PFBlockElement::GSF) {
+      const auto* eltTrack = dynamic_cast<const reco::PFBlockElementGsfTrack*>(elem);
+      const auto& ref = eltTrack->GsftrackRef();
+      cand.setGsfTrackRef(ref);
     }
   }
 
diff --git a/Validation/RecoParticleFlow/plugins/PFAnalysisNtuplizer.cc b/Validation/RecoParticleFlow/plugins/PFAnalysisNtuplizer.cc
index 3e3341b4408c4..e9f76bea04616 100644
--- a/Validation/RecoParticleFlow/plugins/PFAnalysisNtuplizer.cc
+++ b/Validation/RecoParticleFlow/plugins/PFAnalysisNtuplizer.cc
@@ -33,6 +33,8 @@
 #include "SimDataFormats/TrackingAnalysis/interface/TrackingParticle.h"
 #include "SimDataFormats/Associations/interface/TrackAssociation.h"
 #include "DataFormats/ParticleFlowReco/interface/PFBlock.h"
+#include "DataFormats/EgammaReco/interface/SuperCluster.h"
+#include "DataFormats/EgammaReco/interface/ElectronSeed.h"
 
 #include "DataFormats/Math/interface/deltaPhi.h"
 #include "DataFormats/TrackReco/interface/Track.h"
@@ -232,6 +234,7 @@ class PFAnalysis : public edm::one::EDAnalyzer<edm::one::WatchRuns, edm::one::Sh
   vector<float> element_eta_;
   vector<float> element_phi_;
   vector<float> element_energy_;
+  vector<float> element_corr_energy_;
   vector<float> element_eta_ecal_;
   vector<float> element_phi_ecal_;
   vector<float> element_eta_hcal_;
@@ -243,6 +246,10 @@ class PFAnalysis : public edm::one::EDAnalyzer<edm::one::WatchRuns, edm::one::Sh
   vector<float> element_trajpoint_;
   vector<float> element_muon_dt_hits_;
   vector<float> element_muon_csc_hits_;
+  vector<float> element_muon_type_;
+  vector<float> element_cluster_flags_;
+  vector<float> element_gsf_electronseed_trkorecal_;
+  vector<float> element_num_hits_;
 
   vector<int> element_distance_i_;
   vector<int> element_distance_j_;
@@ -384,6 +391,7 @@ PFAnalysis::PFAnalysis(const edm::ParameterSet& iConfig) {
   t_->Branch("element_eta", &element_eta_);
   t_->Branch("element_phi", &element_phi_);
   t_->Branch("element_energy", &element_energy_);
+  t_->Branch("element_corr_energy", &element_corr_energy_);
   t_->Branch("element_eta_ecal", &element_eta_ecal_);
   t_->Branch("element_phi_ecal", &element_phi_ecal_);
   t_->Branch("element_eta_hcal", &element_eta_hcal_);
@@ -395,6 +403,10 @@ PFAnalysis::PFAnalysis(const edm::ParameterSet& iConfig) {
   t_->Branch("element_trajpoint", &element_trajpoint_);
   t_->Branch("element_muon_dt_hits", &element_muon_dt_hits_);
   t_->Branch("element_muon_csc_hits", &element_muon_csc_hits_);
+  t_->Branch("element_muon_type", &element_muon_type_);
+  t_->Branch("element_cluster_flags", &element_cluster_flags_);
+  t_->Branch("element_gsf_electronseed_trkorecal", &element_gsf_electronseed_trkorecal_);
+  t_->Branch("element_num_hits", &element_num_hits_);
 
   //Distance matrix between PF elements
   t_->Branch("element_distance_i", &element_distance_i_);
@@ -514,6 +526,7 @@ void PFAnalysis::clearVariables() {
   element_eta_.clear();
   element_phi_.clear();
   element_energy_.clear();
+  element_corr_energy_.clear();
   element_eta_ecal_.clear();
   element_phi_ecal_.clear();
   element_eta_hcal_.clear();
@@ -525,6 +538,10 @@ void PFAnalysis::clearVariables() {
   element_trajpoint_.clear();
   element_muon_dt_hits_.clear();
   element_muon_csc_hits_.clear();
+  element_muon_type_.clear();
+  element_cluster_flags_.clear();
+  element_gsf_electronseed_trkorecal_.clear();
+  element_num_hits_.clear();
 
   element_distance_i_.clear();
   element_distance_j_.clear();
@@ -744,6 +761,7 @@ void PFAnalysis::analyze(const edm::Event& iEvent, const edm::EventSetup& iSetup
     float eta = 0.0;
     float phi = 0.0;
     float energy = 0.0;
+    float corr_energy = 0.0;
     float trajpoint = 0.0;
     float eta_ecal = 0.0;
     float phi_ecal = 0.0;
@@ -754,6 +772,10 @@ void PFAnalysis::analyze(const edm::Event& iEvent, const edm::EventSetup& iSetup
     float depth = 0;
     float muon_dt_hits = 0.0;
     float muon_csc_hits = 0.0;
+    float muon_type = 0.0;
+    float cluster_flags = 0.0;
+    float gsf_electronseed_trkorecal = 0.0;
+    float num_hits = 0.0;
 
     if (type == reco::PFBlockElement::TRACK) {
       const auto& matched_pftrack = orig.trackRefPF();
@@ -778,6 +800,7 @@ void PFAnalysis::analyze(const edm::Event& iEvent, const edm::EventSetup& iSetup
       phi = ref->phi();
       energy = ref->p();
       charge = ref->charge();
+      num_hits = ref->recHitsSize();
 
       reco::MuonRef muonRef = orig.muonRef();
       if (muonRef.isNonnull()) {
@@ -786,11 +809,13 @@ void PFAnalysis::analyze(const edm::Event& iEvent, const edm::EventSetup& iSetup
           muon_dt_hits = standAloneMu->hitPattern().numberOfValidMuonDTHits();
           muon_csc_hits = standAloneMu->hitPattern().numberOfValidMuonCSCHits();
         }
+        muon_type = muonRef->type();
       }
 
     } else if (type == reco::PFBlockElement::BREM) {
       const auto* orig2 = (const reco::PFBlockElementBrem*)&orig;
       const auto& ref = orig2->GsftrackRef();
+      trajpoint = orig2->indTrajPoint();
       if (ref.isNonnull()) {
         deltap = orig2->DeltaP();
         sigmadeltap = orig2->SigmaDeltaP();
@@ -801,9 +826,22 @@ void PFAnalysis::analyze(const edm::Event& iEvent, const edm::EventSetup& iSetup
         eta = ref->eta();
         phi = ref->phi();
         energy = ref->p();
-        trajpoint = orig2->indTrajPoint();
         charge = ref->charge();
       }
+
+      const auto& gsfextraref = ref->extra();
+      if (gsfextraref.isAvailable() && gsfextraref->seedRef().isAvailable()) {
+        reco::ElectronSeedRef seedref = gsfextraref->seedRef().castTo<reco::ElectronSeedRef>();
+        if (seedref.isAvailable()) {
+          if (seedref->isEcalDriven()) {
+            gsf_electronseed_trkorecal = 1.0;
+          }
+          else if (seedref->isTrackerDriven()) {
+            gsf_electronseed_trkorecal = 2.0;
+          }
+        }
+      }
+
     } else if (type == reco::PFBlockElement::GSF) {
       //requires to keep GsfPFRecTracks
       const auto* orig2 = (const reco::PFBlockElementGsfTrack*)&orig;
@@ -815,33 +853,61 @@ void PFAnalysis::analyze(const edm::Event& iEvent, const edm::EventSetup& iSetup
       eta = vec.eta();
       phi = vec.phi();
       energy = vec.energy();
+
+      const auto& vec2 = orig2->Pout();
+      eta_ecal = vec2.eta();
+      phi_ecal = vec2.phi();
+
       if (!orig2->GsftrackRefPF().isNull()) {
         charge = orig2->GsftrackRefPF()->charge();
+        num_hits = orig2->GsftrackRefPF()->PFRecBrem().size();
       }
+
+      const auto& ref = orig2->GsftrackRef();
+
+      const auto& gsfextraref = ref->extra();
+      if (gsfextraref.isAvailable() && gsfextraref->seedRef().isAvailable()) {
+        reco::ElectronSeedRef seedref = gsfextraref->seedRef().castTo<reco::ElectronSeedRef>();
+        if (seedref.isAvailable()) {
+          if (seedref->isEcalDriven()) {
+            gsf_electronseed_trkorecal = 1.0;
+          }
+          else if (seedref->isTrackerDriven()) {
+            gsf_electronseed_trkorecal = 2.0;
+          }
+        }
+      };
+
     } else if (type == reco::PFBlockElement::ECAL || type == reco::PFBlockElement::PS1 ||
                type == reco::PFBlockElement::PS2 || type == reco::PFBlockElement::HCAL ||
                type == reco::PFBlockElement::HO || type == reco::PFBlockElement::HFHAD ||
                type == reco::PFBlockElement::HFEM) {
       const auto& ref = ((const reco::PFBlockElementCluster*)&orig)->clusterRef();
       if (ref.isNonnull()) {
+        cluster_flags = ref->flags();
         eta = ref->eta();
         phi = ref->phi();
+        pt = ref->pt();
         px = ref->position().x();
         py = ref->position().y();
         pz = ref->position().z();
         energy = ref->energy();
+        corr_energy = ref->correctedEnergy();
         layer = ref->layer();
         depth = ref->depth();
+        num_hits = ref->recHitFractions().size();
       }
     } else if (type == reco::PFBlockElement::SC) {
       const auto& clref = ((const reco::PFBlockElementSuperCluster*)&orig)->superClusterRef();
       if (clref.isNonnull()) {
+        cluster_flags = clref->flags();
         eta = clref->eta();
         phi = clref->phi();
         px = clref->position().x();
         py = clref->position().y();
         pz = clref->position().z();
         energy = clref->energy();
+        num_hits = clref->clustersSize();
       }
     }
     vector<int> tps;
@@ -866,6 +932,7 @@ void PFAnalysis::analyze(const edm::Event& iEvent, const edm::EventSetup& iSetup
     element_eta_.push_back(eta);
     element_phi_.push_back(phi);
     element_energy_.push_back(energy);
+    element_corr_energy_.push_back(corr_energy);
     element_eta_ecal_.push_back(eta_ecal);
     element_phi_ecal_.push_back(phi_ecal);
     element_eta_hcal_.push_back(eta_hcal);
@@ -877,6 +944,10 @@ void PFAnalysis::analyze(const edm::Event& iEvent, const edm::EventSetup& iSetup
     element_trajpoint_.push_back(trajpoint);
     element_muon_dt_hits_.push_back(muon_dt_hits);
     element_muon_csc_hits_.push_back(muon_csc_hits);
+    element_muon_type_.push_back(muon_type);
+    element_cluster_flags_.push_back(cluster_flags);
+    element_gsf_electronseed_trkorecal_.push_back(gsf_electronseed_trkorecal);
+    element_num_hits_.push_back(num_hits);
   }
 
   //associate candidates to elements
diff --git a/Validation/RecoParticleFlow/test/pfanalysis_ntuple.py b/Validation/RecoParticleFlow/test/pfanalysis_ntuple.py
index e20ce73b2c735..233f90d58f077 100644
--- a/Validation/RecoParticleFlow/test/pfanalysis_ntuple.py
+++ b/Validation/RecoParticleFlow/test/pfanalysis_ntuple.py
@@ -32,7 +32,7 @@
     duplicateCheckMode = cms.untracked.string("noDuplicateCheck")
 )
 
-process.ana = cms.EDAnalyzer('PFAnalysisNtuplizer')
+process.ana = cms.EDAnalyzer('PFAnalysis')
 
 process.TFileService = cms.Service("TFileService",
     fileName = cms.string("pfntuple.root")