
Commit

Add track quality emulation to HYBRID_NEWKF (#221)
* Initial commit of TQ emulation + kfout updates

* Code checks

* Claire's Comments

* Update BuildFile.xml

Include hls types in build file

* Create clf_GBDT_emulation_newKF_digitized.json

* Ian's comments 1 from PR #209

Address comments on the KFout producer and update variable names to follow CMS naming conventions

* Fix Partial Track functions

* fix code formatting

---------

Co-authored-by: Christopher <[email protected]>
Co-authored-by: Christopher Brown <[email protected]>
3 people committed Aug 25, 2023
1 parent 3bfdb1f commit 3d02f8f
Showing 13 changed files with 411 additions and 262 deletions.
322 changes: 196 additions & 126 deletions L1Trigger/TrackFindingTracklet/plugins/ProducerKFout.cc

Large diffs are not rendered by default.

3 changes: 3 additions & 0 deletions L1Trigger/TrackFindingTracklet/python/Producer_cfi.py
@@ -1,4 +1,5 @@
import FWCore.ParameterSet.Config as cms
from L1Trigger.TrackTrigger.TrackQualityParams_cfi import *

TrackFindingTrackletProducer_params = cms.PSet (

@@ -20,5 +21,7 @@
EnableTruncation = cms.bool ( True ), # enable emulation of truncation for TBout, KF, KFin, lost stubs are filled in BranchLost
PrintKFDebug = cms.bool ( False ), # print end job internal unused MSB
UseTTStubResiduals = cms.bool ( False ), # stub residuals are recalculated from seed parameter and TTStub position
TrackQualityPSet = cms.PSet ( TrackQualityParams ),


)
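The new TrackQualityPSet hands the full TrackQualityParams block to the KFout producer. As a hedged sketch (not taken from the PR; the ProducerKFout.cc diff is not rendered above, and the helper name makeTrackQuality is illustrative only), a plugin could construct the track-quality object from this nested PSet roughly as follows:

#include <memory>
#include "FWCore/ParameterSet/interface/ParameterSet.h"
#include "L1Trigger/TrackTrigger/interface/L1TrackQuality.h"

// Illustrative only: pull the nested TrackQualityPSet out of the producer
// configuration and build the L1TrackQuality helper from it.
std::unique_ptr<L1TrackQuality> makeTrackQuality(const edm::ParameterSet& iConfig) {
  const edm::ParameterSet& tqPSet = iConfig.getParameter<edm::ParameterSet>("TrackQualityPSet");
  return std::make_unique<L1TrackQuality>(tqPSet);  // ParameterSet constructor declared in L1TrackQuality.h
}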
1 change: 1 addition & 0 deletions L1Trigger/TrackTrigger/BuildFile.xml
@@ -21,6 +21,7 @@
<use name="boost"/>
<use name="root"/>
<use name="rootrflx"/>
<use name="hls"/>
<export>
<flags SEALPLUGIN="1"/>
<lib name="1"/>

Large diffs are not rendered by default.

12 changes: 9 additions & 3 deletions L1Trigger/TrackTrigger/interface/L1TrackQuality.h
@@ -1,6 +1,5 @@
/*
Track Quality Header file
C.Brown 28/07/20
*/

@@ -19,14 +18,18 @@ C.Brown 28/07/20
#include "FWCore/Framework/interface/MakerMacros.h"
#include "FWCore/ParameterSet/interface/ParameterSet.h"
#include "DataFormats/L1TrackTrigger/interface/TTTrack.h"
#include "DataFormats/L1TrackTrigger/interface/TTTrack_TrackWord.h"
#include "DataFormats/L1TrackTrigger/interface/TTTypes.h"
#include "PhysicsTools/ONNXRuntime/interface/ONNXRuntime.h"
#include <memory>

#include "conifer.h"
#include "ap_fixed.h"

class L1TrackQuality {
public:
// Enum class used for determining prediction behaviour in setL1TrackQuality
enum class QualityAlgorithm { Cut, GBDT, NN, None };
enum class QualityAlgorithm { Cut, GBDT, GBDT_cpp, NN, None };

//Default Constructor
L1TrackQuality();
@@ -42,7 +45,10 @@ class L1TrackQuality {

// Passed by reference a track without MVA filled, method fills the track's MVA field
void setL1TrackQuality(TTTrack<Ref_Phase2TrackerDigi_>& aTrack);

// Function to run the BDT in isolation allowing a feature vector in the ap_fixed datatype to be passed
// and a single output to be returned which is then used to fill the bits in the Track Word for situations
// where a TTTrack datatype is unavailable to be passed to the track quality
float runEmulatedTQ(std::vector<ap_fixed<10, 5>> inputFeatures);
// To set private member data
void setCutParameters(std::string const& AlgorithmString,
float maxZ0,
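As a minimal usage sketch of the new interface (assumptions: the feature ordering follows featureNames in TrackQualityParams_cfi.py and the values below are placeholders), runEmulatedTQ can be driven directly with an ap_fixed feature vector when no TTTrack is available:

#include <vector>
#include "ap_fixed.h"
#include "L1Trigger/TrackTrigger/interface/L1TrackQuality.h"

// Illustrative only: build a 7-feature vector and run the emulated BDT.
void exampleEmulatedTQ(L1TrackQuality& trackQuality) {
  // eta, z0, bendchi2_bin, nstub, nlaymiss_interior, chi2rphi_bin, chi2rz_bin (placeholder values)
  std::vector<ap_fixed<10, 5>> features = {1.2, -0.5, 2.0, 5.0, 0.0, 1.0, 1.0};
  float rawScore = trackQuality.runEmulatedTQ(features);
  // The raw BDT output still needs a logistic sigmoid if a [0, 1] probability is wanted,
  // as noted in the implementation below.
  (void)rawScore;
}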
29 changes: 8 additions & 21 deletions L1Trigger/TrackTrigger/interface/Setup.h
@@ -505,19 +505,16 @@ namespace tt {
int kfShiftInitialC33() const { return kfShiftInitialC33_; }

// Parameter specifying KalmanFilter Output Formatter

// Final Chi2rphi digitization TODO extract from TTTrack Word
std::vector<double> kfoutchi2rphiBins() const { return kfoutchi2rphiBins_; }
// Final Chi2rz digitization TODO extract from TTTrack Word
std::vector<double> kfoutchi2rzBins() const { return kfoutchi2rzBins_; }
// Conversion factor between dphi^2/weight and chi2rphi
int kfoutchi2rphiConv() const { return kfoutchi2rphiConv_; }
// Conversion factor between dz^2/weight and chi2rz
int kfoutchi2rzConv() const { return kfoutchi2rzConv_; }
// Number of bits for the tttrack word
int tttrackBits() const { return tttrackBits_; }
// Fraction of total dphi and dz ranges to calculate v0 and v1 LUT for
int weightBinFraction() const { return weightBinFraction_; }
// Constant used in FW to prevent 32-bit int overflow
int dzTruncation() const { return dzTruncation_; }
// Constant used in FW to prevent 32-bit int overflow
int dphiTruncation() const { return dphiTruncation_; }

// Parameter specifying DuplicateRemoval

@@ -922,26 +919,16 @@

// Parameter specifying KalmanFilter Output Formatter
edm::ParameterSet pSetKFOut_;
// Bins used to digitize dPhi for chi2 calculation
std::vector<int> kfoutdPhiBins_;
// Bins used to digitize dZ for chi2 calculation
std::vector<int> kfoutdZBins_;
// v0 weight Bins corresponding to dPhi Bins for chi2 calculation
std::vector<int> kfoutv0Bins_;
// v1 weight Bins corresponding to dZ Bins for chi2 calculation
std::vector<int> kfoutv1Bins_;
// Final Chi2rphi digitization TODO extract from TTTrack Word
std::vector<double> kfoutchi2rphiBins_;
// Final Chi2rz digitization TODO extract from TTTrack Word
std::vector<double> kfoutchi2rzBins_;
// Conversion factor between dphi^2/weight and chi2rphi
int kfoutchi2rphiConv_;
// Conversion factor between dz^2/weight and chi2rz
int kfoutchi2rzConv_;
// Number of bits for the tttrack word
int tttrackBits_;
// Fraction of total dphi and dz ranges to calculate v0 and v1 LUT for
int weightBinFraction_;
// Constant used in FW to prevent 32-bit int overflow
int dzTruncation_;
// Constant used in FW to prevent 32-bit int overflow
int dphiTruncation_;

// Parameter specifying DuplicateRemoval
edm::ParameterSet pSetDR_;
152 changes: 152 additions & 0 deletions L1Trigger/TrackTrigger/interface/conifer.h
@@ -0,0 +1,152 @@
#ifndef CONIFER_CPP_H__
#define CONIFER_CPP_H__
#include "nlohmann/json.hpp"
#include <cassert>
#include <fstream>

namespace conifer {

/* ---
* Balanced tree reduce implementation.
* Reduces an array of inputs to a single value using the template binary operator 'Op',
* for example summing all elements with Op_add, or finding the maximum with Op_max
* Use only when the input array is fully unrolled. Or, slice out a fully unrolled section
* before applying and accumulate the result over the rolled dimension.
* Required for emulation to guarantee equality of ordering.
* --- */
constexpr int floorlog2(int x) { return (x < 2) ? 0 : 1 + floorlog2(x / 2); }

template <int B>
constexpr int pow(int x) {
return x == 0 ? 1 : B * pow<B>(x - 1);
}

constexpr int pow2(int x) { return pow<2>(x); }

template <class T, class Op>
T reduce(std::vector<T> x, Op op) {
int N = x.size();
int leftN = pow2(floorlog2(N - 1)) > 0 ? pow2(floorlog2(N - 1)) : 0;
//static constexpr int rightN = N - leftN > 0 ? N - leftN : 0;
if (N == 1) {
return x.at(0);
} else if (N == 2) {
return op(x.at(0), x.at(1));
} else {
std::vector<T> left(x.begin(), x.begin() + leftN);
std::vector<T> right(x.begin() + leftN, x.end());
return op(reduce<T, Op>(left, op), reduce<T, Op>(right, op));
}
}

template <class T>
class OpAdd {
public:
T operator()(T a, T b) { return a + b; }
};

template <class T, class U>
class DecisionTree {
private:
std::vector<int> feature;
std::vector<int> children_left;
std::vector<int> children_right;
std::vector<T> threshold_;
std::vector<U> value_;
std::vector<double> threshold;
std::vector<double> value;

public:
U decision_function(std::vector<T> x) const {
/* Do the prediction */
int i = 0;
while (feature[i] != -2) { // continue until reaching leaf
bool comparison = x[feature[i]] <= threshold_[i];
i = comparison ? children_left[i] : children_right[i];
}
return value_[i];
}

void init_() {
/* Since T, U types may not be readable from the JSON, read them to double and the cast them here */
std::transform(
threshold.begin(), threshold.end(), std::back_inserter(threshold_), [](double t) -> T { return (T)t; });
std::transform(value.begin(), value.end(), std::back_inserter(value_), [](double v) -> U { return (U)v; });
}

// Define how to read this class to/from JSON
NLOHMANN_DEFINE_TYPE_INTRUSIVE(DecisionTree, feature, children_left, children_right, threshold, value);

}; // class DecisionTree

template <class T, class U, bool useAddTree = false>
class BDT {
private:
int n_classes;
int n_trees;
int n_features;
std::vector<double> init_predict;
std::vector<U> init_predict_;
// vector of decision trees: outer dimension tree, inner dimension class
std::vector<std::vector<DecisionTree<T, U>>> trees;
OpAdd<U> add;

public:
// Define how to read this class to/from JSON
NLOHMANN_DEFINE_TYPE_INTRUSIVE(BDT, n_classes, n_trees, n_features, init_predict, trees);

BDT(std::string filename) {
/* Construct the BDT from conifer cpp backend JSON file */
std::ifstream ifs(filename);
nlohmann::json j = nlohmann::json::parse(ifs);
from_json(j, *this);
/* Do some transformation to initialise things into the proper emulation T, U types */
if (n_classes == 2)
n_classes = 1;
std::transform(init_predict.begin(), init_predict.end(), std::back_inserter(init_predict_), [](double ip) -> U {
return (U)ip;
});
for (int i = 0; i < n_trees; i++) {
for (int j = 0; j < n_classes; j++) {
trees.at(i).at(j).init_();
}
}
}

std::vector<U> decision_function(std::vector<T> x) const {
/* Do the prediction */
assert("Size of feature vector mismatches expected n_features" && static_cast<int>(x.size()) == n_features);
std::vector<U> values;
std::vector<std::vector<U>> values_trees;
values_trees.resize(n_classes);
values.resize(n_classes, U(0));
for (int i = 0; i < n_classes; i++) {
std::transform(trees.begin(), trees.end(), std::back_inserter(values_trees.at(i)), [&i, &x](auto tree_v) {
return tree_v.at(i).decision_function(x);
});
if (useAddTree) {
values.at(i) = init_predict_.at(i);
values.at(i) += reduce<U, OpAdd<U>>(values_trees.at(i), add);
} else {
values.at(i) = std::accumulate(values_trees.at(i).begin(), values_trees.at(i).end(), U(init_predict_.at(i)));
}
}

return values;
}

std::vector<double> _decision_function_double(std::vector<double> x) const {
/* Do the prediction with data in/out as double, cast to T, U before prediction */
std::vector<T> xt;
std::transform(x.begin(), x.end(), std::back_inserter(xt), [](double xi) -> T { return (T)xi; });
std::vector<U> y = decision_function(xt);
std::vector<double> yd;
std::transform(y.begin(), y.end(), std::back_inserter(yd), [](U yi) -> double { return (double)yi; });
return yd;
}

}; // class BDT

} // namespace conifer

#endif
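The header is self-contained apart from nlohmann/json and the HLS ap_fixed types, so it can be exercised standalone. A hedged sketch (the model path is a placeholder; the PR ships clf_GBDT_emulation_newKF_digitized.json in the cms-data L1Trigger-TrackTrigger area, and the feature values here are illustrative):

#include <string>
#include <vector>
#include "ap_fixed.h"
#include "L1Trigger/TrackTrigger/interface/conifer.h"

int main() {
  // Load a conifer cpp-backend JSON export; the template parameters fix the
  // input (T) and score (U) precision used during evaluation.
  conifer::BDT<ap_fixed<10, 5>, ap_fixed<10, 5>> bdt("model.json");  // placeholder path
  std::vector<double> features = {1.2, -0.5, 2.0, 5.0, 0.0, 1.0, 1.0};
  // Convenience wrapper: casts to ap_fixed internally and returns doubles.
  std::vector<double> scores = bdt._decision_function_double(features);
  return scores.empty();
}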
14 changes: 5 additions & 9 deletions L1Trigger/TrackTrigger/python/ProducerSetup_cfi.py
@@ -215,15 +215,11 @@

# Parmeter specifying KalmanFilter Output Formatter
KalmanFilterOut = cms.PSet (
chi2rphiBins = cms.vdouble( 0, 0.25, 0.5, 1, 2, 3, 5, 7, 10, 20, 40, 100, 200, 500, 1000, 3000,6000 ), # Final Chi2rphi digitization TODO extract from TTTrack Word
chi2rzBins = cms.vdouble( 0, 0.25, 0.5, 1, 2, 3, 5, 7, 10, 20, 40, 100, 200, 500, 1000, 3000,6000 ), # Final Chi2rz digitization TODO extract from TTTrack Word

chi2rphiConv = cms.int32 ( 3 ), # Conversion factor between dphi^2/weight and chi2rphi
chi2rzConv = cms.int32 ( 13 ), # Conversion factor between dz^2/weight and chi2rz

WeightBinFraction = cms.int32( 0 ), # Number of bits dropped from dphi and dz for v0 and v1 LUTs

TTTrackBits = cms.int32( 96 ) # Number of bits for the tttrack word TODO extract from TTTrack_word dataformat
Chi2rphiConv = cms.int32( 3 ), # Conversion factor between dphi^2/weight and chi2rphi
Chi2rzConv = cms.int32( 13 ), # Conversion factor between dz^2/weight and chi2rz
WeightBinFraction = cms.int32( 0 ), # Number of bits dropped from dphi and dz for v0 and v1 LUTs
DzTruncation = cms.int32( 262144 ), # Constant used in FW to prevent 32-bit int overflow
DphiTruncation = cms.int32( 16 ) # Constant used in FW to prevent 32-bit int overflow
),

# Parmeter specifying DuplicateRemoval
10 changes: 7 additions & 3 deletions L1Trigger/TrackTrigger/python/TrackQualityParams_cfi.py
@@ -1,12 +1,13 @@
import FWCore.ParameterSet.Config as cms

TrackQualityParams = cms.PSet(qualityAlgorithm = cms.string("GBDT"), #None, Cut, NN, GBDT
ONNXmodel = cms.FileInPath("L1Trigger/TrackTrigger/data/GBDT_default.onnx"),
TrackQualityParams = cms.PSet(qualityAlgorithm = cms.string("GBDT_cpp"), #None, Cut, NN, GBDT, GBDT_cpp
ONNXmodel = cms.FileInPath("L1Trigger/TrackTrigger/data/clf_GBDT_emulation_newKF_digitized.json"),
#ONNXmodel = cms.FileInPath("L1Trigger/TrackTrigger/data/gbdt.json"),
# The ONNX model should be found at this path, if you want a local version of the model:
# git clone https://github.com/cms-data/L1Trigger-TrackTrigger.git L1Trigger/TrackTrigger/data
ONNXInputName = cms.string("feature_input"),
#Vector of strings of training features, in the order that the model was trained with
featureNames = cms.vstring(["phi", "eta", "z0", "bendchi2_bin", "nstub",
featureNames = cms.vstring(["eta", "z0", "bendchi2_bin", "nstub",
"nlaymiss_interior", "chi2rphi_bin", "chi2rz_bin"]),
# Parameters for cut based classifier, optimized for L1 Track MET
# (Table 3.7 The Phase-2 Upgrade of the CMS Level-1 Trigger http://cds.cern.ch/record/2714892)
@@ -16,4 +17,7 @@
bendchi2Max = cms.double( 2.4 ),
minPt = cms.double( 2. ), # in GeV
nStubsmin = cms.int32( 4 ),
tqemu_bins = cms.vint32( [-480, -62, -35, -16, 0, 16, 35, 62, 480] ),
tqemu_TanlScale = cms.double( 128.0),
tqemu_Z0Scale = cms.double( 64.0 ),
)
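The three new tqemu_* parameters configure the digitisation used for the emulated BDT inputs. A hedged sketch of reading them back in C++ (parameter names match the cfi above; how they are applied to tanL and z0 is defined in ProducerKFout.cc, whose diff is not rendered here, so this only shows the PSet access):

#include <vector>
#include "FWCore/ParameterSet/interface/ParameterSet.h"

// Illustrative only: fetch the emulation binning and scale factors from the PSet.
void readTqEmuParams(const edm::ParameterSet& tqParams) {
  std::vector<int> tqBins = tqParams.getParameter<std::vector<int>>("tqemu_bins");
  double tanlScale = tqParams.getParameter<double>("tqemu_TanlScale");
  double z0Scale = tqParams.getParameter<double>("tqemu_Z0Scale");
  (void)tqBins; (void)tanlScale; (void)z0Scale;
}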
28 changes: 25 additions & 3 deletions L1Trigger/TrackTrigger/src/L1TrackQuality.cc
@@ -1,6 +1,5 @@
/*
Track Quality Body file
C.Brown & C.Savard 07/2020
*/

@@ -28,7 +27,8 @@ L1TrackQuality::L1TrackQuality(const edm::ParameterSet& qualityParams) : useHPH_
qualityParams.getParameter<edm::FileInPath>("ONNXmodel"),
qualityParams.getParameter<std::string>("ONNXInputName"),
qualityParams.getParameter<std::vector<std::string>>("featureNames"));
runTime_ = std::make_unique<cms::Ort::ONNXRuntime>(this->ONNXmodel_.fullPath());
if ((AlgorithmString == "GBDT") || (AlgorithmString == "NN"))
runTime_ = std::make_unique<cms::Ort::ONNXRuntime>(this->ONNXmodel_.fullPath());
}
}

@@ -117,7 +117,17 @@ void L1TrackQuality::setL1TrackQuality(TTTrack<Ref_Phase2TrackerDigi_>& aTrack)
aTrack.settrkMVA1(classification);
}

if ((this->qualityAlgorithm_ == QualityAlgorithm::NN) || (this->qualityAlgorithm_ == QualityAlgorithm::GBDT)) {
else if (this->qualityAlgorithm_ == QualityAlgorithm::GBDT_cpp) {
// load in bdt
conifer::BDT<float, float> bdt(this->ONNXmodel_.fullPath());

// collect features and classify using bdt
std::vector<float> inputs = featureTransform(aTrack, this->featureNames_);
std::vector<float> output = bdt.decision_function(inputs);
aTrack.settrkMVA1(1. / (1. + exp(-output.at(0)))); // need logistic sigmoid fcn applied to xgb output
}

else if ((this->qualityAlgorithm_ == QualityAlgorithm::NN) || (this->qualityAlgorithm_ == QualityAlgorithm::GBDT)) {
// Setup ONNX input and output names and arrays
std::vector<std::string> ortinput_names;
std::vector<std::string> ortoutput_names;
@@ -156,6 +166,16 @@ void L1TrackQuality::setL1TrackQuality(TTTrack<Ref_Phase2TrackerDigi_>& aTrack)
}
}

float L1TrackQuality::runEmulatedTQ(std::vector<ap_fixed<10, 5>> inputFeatures) {
// load in bdt

conifer::BDT<ap_fixed<10, 5>, ap_fixed<10, 5>> bdt(this->ONNXmodel_.fullPath());

// collect features and classify using bdt
std::vector<ap_fixed<10, 5>> output = bdt.decision_function(inputFeatures);
return output.at(0).to_float(); // need logistic sigmoid fcn applied to xgb output
}

void L1TrackQuality::setCutParameters(std::string const& AlgorithmString,
float maxZ0,
float maxEta,
@@ -181,6 +201,8 @@ void L1TrackQuality::setONNXModel(std::string const& AlgorithmString,
qualityAlgorithm_ = QualityAlgorithm::NN;
} else if (AlgorithmString == "GBDT") {
qualityAlgorithm_ = QualityAlgorithm::GBDT;
} else if (AlgorithmString == "GBDT_cpp") {
qualityAlgorithm_ = QualityAlgorithm::GBDT_cpp;
} else {
qualityAlgorithm_ = QualityAlgorithm::None;
}
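With qualityAlgorithm set to GBDT_cpp, classification goes through the conifer BDT and the logistic sigmoid shown above. A brief hedged usage sketch (assumes a TTTrack that already has its parameters filled and that trkMVA1() is the matching getter on TTTrack):

#include "DataFormats/L1TrackTrigger/interface/TTTrack.h"
#include "DataFormats/L1TrackTrigger/interface/TTTypes.h"
#include "L1Trigger/TrackTrigger/interface/L1TrackQuality.h"

// Illustrative only: classify one track and read back its MVA score.
void classifyTrack(L1TrackQuality& trackQuality, TTTrack<Ref_Phase2TrackerDigi_>& track) {
  trackQuality.setL1TrackQuality(track);  // fills the track's MVA field
  float mva = track.trkMVA1();            // score in [0, 1] after the sigmoid
  (void)mva;
}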
9 changes: 4 additions & 5 deletions L1Trigger/TrackTrigger/src/Setup.cc
@@ -208,12 +208,11 @@ namespace tt {
kfShiftInitialC33_(pSetKF_.getParameter<int>("ShiftInitialC33")),
// Parmeter specifying KalmanFilter Output Formatter
pSetKFOut_(iConfig.getParameter<ParameterSet>("KalmanFilterOut")),
kfoutchi2rphiBins_(pSetKFOut_.getParameter<vector<double>>("chi2rphiBins")),
kfoutchi2rzBins_(pSetKFOut_.getParameter<vector<double>>("chi2rzBins")),
kfoutchi2rphiConv_(pSetKFOut_.getParameter<int>("chi2rphiConv")),
kfoutchi2rzConv_(pSetKFOut_.getParameter<int>("chi2rzConv")),
tttrackBits_(pSetKFOut_.getParameter<int>("TTTrackBits")),
kfoutchi2rphiConv_(pSetKFOut_.getParameter<int>("Chi2rphiConv")),
kfoutchi2rzConv_(pSetKFOut_.getParameter<int>("Chi2rzConv")),
weightBinFraction_(pSetKFOut_.getParameter<int>("WeightBinFraction")),
dzTruncation_(pSetKFOut_.getParameter<int>("DzTruncation")),
dphiTruncation_(pSetKFOut_.getParameter<int>("DphiTruncation")),
// Parmeter specifying DuplicateRemoval
pSetDR_(iConfig.getParameter<ParameterSet>("DuplicateRemoval")),
drDepthMemory_(pSetDR_.getParameter<int>("DepthMemory")) {

