Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Code to process phase2 version of deepTauID v2p5 [12_5_X] #40723

Merged
4 changes: 2 additions & 2 deletions PhysicsTools/PatAlgos/python/slimming/miniAOD_tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -382,8 +382,8 @@ def _add_deepFlavour(process):
toKeep = ['deepTau2017v2p1','deepTau2018v2p5']
)
from Configuration.Eras.Modifier_phase2_common_cff import phase2_common #Phase2 Tau MVA
phase2_common.toModify(tauIdEmbedder.toKeep, func=lambda t:t.append('newDMPhase2v1')) #Phase2 Tau isolation MVA
phase2_common.toModify(tauIdEmbedder.toKeep, func=lambda t:t.append('againstElePhase2v1')) #Phase2 Tau anti-e MVA
_tauIds_phase2 = ['newDMPhase2v1','againstElePhase2v1']
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@mbluj newDMPhase2v1 was not kept in the master version. Just to try to understand: why?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@mbluj newDMPhase2v1 was not kept in the master version. Just to try to understand: why?

It has quite likely something to do with #40724 (comment), but I fail to find the connection (sorry...)

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The idea is to keep in master only the phase-2 deepTauID ('deepTau2026v2p5') and to remove/deprecate the older, less powerful phase-2 tauIDs ('newDMPhase2v1' and 'againstElePhase2v1').

phase2_common.toModify(tauIdEmbedder.toKeep, func=lambda t:t.extend(_tauIds_phase2))
tauIdEmbedder.runTauID()
addToProcessAndTask(_noUpdatedTauName, process.slimmedTaus.clone(),process,task)
delattr(process, 'slimmedTaus')
Expand Down
337 changes: 337 additions & 0 deletions RecoTauTag/RecoTau/interface/DeepTauScaling.h

Large diffs are not rendered by default.

45 changes: 32 additions & 13 deletions RecoTauTag/RecoTau/plugins/DeepTauId.cc
Original file line number Diff line number Diff line change
Expand Up @@ -881,6 +881,7 @@ class DeepTauId : public deep_tau::DeepTauBase {
desc.add<std::vector<std::string>>("graph_file",
{"RecoTauTag/TrainingFiles/data/DeepTauId/deepTau_2017v2p6_e6.pb"});
desc.add<bool>("mem_mapped", false);
desc.add<unsigned>("year", 2017);
desc.add<unsigned>("version", 2);
desc.add<unsigned>("sub_version", 1);
desc.add<int>("debug_level", 0);
Expand Down Expand Up @@ -926,6 +927,7 @@ class DeepTauId : public deep_tau::DeepTauBase {
pfTauTransverseImpactParameters_token_(
consumes<edm::AssociationVector<reco::PFTauRefProd, std::vector<reco::PFTauTransverseImpactParameterRef>>>(
cfg.getParameter<edm::InputTag>("pfTauTransverseImpactParameters"))),
year_(cfg.getParameter<unsigned>("year")),
version_(cfg.getParameter<unsigned>("version")),
sub_version_(cfg.getParameter<unsigned>("sub_version")),
debug_level(cfg.getParameter<int>("debug_level")),
Expand Down Expand Up @@ -957,7 +959,11 @@ class DeepTauId : public deep_tau::DeepTauBase {
tensorflow::TensorShape{1,
static_cast<int>(TauBlockInputs::NumberOfInputs) -
static_cast<int>(TauBlockInputs::varsToDrop.size())});
scalingParamsMap_ = &sc::scalingParamsMap_v2p5;
if (year_ == 2026) {
scalingParamsMap_ = &sc::scalingParamsMap_PhaseIIv2p5;
} else {
scalingParamsMap_ = &sc::scalingParamsMap_v2p5;
}
} else
throw cms::Exception("DeepTauId") << "subversion " << sub_version_ << " is not supported.";

Expand Down Expand Up @@ -1236,6 +1242,8 @@ class DeepTauId : public deep_tau::DeepTauBase {
edm::Handle<double> rho;
event.getByToken(rho_token_, rho);

auto const& eventnr = event.id().event();

tensorflow::Tensor predictions(tensorflow::DT_FLOAT, {static_cast<int>(taus->size()), deep_tau::NumberOfOutputs});

for (size_t tau_index = 0; tau_index < taus->size(); ++tau_index) {
Expand Down Expand Up @@ -1263,6 +1271,7 @@ class DeepTauId : public deep_tau::DeepTauBase {
*pfCands,
vertices->at(0),
*rho,
eventnr,
pred_vector,
tauIDs);
} else
Expand All @@ -1274,6 +1283,7 @@ class DeepTauId : public deep_tau::DeepTauBase {
*pfCands,
vertices->at(0),
*rho,
eventnr,
pred_vector,
tauIDs);
} else {
Expand Down Expand Up @@ -1310,13 +1320,15 @@ class DeepTauId : public deep_tau::DeepTauBase {
const edm::View<reco::Candidate>& pfCands,
const reco::Vertex& pv,
double rho,
const edm::EventNumber_t& eventnr,
std::vector<tensorflow::Tensor>& pred_vector,
TauFunc tau_funcs) {
using namespace dnn_inputs_v2;
if (debug_level >= 2) {
std::cout << "<DeepTauId::getPredictionsV2 (moduleLabel = " << moduleDescription().moduleLabel()
<< ")>:" << std::endl;
std::cout << " tau: pT = " << tau.pt() << ", eta = " << tau.eta() << ", phi = " << tau.phi() << std::endl;
std::cout << " tau: pT = " << tau.pt() << ", eta = " << tau.eta() << ", phi = " << tau.phi()
<< ", eventnr = " << eventnr << std::endl;
}
CellGrid inner_grid(number_of_inner_cell, number_of_inner_cell, 0.02, 0.02, disable_CellIndex_workaround_);
CellGrid outer_grid(number_of_outer_cell, number_of_outer_cell, 0.05, 0.05, disable_CellIndex_workaround_);
Expand Down Expand Up @@ -1357,7 +1369,7 @@ class DeepTauId : public deep_tau::DeepTauBase {
checkInputs(*hadronsTensor_[false], "input_outer_hadrons", HadronBlockInputs::NumberOfInputs, &outer_grid);

if (save_inputs_) {
std::string json_file_name = "DeepTauId_" + std::to_string(file_counter_) + ".json";
std::string json_file_name = "DeepTauId_" + std::to_string(eventnr) + "_" + std::to_string(tau_index) + ".json";
json_file_ = new std::ofstream(json_file_name.data());
is_first_block_ = true;
(*json_file_) << "{";
Expand Down Expand Up @@ -1942,16 +1954,22 @@ class DeepTauId : public deep_tau::DeepTauBase {
sp.scale(ele.deltaPhiSuperClusterTrackAtVtx(), dnn::ele_deltaPhiSuperClusterTrackAtVtx - e_index_offset);
get(dnn::ele_deltaPhiSeedClusterTrackAtCalo + fill_index_offset_e) =
sp.scale(ele.deltaPhiSeedClusterTrackAtCalo(), dnn::ele_deltaPhiSeedClusterTrackAtCalo - e_index_offset);
get(dnn::ele_mvaInput_earlyBrem + fill_index_offset_e) =
sp.scale(ele.mvaInput().earlyBrem, dnn::ele_mvaInput_earlyBrem - e_index_offset);
get(dnn::ele_mvaInput_lateBrem + fill_index_offset_e) =
sp.scale(ele.mvaInput().lateBrem, dnn::ele_mvaInput_lateBrem - e_index_offset);
get(dnn::ele_mvaInput_sigmaEtaEta + fill_index_offset_e) =
sp.scale(ele.mvaInput().sigmaEtaEta, dnn::ele_mvaInput_sigmaEtaEta - e_index_offset);
get(dnn::ele_mvaInput_hadEnergy + fill_index_offset_e) =
sp.scale(ele.mvaInput().hadEnergy, dnn::ele_mvaInput_hadEnergy - e_index_offset);
get(dnn::ele_mvaInput_deltaEta + fill_index_offset_e) =
sp.scale(ele.mvaInput().deltaEta, dnn::ele_mvaInput_deltaEta - e_index_offset);
const bool mva_valid =
(ele.mvaInput().earlyBrem > -2) ||
(year_ !=
2026); // Known issue that input can be invalid in Phase2 samples (early/lateBrem==-2, hadEnergy==0, sigmaEtaEta/deltaEta==3.40282e+38). Unknown if also in Run2/3, so don't change there
if (mva_valid) {
get(dnn::ele_mvaInput_earlyBrem + fill_index_offset_e) =
sp.scale(ele.mvaInput().earlyBrem, dnn::ele_mvaInput_earlyBrem - e_index_offset);
get(dnn::ele_mvaInput_lateBrem + fill_index_offset_e) =
sp.scale(ele.mvaInput().lateBrem, dnn::ele_mvaInput_lateBrem - e_index_offset);
get(dnn::ele_mvaInput_sigmaEtaEta + fill_index_offset_e) =
sp.scale(ele.mvaInput().sigmaEtaEta, dnn::ele_mvaInput_sigmaEtaEta - e_index_offset);
get(dnn::ele_mvaInput_hadEnergy + fill_index_offset_e) =
sp.scale(ele.mvaInput().hadEnergy, dnn::ele_mvaInput_hadEnergy - e_index_offset);
get(dnn::ele_mvaInput_deltaEta + fill_index_offset_e) =
sp.scale(ele.mvaInput().deltaEta, dnn::ele_mvaInput_deltaEta - e_index_offset);
}
const auto& gsfTrack = ele.gsfTrack();
if (gsfTrack.isNonnull()) {
get(dnn::ele_gsfTrack_normalizedChi2 + fill_index_offset_e) =
Expand Down Expand Up @@ -2419,6 +2437,7 @@ class DeepTauId : public deep_tau::DeepTauBase {
edm::EDGetTokenT<edm::AssociationVector<reco::PFTauRefProd, std::vector<reco::PFTauTransverseImpactParameterRef>>>
pfTauTransverseImpactParameters_token_;
std::string input_layer_, output_layer_;
const unsigned year_;
const unsigned version_;
const unsigned sub_version_;
const int debug_level;
Expand Down
Original file line number Diff line number Diff line change
@@ -1,9 +1,8 @@
import FWCore.ParameterSet.Config as cms

# Electron collection merger
mergedSlimmedElectronsForTauId = cms.EDProducer('PATElectronCollectionMerger',
src = cms.VInputTag('slimmedElectrons', 'slimmedElectronsHGC')
)
from RecoTauTag.RecoTau.mergedPhase2SlimmedElectronsForTauId_cff import mergedSlimmedElectronsForTauId

# anti-e phase-2 tauID (Raw)
from RecoTauTag.RecoTau.tauDiscriminationAgainstElectronMVA6Phase2_mvaDefs_cff import mvaNames_phase2, mapping_phase2, workingPoints_phase2
from RecoTauTag.RecoTau.TauDiscriminatorTools import noPrediscriminants
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
import FWCore.ParameterSet.Config as cms

# Electron collection merger
mergedSlimmedElectronsForTauId = cms.EDProducer('PATElectronCollectionMerger',
src = cms.VInputTag('slimmedElectrons', 'slimmedElectronsHGC')
)
29 changes: 29 additions & 0 deletions RecoTauTag/RecoTau/python/tauIdWPsDefs.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,3 +55,32 @@
"VVTight": 0.9931
}
}

# Phase-2 (PHASEII) v2p5 working points: for each of the "e", "mu" and "jet"
# groups, maps a working-point name to its numeric threshold.
# NOTE(review): presumably these are cuts on the raw DeepTau discriminator
# scores against electrons, muons and jets respectively -- confirm with the
# code that consumes this table.
# The "mu" group defines only four working points (VLoose, Loose, Medium,
# Tight); "e" and "jet" define eight each (VVVLoose through VVTight).
WORKING_POINTS_PHASEII_v2p5 = {
"e": {
"VVVLoose": 0.2376,
"VVLoose": 0.3688,
"VLoose": 0.5336,
"Loose": 0.8116,
"Medium": 0.9268,
"Tight": 0.9781,
"VTight": 0.9915,
"VVTight": 0.9961
},
"mu": {
"VLoose": 0.0640,
"Loose": 0.0942,
"Medium": 0.5494,
"Tight": 0.9401
},
"jet": {
"VVVLoose": 0.4918,
"VVLoose": 0.6920,
"VLoose": 0.8299,
"Loose": 0.9166,
"Medium": 0.9507,
"Tight": 0.9683,
"VTight": 0.9788,
"VVTight": 0.9856
}
}
53 changes: 47 additions & 6 deletions RecoTauTag/RecoTau/python/tools/runTauIdMVA.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
from RecoTauTag.RecoTau.TauDiscriminatorTools import noPrediscriminants
from RecoTauTag.RecoTau.PATTauDiscriminationByMVAIsolationRun2_cff import patDiscriminationByIsolationMVArun2v1raw, patDiscriminationByIsolationMVArun2v1
from RecoTauTag.RecoTau.DeepTau_cfi import DeepTau
from RecoTauTag.RecoTau.tauIdWPsDefs import WORKING_POINTS_v2p1, WORKING_POINTS_v2p5
from RecoTauTag.RecoTau.tauIdWPsDefs import WORKING_POINTS_v2p1, WORKING_POINTS_v2p5, WORKING_POINTS_PHASEII_v2p5

import os
import re
Expand All @@ -12,7 +12,7 @@ class TauIDEmbedder(object):
"""class to rerun the tau seq and acces trainings from the database"""
availableDiscriminators = [
"2017v1", "2017v2", "newDM2017v2", "dR0p32017v2", "2016v1", "newDM2016v1",
"deepTau2017v2", "deepTau2017v2p1", "deepTau2018v2p5",
"deepTau2017v2", "deepTau2017v2p1", "deepTau2018v2p5", "deepTau2026v2p5",
"againstEle2018",
"newDMPhase2v1",
"againstElePhase2v1"
Expand All @@ -22,7 +22,7 @@ def __init__(self, process, debug = False,
originalTauName = "slimmedTaus",
updatedTauName = "slimmedTausNewID",
postfix = "",
toKeep = ["deepTau2017v2p1", "deepTau2018v2p5"],
toKeep = ["deepTau2017v2p1", "deepTau2018v2p5", "deepTau2026v2p5"],
tauIdDiscrMVA_trainings_run2_2017 = { 'tauIdMVAIsoDBoldDMwLT2017' : "tauIdMVAIsoDBoldDMwLT2017", },
tauIdDiscrMVA_WPs_run2_2017 = {
'tauIdMVAIsoDBoldDMwLT2017' : {
Expand Down Expand Up @@ -560,7 +560,7 @@ def runTauID(self):
tauIDSources.byVVTightIsolationMVArun2v1DBnewDMwLT2016 = self.tauIDMVAinputs(_byIsolationNewDMMVArun2016v1, "_WPEff40")

if "deepTau2017v2" in self.toKeep:
if self.debug: print ("Adding DeepTau IDs")
if self.debug: print ("Adding DeepTau v2 IDs")

_deepTauName = "deepTau2017v2"
workingPoints_ = WORKING_POINTS_v2p1
Expand All @@ -575,6 +575,7 @@ def runTauID(self):
Prediscriminants = noPrediscriminants,
taus = self.originalTauName,
graph_file = file_names,
year = full_version[0],
version = full_version[1],
sub_version = 1 #MB: subversion cannot be properly deduced from file names; it should be 1 also for v2
))
Expand All @@ -587,7 +588,7 @@ def runTauID(self):


if "deepTau2017v2p1" in self.toKeep:
if self.debug: print ("Adding DeepTau IDs")
if self.debug: print ("Adding DeepTau v2p1 IDs")

_deepTauName = "deepTau2017v2p1"
workingPoints_ = WORKING_POINTS_v2p1
Expand All @@ -602,6 +603,7 @@ def runTauID(self):
Prediscriminants = noPrediscriminants,
taus = self.originalTauName,
graph_file = file_names,
year = full_version[0],
version = full_version[1],
sub_version = 1, #MB: subversion cannot be properly deduced from file names
disable_dxy_pca = True
Expand All @@ -614,7 +616,7 @@ def runTauID(self):
_rerunMvaIsolationSequence += _deepTauProducer

if "deepTau2018v2p5" in self.toKeep:
if self.debug: print ("Adding DeepTau IDs")
if self.debug: print ("Adding DeepTau v2p5 IDs")

_deepTauName = "deepTau2018v2p5"
workingPoints_ = WORKING_POINTS_v2p5
Expand All @@ -629,6 +631,7 @@ def runTauID(self):
Prediscriminants = noPrediscriminants,
taus = self.originalTauName,
graph_file = file_names,
year = full_version[0],
version = full_version[1],
sub_version = full_version[2],
disable_dxy_pca = True,
Expand All @@ -642,6 +645,44 @@ def runTauID(self):
_rerunMvaIsolationTask.add(_deepTauProducer)
_rerunMvaIsolationSequence += _deepTauProducer

if "deepTau2026v2p5" in self.toKeep:
if self.debug: print ("Adding Phase2 DeepTau v2p5 IDs")

_deepTauName = "deepTau2026v2p5"
workingPoints_ = WORKING_POINTS_PHASEII_v2p5

file_names = [
'core:RecoTauTag/TrainingFiles/data/DeepTauId/deepTau_2026v2p5_core.pb',
'inner:RecoTauTag/TrainingFiles/data/DeepTauId/deepTau_2026v2p5_inner.pb',
'outer:RecoTauTag/TrainingFiles/data/DeepTauId/deepTau_2026v2p5_outer.pb',
]
full_version = self.getDeepTauVersion(file_names[0])
setattr(self.process,_deepTauName+self.postfix,DeepTau.clone(
Prediscriminants = noPrediscriminants,
taus = self.originalTauName,
graph_file = file_names,
year = full_version[0],
version = full_version[1],
sub_version = full_version[2],
disable_dxy_pca = True,
disable_hcalFraction_workaround = True,
disable_CellIndex_workaround = True
))

from RecoTauTag.RecoTau.mergedPhase2SlimmedElectronsForTauId_cff import mergedSlimmedElectronsForTauId
if not hasattr(self.process,"mergedSlimmedElectronsForTauId"):
self.process.mergedSlimmedElectronsForTauId = mergedSlimmedElectronsForTauId
setattr(getattr(self.process, _deepTauName+self.postfix), "electrons", cms.InputTag("mergedSlimmedElectronsForTauId"))
setattr(getattr(self.process, _deepTauName+self.postfix), "vertices", cms.InputTag("offlineSlimmedPrimaryVertices4D"))

self.processDeepProducer(_deepTauName, tauIDSources, workingPoints_)

_deepTauProducer = getattr(self.process,_deepTauName+self.postfix)
_rerunMvaIsolationTask.add(self.process.mergedSlimmedElectronsForTauId)
_rerunMvaIsolationTask.add(_deepTauProducer)
_rerunMvaIsolationSequence += self.process.mergedSlimmedElectronsForTauId
_rerunMvaIsolationSequence += _deepTauProducer

if "againstEle2018" in self.toKeep:
antiElectronDiscrMVA6_version = "MVA6v3_noeveto"
### Define new anti-e discriminants
Expand Down
23 changes: 20 additions & 3 deletions RecoTauTag/RecoTau/test/runDeepTauIDsOnMiniAOD.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,20 +10,27 @@
minimalOutput = True
eventsToProcess = 100
nThreads = 1
phase2 = False

process = cms.Process('TauID')
process.load('Configuration.StandardSequences.MagneticField_cff')
process.load('Configuration.Geometry.GeometryRecoDB_cff')
process.load('Configuration.StandardSequences.FrontierConditions_GlobalTag_cff')
process.load('Configuration.StandardSequences.EndOfProcess_cff')

from Configuration.AlCa.GlobalTag import GlobalTag
process.GlobalTag = GlobalTag(process.GlobalTag, 'auto:phase1_2018_realistic', '')
if phase2:
process.load('Configuration.Geometry.GeometryExtended2026D88Reco_cff')
process.GlobalTag = GlobalTag(process.GlobalTag, 'auto:phase2_realistic_T25', '')
inputfile = '/store/mc/Phase2Spring21DRMiniAOD/TTbar_TuneCP5_14TeV-pythia8/MINIAODSIM/PU200Phase2D80_113X_mcRun4_realistic_T25_v1_ext1-v1/280000/04e6741c-489a-4fed-9e0c-d7703c274b5a.root'
else:
process.load('Configuration.Geometry.GeometryRecoDB_cff')
process.GlobalTag = GlobalTag(process.GlobalTag, 'auto:phase1_2018_realistic', '')
inputfile = '/store/mc/RunIISummer20UL18MiniAOD/TTToSemiLeptonic_TuneCP5_13TeV-powheg-pythia8/MINIAODSIM/106X_upgrade2018_realistic_v11_L1v1-v2/00000/009636D7-07B2-DB49-882D-C251FD62CCE7.root'

# Input source
process.source = cms.Source('PoolSource', fileNames = cms.untracked.vstring(
# File from dataset TTToSemiLeptonic_TuneCP5_13TeV-powheg-pythia8
'/store/mc/RunIISummer20UL18MiniAOD/TTToSemiLeptonic_TuneCP5_13TeV-powheg-pythia8/MINIAODSIM/106X_upgrade2018_realistic_v11_L1v1-v2/00000/009636D7-07B2-DB49-882D-C251FD62CCE7.root'
inputfile
))

process.maxEvents = cms.untracked.PSet( input = cms.untracked.int32(eventsToProcess) )
Expand All @@ -38,6 +45,12 @@
# "DPFTau_2016_v1",
"againstEle2018",
]
if phase2:
toKeep = [ "newDMPhase2v1",
# "deepTau2018v2p5",
"deepTau2026v2p5",
"againstElePhase2v1",
]
tauIdEmbedder = tauIdConfig.TauIDEmbedder(process, debug = False,
updatedTauName = updatedTauName,
toKeep = toKeep)
Expand Down Expand Up @@ -67,6 +80,10 @@
process.out.outputCommands.append("keep *_"+updatedTauName+"_*_*")
process.out.outputCommands.append("keep *_"+updatedTauName+postfix+"_*_*")

# Adapt to old phase2 input samples where slimmedElectronsHGC are called slimmedElectronsFromMultiCl
if phase2:
process.mergedSlimmedElectronsForTauId.src = ["slimmedElectrons","slimmedElectronsFromMultiCl"]

# Path and EndPath definitions
process.p = cms.Path(
process.rerunMvaIsolationSequence *
Expand Down