Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

New MVA-based Tau-Ids #108

Closed
wants to merge 5 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions PhysicsTools/NanoAOD/python/nanoDQM_cfi.py
Original file line number Diff line number Diff line change
Expand Up @@ -444,6 +444,7 @@
Plot1D('idDecayModeNewDMs', 'idDecayModeNewDMs', 2, -0.5, 1.5, "tauID('decayModeFindingNewDMs')"),
Plot1D('idMVAnewDM', 'idMVAnewDM', 64, -0.5, 63.5, 'IsolationMVArun2v1DBnewDMwLT ID working point: bitmask 1 = VLoose, 2 = Loose, 4 = Medium, 8 = Tight, 16 = VTight, 32 = VVTight'),
Plot1D('idMVAoldDM', 'idMVAoldDM', 64, -0.5, 63.5, 'IsolationMVArun2v1DBoldDMwLT ID working point: bitmask 1 = VLoose, 2 = Loose, 4 = Medium, 8 = Tight, 16 = VTight, 32 = VVTight'),
Plot1D('idMVAoldDM2017v1', 'idMVAoldDM2017v1', 128, -0.5, 127.5, 'IsolationMVArun2v1DBoldDMwLT ID working point (2017v1): bitmask 1 = VVLoose, 2 = VLoose, 4 = Loose, 8 = Medium, 16 = Tight, 32 = VTight, 64 = VVTight'),
Plot1D('idMVAoldDMdR03', 'idMVAoldDMdR03', 64, -0.5, 63.5, 'IsolationMVArun2v1DBdR03oldDMwLT ID working point: bitmask 1 = VLoose, 2 = Loose, 4 = Medium, 8 = Tight, 16 = VTight, 32 = VVTight'),
NoPlot('jetIdx'),
Plot1D('leadTkDeltaEta', 'leadTkDeltaEta', 20, -0.1, 0.1, 'eta of the leading track, minus tau eta'),
Expand All @@ -460,6 +461,7 @@
Plot1D('rawIso', 'rawIso', 20, 0, 200, 'combined isolation (deltaBeta corrections)'),
Plot1D('rawMVAnewDM', 'rawMVAnewDM', 20, -1, 1, 'byIsolationMVArun2v1DBnewDMwLT raw output discriminator'),
Plot1D('rawMVAoldDM', 'rawMVAoldDM', 20, -1, 1, 'byIsolationMVArun2v1DBoldDMwLT raw output discriminator'),
Plot1D('rawMVAoldDM2017v1', 'rawMVAoldDM2017v1', 20, -1, 1, 'byIsolationMVArun2v1DBoldDMwLT raw output discriminator (2017v1)'),
Plot1D('rawMVAoldDMdR03', 'rawMVAoldDMdR03', 20, -1, 1, 'byIsolationMVArun2v1DBdR03oldDMwLT raw output discriminator'),
)
),
Expand Down
93 changes: 89 additions & 4 deletions PhysicsTools/NanoAOD/python/taus_cff.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,13 +3,94 @@
from PhysicsTools.JetMCAlgos.TauGenJets_cfi import tauGenJets
from PhysicsTools.JetMCAlgos.TauGenJetsDecayModeSelectorAllHadrons_cfi import tauGenJetsSelectorAllHadrons

##################### Updated tau collection with MVA-based tau-Ids rerun #######
# Used only in some eras
from RecoTauTag.Configuration.loadRecoTauTagMVAsFromPrepDB_cfi import *
from RecoTauTag.RecoTau.PATTauDiscriminationByMVAIsolationRun2_cff import *

### MVAIso DBoldDM
# Raw MVA output: clone of patDiscriminationByIsolationMVArun2v1raw, run on
# 'slimmedTaus' with no prediscriminants and with the MVA loaded from the DB.
# The working-point discriminators defined after it take this module as input.
patTauDiscriminationByIsolationMVArun2v1DBoldDMwLTraw = patDiscriminationByIsolationMVArun2v1raw.clone(
PATTauProducer = cms.InputTag('slimmedTaus'),
Prediscriminants = noPrediscriminants,
loadMVAfromDB = cms.bool(True),
mvaName = cms.string("RecoTauTag_tauIdMVAIsoDBoldDMwLT2017v1"), # name of the training to use
mvaOpt = cms.string("DBoldDMwLTwGJ"), # training option, i.e. which variables are used to compute the BDT score
requireDecayMode = cms.bool(True),
verbosity = cms.int32(0)
)
# VVLoose WP
# Cloned from the VLoose template module; the cut below selects the WPEff95 payload.
# This module is also the prototype that the remaining working points clone.
patTauDiscriminationByVVLooseIsolationMVArun2v1DBoldDMwLT = patDiscriminationByIsolationMVArun2v1VLoose.clone(
PATTauProducer = cms.InputTag('slimmedTaus'),
Prediscriminants = noPrediscriminants,
# Both inputs come from the raw discriminator: its main output and its 'category' output.
toMultiplex = cms.InputTag('patTauDiscriminationByIsolationMVArun2v1DBoldDMwLTraw'),
key = cms.InputTag('patTauDiscriminationByIsolationMVArun2v1DBoldDMwLTraw','category'),
loadMVAfromDB = cms.bool(True),
mvaOutput_normalization = cms.string("RecoTauTag_tauIdMVAIsoDBoldDMwLT2017v1_mvaOutput_normalization"), # normalization of the training you want to use
mapping = cms.VPSet(
cms.PSet(
category = cms.uint32(0),
cut = cms.string("RecoTauTag_tauIdMVAIsoDBoldDMwLT2017v1_WPEff95"), # this is the name of the working point you want to use
variable = cms.string("pt"), # presumably the cut is parametrised in tau pt - TODO confirm
)
)
)
# The remaining working points are clones of the VVLoose module; each one only
# replaces the cut payload name of its single mapping entry.
# VLoose WP (WPEff90)
patTauDiscriminationByVLooseIsolationMVArun2v1DBoldDMwLT = patTauDiscriminationByVVLooseIsolationMVArun2v1DBoldDMwLT.clone()
patTauDiscriminationByVLooseIsolationMVArun2v1DBoldDMwLT.mapping[0].cut = cms.string("RecoTauTag_tauIdMVAIsoDBoldDMwLT2017v1_WPEff90")
# Loose WP (WPEff80)
patTauDiscriminationByLooseIsolationMVArun2v1DBoldDMwLT = patTauDiscriminationByVVLooseIsolationMVArun2v1DBoldDMwLT.clone()
patTauDiscriminationByLooseIsolationMVArun2v1DBoldDMwLT.mapping[0].cut = cms.string("RecoTauTag_tauIdMVAIsoDBoldDMwLT2017v1_WPEff80")
# Medium WP (WPEff70)
patTauDiscriminationByMediumIsolationMVArun2v1DBoldDMwLT = patTauDiscriminationByVVLooseIsolationMVArun2v1DBoldDMwLT.clone()
patTauDiscriminationByMediumIsolationMVArun2v1DBoldDMwLT.mapping[0].cut = cms.string("RecoTauTag_tauIdMVAIsoDBoldDMwLT2017v1_WPEff70")
# Tight WP (WPEff60)
patTauDiscriminationByTightIsolationMVArun2v1DBoldDMwLT = patTauDiscriminationByVVLooseIsolationMVArun2v1DBoldDMwLT.clone()
patTauDiscriminationByTightIsolationMVArun2v1DBoldDMwLT.mapping[0].cut = cms.string("RecoTauTag_tauIdMVAIsoDBoldDMwLT2017v1_WPEff60")
# VTight WP (WPEff50)
patTauDiscriminationByVTightIsolationMVArun2v1DBoldDMwLT = patTauDiscriminationByVVLooseIsolationMVArun2v1DBoldDMwLT.clone()
patTauDiscriminationByVTightIsolationMVArun2v1DBoldDMwLT.mapping[0].cut = cms.string("RecoTauTag_tauIdMVAIsoDBoldDMwLT2017v1_WPEff50")
# VVTight WP (WPEff40)
patTauDiscriminationByVVTightIsolationMVArun2v1DBoldDMwLT = patTauDiscriminationByVVLooseIsolationMVArun2v1DBoldDMwLT.clone()
patTauDiscriminationByVVTightIsolationMVArun2v1DBoldDMwLT.mapping[0].cut = cms.string("RecoTauTag_tauIdMVAIsoDBoldDMwLT2017v1_WPEff40")
# MVAIso DBoldDM sequence: the raw discriminator first, then the seven
# working-point discriminators that read its output.
patTauDiscriminationByIsolationMVArun2v1DBoldDMwLTSeq = cms.Sequence(
patTauDiscriminationByIsolationMVArun2v1DBoldDMwLTraw
+ patTauDiscriminationByVVLooseIsolationMVArun2v1DBoldDMwLT
+ patTauDiscriminationByVLooseIsolationMVArun2v1DBoldDMwLT
+ patTauDiscriminationByLooseIsolationMVArun2v1DBoldDMwLT
+ patTauDiscriminationByMediumIsolationMVArun2v1DBoldDMwLT
+ patTauDiscriminationByTightIsolationMVArun2v1DBoldDMwLT
+ patTauDiscriminationByVTightIsolationMVArun2v1DBoldDMwLT
+ patTauDiscriminationByVVTightIsolationMVArun2v1DBoldDMwLT
)
### FIXME: add other tau-Ids when ready

##################### Sequence for the rerun MVA tau-Ids ########################
### put all new MVA tau-Id stuff into one Sequence
patTauMVAIDsSeq = cms.Sequence(
patTauDiscriminationByIsolationMVArun2v1DBoldDMwLTSeq
)
# Embed the new MVA tau-Ids into a new tau collection.
# Each tauIDSources entry maps the tau-ID name given on the left (with a
# '2017v1' suffix) to the discriminator module that provides it.
slimmedTausUpdated = cms.EDProducer("PATTauIDEmbedder",
src = cms.InputTag('slimmedTaus'),
tauIDSources = cms.PSet(
byIsolationMVArun2v1DBoldDMwLTraw2017v1 = cms.InputTag('patTauDiscriminationByIsolationMVArun2v1DBoldDMwLTraw'),
byVVLooseIsolationMVArun2v1DBoldDMwLT2017v1 = cms.InputTag('patTauDiscriminationByVVLooseIsolationMVArun2v1DBoldDMwLT'),
byVLooseIsolationMVArun2v1DBoldDMwLT2017v1 = cms.InputTag('patTauDiscriminationByVLooseIsolationMVArun2v1DBoldDMwLT'),
byLooseIsolationMVArun2v1DBoldDMwLT2017v1 = cms.InputTag('patTauDiscriminationByLooseIsolationMVArun2v1DBoldDMwLT'),
byMediumIsolationMVArun2v1DBoldDMwLT2017v1 = cms.InputTag('patTauDiscriminationByMediumIsolationMVArun2v1DBoldDMwLT'),
byTightIsolationMVArun2v1DBoldDMwLT2017v1 = cms.InputTag('patTauDiscriminationByTightIsolationMVArun2v1DBoldDMwLT'),
byVTightIsolationMVArun2v1DBoldDMwLT2017v1 = cms.InputTag('patTauDiscriminationByVTightIsolationMVArun2v1DBoldDMwLT'),
byVVTightIsolationMVArun2v1DBoldDMwLT2017v1 = cms.InputTag('patTauDiscriminationByVVTightIsolationMVArun2v1DBoldDMwLT'),
)
)
# The embedder is appended after the discriminators in the sequence.
patTauMVAIDsSeq += slimmedTausUpdated

##################### User floats producers, selectors ##########################

# Tau selection, as spelled out in the cut strings below: pt > 18,
# decayModeFindingNewDMs, and at least one of the listed loose isolation IDs.
# NOTE(review): two src/cut pairs are visible here; presumably the first pair
# ('slimmedTaus') is the removed side of the diff and the second pair
# ('slimmedTausUpdated', which adds the 2017v1 VVLoose ID to the OR) is the
# added side - confirm against the actual file.
finalTaus = cms.EDFilter("PATTauRefSelector",
src = cms.InputTag("slimmedTaus"),
cut = cms.string("pt > 18 && tauID('decayModeFindingNewDMs') && (tauID('byLooseCombinedIsolationDeltaBetaCorr3Hits') || tauID('byVLooseIsolationMVArun2v1DBoldDMwLT') || tauID('byVLooseIsolationMVArun2v1DBnewDMwLT') || tauID('byVLooseIsolationMVArun2v1DBdR03oldDMwLT'))")
src = cms.InputTag("slimmedTausUpdated"),
cut = cms.string("pt > 18 && tauID('decayModeFindingNewDMs') && (tauID('byLooseCombinedIsolationDeltaBetaCorr3Hits') || tauID('byVLooseIsolationMVArun2v1DBoldDMwLT') || tauID('byVLooseIsolationMVArun2v1DBnewDMwLT') || tauID('byVLooseIsolationMVArun2v1DBdR03oldDMwLT') || tauID('byVVLooseIsolationMVArun2v1DBoldDMwLT2017v1'))")
)

##################### Tables for final output and docs ##########################
Expand All @@ -22,6 +103,8 @@ def _tauId5WPMask(pattern,doc):
return _tauIdWPMask(pattern,choices=("VLoose","Loose","Medium","Tight","VTight"),doc=doc)
def _tauId6WPMask(pattern,doc):
    """Build a tau-ID working-point mask for the six-WP scheme.

    Forwards `pattern` and `doc` unchanged to `_tauIdWPMask`, supplying the
    working-point names from VLoose up to VVTight as `choices`.
    """
    sixWorkingPoints = ("VLoose","Loose","Medium","Tight","VTight","VVTight")
    return _tauIdWPMask(pattern,choices=sixWorkingPoints,doc=doc)
def _tauId7WPMask(pattern,doc):
    """Build a tau-ID working-point mask for the seven-WP scheme.

    Forwards `pattern` and `doc` unchanged to `_tauIdWPMask`, supplying the
    working-point names from VVLoose up to VVTight as `choices`.
    """
    sevenWorkingPoints = ("VVLoose","VLoose","Loose","Medium","Tight","VTight","VVTight")
    return _tauIdWPMask(pattern,choices=sevenWorkingPoints,doc=doc)


tauTable = cms.EDProducer("SimpleCandidateFlatTableProducer",
Expand Down Expand Up @@ -55,6 +138,7 @@ def _tauId6WPMask(pattern,doc):

rawMVAnewDM = Var( "tauID('byIsolationMVArun2v1DBnewDMwLTraw')",float, doc="byIsolationMVArun2v1DBnewDMwLT raw output discriminator",precision=10),
rawMVAoldDM = Var( "tauID('byIsolationMVArun2v1DBoldDMwLTraw')",float, doc="byIsolationMVArun2v1DBoldDMwLT raw output discriminator",precision=10),
rawMVAoldDM2017v1 = Var( "tauID('byIsolationMVArun2v1DBoldDMwLTraw2017v1')",float, doc="byIsolationMVArun2v1DBoldDMwLT raw output discriminator (2017v1)",precision=10),#FIXME: probably RAW of the old trainig should be removed
rawMVAoldDMdR03 = Var( "tauID('byIsolationMVArun2v1DBdR03oldDMwLTraw')",float, doc="byIsolationMVArun2v1DBdR03oldDMwLT raw output discriminator",precision=10),
rawAntiEle = Var("tauID('againstElectronMVA6Raw')", float, doc= "Anti-electron MVA discriminator V6 raw output discriminator", precision=10),
rawAntiEleCat = Var("tauID('againstElectronMVA6category')", int, doc="Anti-electron MVA discriminator V6 category"),
Expand All @@ -63,6 +147,7 @@ def _tauId6WPMask(pattern,doc):
idAntiEle = _tauId5WPMask("againstElectron%sMVA6", doc= "Anti-electron MVA discriminator V6"),
idMVAnewDM = _tauId6WPMask( "by%sIsolationMVArun2v1DBnewDMwLT", doc="IsolationMVArun2v1DBnewDMwLT ID working point"),
idMVAoldDM = _tauId6WPMask( "by%sIsolationMVArun2v1DBoldDMwLT", doc="IsolationMVArun2v1DBoldDMwLT ID working point"),
idMVAoldDM2017v1 = _tauId7WPMask( "by%sIsolationMVArun2v1DBoldDMwLT2017v1", doc="IsolationMVArun2v1DBoldDMwLT ID working point (2017v1)"),
idMVAoldDMdR03 = _tauId6WPMask( "by%sIsolationMVArun2v1DBdR03oldDMwLT", doc="IsolationMVArun2v1DBdR03oldDMwLT ID working point"),


Expand Down Expand Up @@ -134,7 +219,7 @@ def _tauId6WPMask(pattern,doc):
)


# NOTE(review): tauSequence is assigned twice; presumably these are the removed
# and added sides of the diff - confirm. As written, the second assignment
# (which runs patTauMVAIDsSeq before finalTaus) is the one that takes effect.
tauSequence = cms.Sequence(finalTaus)
tauSequence = cms.Sequence(patTauMVAIDsSeq + finalTaus)
# Sequence holding the tau table producer.
tauTables = cms.Sequence(tauTable)
# Sequence holding the generator-level tau modules and the MC-matching tables.
tauMC = cms.Sequence(tauGenJets + tauGenJetsSelectorAllHadrons + genVisTaus + genVisTauTable + tausMCMatchLepTauForTable + tausMCMatchHadTauForTable + tauMCTable)

Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,9 @@
'tauIdMVAIsoDBoldDMwLT2016' : "tauIdMVAIsoDBoldDMwLT2016",
'tauIdMVAIsoDBnewDMwLT2016' : "tauIdMVAIsoDBnewDMwLT2016"
}
# Run-2 2017 isolation-MVA trainings: training key -> name used to build the
# payload tags. For the single 2017 training the key and the name coincide.
tauIdDiscrMVA_trainings_run2_2017 = {
    trainingName : trainingName
    for trainingName in ('tauIdMVAIsoDBoldDMwLT2017',)
}
tauIdDiscrMVA_WPs = {
'tauIdMVAoldDMwoLT' : {
'Eff90' : "oldDMwoLTEff90",
Expand Down Expand Up @@ -141,6 +144,17 @@
'Eff40' : "DBnewDMwLT2016Eff40"
}
}
# Working points of the 2017 training, keyed by training name and then by
# efficiency label ('Eff95' ... 'Eff40'), listed from loosest to tightest.
tauIdDiscrMVA_WPs_run2_2017 = {
    'tauIdMVAIsoDBoldDMwLT2017' : {
        "Eff%d" % efficiency : "DBoldDMwLTEff%d" % efficiency
        for efficiency in (95, 90, 80, 70, 60, 50, 40)
    }
}
tauIdDiscrMVA_mvaOutput_normalizations = {
'tauIdMVAoldDMwoLT' : "mvaOutput_normalization_oldDMwoLT",
'tauIdMVAoldDMwLT' : "mvaOutput_normalization_oldDMwLT",
Expand All @@ -159,6 +173,9 @@
'tauIdMVAIsoDBoldDMwLT2016' : "mvaOutput_normalization_DBoldDMwLT2016",
'tauIdMVAIsoDBnewDMwLT2016' : "mvaOutput_normalization_DBnewDMwLT2016"
}
# MVA-output normalization name for each 2017 training, keyed by training name.
tauIdDiscrMVA_mvaOutput_normalizations_run2_2017 = dict(
    tauIdMVAIsoDBoldDMwLT2017 = "mvaOutput_normalization",
)
tauIdDiscrMVA_version = "v1"
for training, gbrForestName in tauIdDiscrMVA_trainings.items():
loadRecoTauTagMVAsFromPrepDB.toGet.append(
Expand Down Expand Up @@ -229,7 +246,32 @@
label = cms.untracked.string("RecoTauTag_%s%s_mvaOutput_normalization" % (gbrForestName, tauIdDiscrMVA_version))
)
)
# Register the payloads of the 2017 trainings in loadRecoTauTagMVAsFromPrepDB.toGet.
# For every training, in this order: one GBRWrapperRcd entry, one
# PhysicsTGraphPayloadRcd entry per working point, and one
# PhysicsTFormulaPayloadRcd entry for the MVA-output normalization.
# Tag and label of an entry are always the same string.
tauIdDiscrMVA_2017_version = "v1"
for training, gbrForestName in tauIdDiscrMVA_trainings_run2_2017.items():
    # "RecoTauTag_<name><version>"
    loadRecoTauTagMVAsFromPrepDB.toGet.append(
        cms.PSet(
            record = cms.string('GBRWrapperRcd'),
            tag = cms.string("RecoTauTag_%s%s" % (gbrForestName, tauIdDiscrMVA_2017_version)),
            label = cms.untracked.string("RecoTauTag_%s%s" % (gbrForestName, tauIdDiscrMVA_2017_version))
        )
    )
    # "RecoTauTag_<name><version>_WP<Effxx>"; only the working-point keys are used here
    for WP in tauIdDiscrMVA_WPs_run2_2017[training]:
        loadRecoTauTagMVAsFromPrepDB.toGet.append(
            cms.PSet(
                record = cms.string('PhysicsTGraphPayloadRcd'),
                tag = cms.string("RecoTauTag_%s%s_WP%s" % (gbrForestName, tauIdDiscrMVA_2017_version, WP)),
                label = cms.untracked.string("RecoTauTag_%s%s_WP%s" % (gbrForestName, tauIdDiscrMVA_2017_version, WP))
            )
        )
    # "RecoTauTag_<name><version>_mvaOutput_normalization"
    loadRecoTauTagMVAsFromPrepDB.toGet.append(
        cms.PSet(
            record = cms.string('PhysicsTFormulaPayloadRcd'),
            tag = cms.string("RecoTauTag_%s%s_mvaOutput_normalization" % (gbrForestName, tauIdDiscrMVA_2017_version)),
            label = cms.untracked.string("RecoTauTag_%s%s_mvaOutput_normalization" % (gbrForestName, tauIdDiscrMVA_2017_version))
        )
    )

####
# register anti-electron discriminator MVA
antiElectronDiscrMVA5_categories = {
'0' : "gbr_NoEleMatch_woGwoGSF_BL",
Expand Down