Skip to content

Commit

Permalink
add albertos exercise
Browse files Browse the repository at this point in the history
  • Loading branch information
cmantill committed Jan 14, 2021
1 parent ed89072 commit afe5092
Show file tree
Hide file tree
Showing 3 changed files with 303 additions and 10 deletions.
4 changes: 2 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ Testing one
python CondorHelper.py -r condor/run_bstar.sh -a test_args.txt -i "bs_select.py bstar.cc bstar_config.json helpers.py"
```

For 2016:
For 2016 (then change args file for other years):
```
python CondorHelper.py -r condor/run_bstar.sh -a all_args.txt -i "bs_select.py bstar.cc bstar_config.json helpers.py"
python CondorHelper.py -r condor/run_bstar.sh -a 2016_args.txt -i "bs_select.py bstar.cc bstar_config.json helpers.py"
```
31 changes: 23 additions & 8 deletions condor/run_bstar.sh
Original file line number Diff line number Diff line change
@@ -1,20 +1,35 @@
#!/bin/bash
echo "Run script starting"
ls
source /cvmfs/cms.cern.ch/cmsset_default.sh
xrdcp root://cmseos.fnal.gov//store/user/lcorcodi/CMSDAS2021env.tgz ./
xrdcp root://cmseos.fnal.gov//store/user/cmsdas/2021/long_exercises/BstarTW/CMSSW_11_0_1_cmsdas.tgz ./
export SCRAM_ARCH=slc7_amd64_gcc820
tar -xzf CMSDAS2021env.tgz
rm CMSDAS2021env.tgz
scramv1 project CMSSW CMSSW_11_0_1
tar -xzf CMSSW_11_0_1_cmsdas.tgz
rm CMSSW_11_0_1_cmsdas.tgz
cd CMSSW_11_0_1/src/
eval `scramv1 runtime -sh`
ls -lrth

#source timber-env/bin/activate
#python -c 'import TIMBER.Analyzer'
rm -rf timber-env

#mkdir new; cd new;
#xrdcp root://cmseos.fnal.gov//store/user/cmsdas/2021/long_exercises/BstarTW/timber-env.tgz ./
xrdcp root://cmseos.fnal.gov//store/user/cmsdas/2021/long_exercises/BstarTW/timber-env.tar.gz ./
tar -xzf timber-env.tar.gz
source timber-env/bin/activate
python -c 'import TIMBER.Analyzer'
cd ../../../

mkdir tardir; cp tarball.tgz tardir/; cd tardir/
tar -xzf tarball.tgz; rm tarball.tgz
cp -r * ../CMSSW_11_0_1/src/BstarToTW_CMSDAS2021; cd ../CMSSW_11_0_1/src/
eval `scramv1 runtime -sh`
source timber-env/bin/activate

cd BstarToTW_CMSDAS2021
ls
python -c 'import TIMBER.Analyzer'

echo python bs_select.py $*
python bs_select.py $*

cp Presel_*.root ../../
#xrdcp Presel_*.root root://cmseos.fnal.gov//store/user/cmantill/bstar_select_tau21/
278 changes: 278 additions & 0 deletions exercises/nminus1_sol.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,278 @@
'''
Apply simple kinematic selection and plot variables for N-1 selections
for signal and background MC and compare.
'''
import ROOT, collections,sys,os, time
sys.path.append('./')
from optparse import OptionParser
from collections import OrderedDict

from TIMBER.Analyzer import analyzer, HistGroup, VarGroup, CutGroup
from TIMBER.Tools.Common import CompileCpp, OpenJSON
from TIMBER.Tools.Plot import *
import helpers

ROOT.gROOT.SetBatch(True)

# CL options
parser = OptionParser()
parser.add_option('-y', '--year', metavar='YEAR', type='string', action='store',
default = '',
dest = 'year',
help = 'Year (16,17,18)')
parser.add_option('--select', metavar='BOOL', action='store_true',
default = False,
dest = 'select',
help = 'Whether to run the selection. If False, will attempt to recycle previous run histograms.')
(options, args) = parser.parse_args()

###########################################
# Establish some global variables for use #
###########################################
plotdir = 'plots/' # this is where we'll save your plots
if not os.path.exists(plotdir):
os.makedirs(plotdir)

rootfile_path = 'root://cmsxrootd.fnal.gov///store/user/cmsdas/2021/long_exercises/BstarTW/rootfiles'
config = OpenJSON('bstar_config.json')
cuts = config['CUTS'][options.year]

CompileCpp("TIMBER/Framework/include/common.h")
CompileCpp('bstar.cc') # has the c++ functions we need when looping of the RDataFrame

# Sets we want to process and some nice naming for our plots
signal_names = ['signalLH%s'%(mass) for mass in [2000]]#range(1400,4200,600)]
#bkg_names = ['singletop_tW', 'singletop_tWB', 'ttbar', 'QCDHT700', 'QCDHT1000', 'QCDHT1500', 'QCDHT2000'] # Everything
#bkg_names = ['singletop_tW','singletop_tWB'] #Alberto
bkg_names = ['QCDHT1500']
#bkg_names = ['singletop_tW','singletop_tWB','ttbar','QCD']

names = {
"singletop_tW":"single top (tW)",
"singletop_tWB":"single top (tW)",
"ttbar":"t#bar{t}",
"QCDHT700":"QCD",
"QCDHT1000":"QCD",
"QCDHT1500":"QCD",
"QCDHT2000":"QCD",
"QCD":"QCD",
"singletop":"single top (tW)"
}
for sig in signal_names:
names[sig] = "b*_{LH} %s (GeV)"%(sig[-4:])
# ... and some nice colors
colors = {}
for p in signal_names+bkg_names:
if 'signal' in p:
colors[p] = ROOT.kCyan-int((int(p[-4:])-1400)/600)
elif 'ttbar' in p:
colors[p] = ROOT.kRed
elif 'singletop' in p:
colors[p] = ROOT.kBlue
elif 'QCD' in p:
colors[p] = ROOT.kYellow
colors["QCD"] = ROOT.kYellow
colors["singletop"] = ROOT.kBlue
# ... and names
prettynames = {
'deltaY':'|#Delta y|',
'tau21':'#tau_{2} / #tau_{1}',
'mW':'m_{W} [GeV]',
'tau32':'#tau_{3} / #tau_{2}',
'subjet_btag':'Sub-jet DeepCSV',
'mtop':'m_{top} [GeV]',
'lead_jet_deepAK8_MD_TvsQCD':'Leading-jet DeepAK8 TvsQCD',
'sublead_jet_deepAK8_MD_TvsQCD':'Sub-jet DeepAK8 TvsQCD',
'lead_jet_deepAK8_MD_WvsQCD':'Leading-jet DeepAK8 WvsQCD',
'sublead_jet_deepAK8_MD_WvsQCD':'Sub-jet DeepAK8 WvsQCD',
'softdrop_mass_ratio':'Jets softdrop_mass_ratio'
}

# Flags - https://twiki.cern.ch/twiki/bin/view/CMS/MissingETOptionalFiltersRun2
flags = ["Flag_goodVertices",
"Flag_globalSuperTightHalo2016Filter",
"Flag_HBHENoiseFilter",
"Flag_HBHENoiseIsoFilter",
"Flag_EcalDeadCellTriggerPrimitiveFilter",
"Flag_BadPFMuonFilter",
"Flag_ecalBadCalibReducedMINIAODFilter"
]
# Triggers
if options.year == '16':
triggers = ["HLT_PFHT800","HLT_PFHT900","HLT_PFJet450"]
else:
triggers = ["HLT_PFHT1050","HLT_PFJet500","HLT_AK8PFJet380_TrimMass30","HLT_AK8PFJet400_TrimMass30"]

#########################################
# Define function for actual processing #
#########################################
def select(setname,year):
ROOT.ROOT.EnableImplicitMT(2) # Just use two threads - no need to kill the interactive nodes

# Initialize TIMBER analyzer
file_path = '%s/%s_bstar%s.root' %(rootfile_path,setname, year)
a = analyzer(file_path)

# Determine normalization weight
if not a.isData:
norm = helpers.getNormFactor(setname,year,config,a.genEventCount)
else:
norm = 1.

# Book actions on the RDataFrame
a.Cut('filters',a.GetFlagString(flags))
a.Cut('trigger',a.GetTriggerString(triggers))
a.Define('jetIdx','hemispherize(FatJet_phi, FatJet_jetId)') # need to calculate if we have two jets (with Id) that are back-to-back
a.Cut('nFatJets_cut','nFatJet > max(jetIdx[0],jetIdx[1])') # If we don't do this, we may try to access variables of jets that don't exist! (leads to seg fault)
a.Cut("hemis","(jetIdx[0] != -1)&&(jetIdx[1] != -1)") # cut on that calculation
a.Cut('pt_cut','FatJet_pt[jetIdx[0]] > 400 && FatJet_pt[jetIdx[1]] > 400')
a.Cut('eta_cut','abs(FatJet_eta[jetIdx[0]]) < 2.4 && abs(FatJet_eta[jetIdx[1]]) < 2.4')
a.Define('norm',str(norm))

#################################
# Build some variables for jets #
#################################
# Wtagging decision logic
# Returns 0 for no tag, 1 for lead tag, 2 for sublead tag, and 3 for both tag (which is physics-wise equivalent to 2)
wtag_str = "1*Wtag(FatJet_tau2[jetIdx[0]]/FatJet_tau1[jetIdx[0]],0,{0}, FatJet_msoftdrop[jetIdx[0]],65,105) + 2*Wtag(FatJet_tau2[jetIdx[1]]/FatJet_tau1[jetIdx[1]],0,{0}, FatJet_msoftdrop[jetIdx[1]],65,105)".format(cuts['tau21'])

jets = VarGroup('jets')
jets.Add('wtag_bit', wtag_str)
jets.Add('top_bit', '(wtag_bit & 2)? 0: (wtag_bit & 1)? 1: -1') # (if wtag==3 or 2 (subleading w), top_index=0) else (if wtag==1, top_index=1) else (-1)
jets.Add('top_index', 'top_bit >= 0 ? jetIdx[top_bit] : -1')
jets.Add('w_index', 'top_index == 0 ? jetIdx[1] : top_index == 1 ? jetIdx[0] : -1')
# Calculate some new comlumns that we'd like to cut on (that were costly to do before the other filtering)
jets.Add("lead_vect", "hardware::TLvector(FatJet_pt[jetIdx[0]],FatJet_eta[jetIdx[0]],FatJet_phi[jetIdx[0]],FatJet_msoftdrop[jetIdx[0]])")
jets.Add("sublead_vect","hardware::TLvector(FatJet_pt[jetIdx[1]],FatJet_eta[jetIdx[1]],FatJet_phi[jetIdx[1]],FatJet_msoftdrop[jetIdx[1]])")
jets.Add("deltaY", "abs(lead_vect.Rapidity()-sublead_vect.Rapidity())")
jets.Add("mtw", "hardware::invariantMass({lead_vect,sublead_vect})")

#########
# N - 1 #
#########
plotting_vars = VarGroup('plotting_vars') # assume leading is top and subleading is W
plotting_vars.Add("mtop", "FatJet_msoftdrop[jetIdx[0]]")
plotting_vars.Add("mW", "FatJet_msoftdrop[jetIdx[1]]")
#plotting_vars.Add("tau32", "FatJet_tau3[jetIdx[0]]/FatJet_tau2[jetIdx[0]]")
#plotting_vars.Add("subjet_btag", "max(SubJet_btagDeepB[FatJet_subJetIdx1[jetIdx[0]]],SubJet_btagDeepB[FatJet_subJetIdx2[jetIdx[0]]])")
#plotting_vars.Add("tau21", "FatJet_tau2[jetIdx[1]]/FatJet_tau1[jetIdx[1]]")
plotting_vars.Add("lead_jet_deepAK8_MD_TvsQCD", "FatJet_deepTagMD_TvsQCD[jetIdx[0]]")
plotting_vars.Add("sublead_jet_deepAK8_MD_TvsQCD", "FatJet_deepTagMD_TvsQCD[jetIdx[1]]")
plotting_vars.Add("lead_jet_deepAK8_MD_WvsQCD", "FatJet_deepTagMD_WvsQCD[jetIdx[0]]")
plotting_vars.Add("sublead_jet_deepAK8_MD_WvsQCD", "FatJet_deepTagMD_WvsQCD[jetIdx[1]]")
plotting_vars.Add('softdrop_mass_ratio','max(FatJet_msoftdrop[jetIdx[0]]/FatJet_msoftdrop[jetIdx[1]], FatJet_msoftdrop[jetIdx[1]]/FatJet_msoftdrop[jetIdx[0]])')


N_cuts = CutGroup('Ncuts') # cuts
N_cuts.Add("deltaY_cut", "deltaY<1.6")
N_cuts.Add("mtop_cut", "(mtop > 105.)&&(mtop < 220.)")
N_cuts.Add("mW_cut", "(mW > 65.)&&(mW < 105.)")
#N_cuts.Add("tau32_cut", "(tau32 > 0.0)&&(tau32 < %s)"%(cuts['tau32']))
#N_cuts.Add("subjet_btag_cut", "(subjet_btag > %s)&&(subjet_btag < 1.)"%(cuts['sjbtag']))
#N_cuts.Add("tau21_cut", "(tau21 > 0.0)&&(tau21 < %s)"%(cuts['tau21']))


# Organize N-1 of tagging variables when assuming top is always leading
nodeToPlot = a.Apply([jets,plotting_vars])
nminus1Nodes = a.Nminus1(N_cuts,nodeToPlot) # constructs N nodes with a different N-1 selection for each
nminus1Hists = HistGroup('nminus1Hists')
binning = {
'mtop': [25,50,300],
'mW': [25,30,270],
#'tau32': [20,0,1],
#'tau21': [20,0,1],
#'subjet_btag': [20,0,1],
'deltaY': [20,0,2.0],
'lead_jet_deepAK8_MD_TvsQCD': [20, 0.,1.],
'sublead_jet_deepAK8_MD_TvsQCD': [20, 0.,1.],
'lead_jet_deepAK8_MD_WvsQCD': [20, 0.,1.],
'sublead_jet_deepAK8_MD_WvsQCD': [20, 0.,1.],
'softdrop_mass_ratio': [40, 1., 5.]

}
# Add hists to group and write out
for nkey in nminus1Nodes.keys():
if nkey == 'full': continue
var = nkey.replace('_cut','').replace('minus_','')
hist_tuple = (var,var,binning[var][0],binning[var][1],binning[var][2])
hist = nminus1Nodes[nkey].DataFrame.Histo1D(hist_tuple,var,'norm')
hist.GetValue()
nminus1Hists.Add(var,hist)

#a.SetActiveNode(nminus1Nodes['full'])
for var in ['lead_jet_deepAK8_MD_TvsQCD', 'sublead_jet_deepAK8_MD_TvsQCD', 'lead_jet_deepAK8_MD_WvsQCD', 'sublead_jet_deepAK8_MD_WvsQCD', 'softdrop_mass_ratio']:
hist_tuple = (var,prettynames[var],binning[var][0],binning[var][1],binning[var][2])
hist = a.DataFrame.Histo1D(hist_tuple,var,'norm')
hist.GetValue()
nminus1Hists.Add(var,hist)

a.PrintNodeTree('my_exercises/nminus1_tree.dot')

# Return the group
return nminus1Hists

# Runs when calling `python ex4.py`
if __name__ == "__main__":
start_time = time.time()
histgroups = {} # all of the HistGroups we want to track
varnames = []
for setname in signal_names+bkg_names:
print ('Selecting for %s...'%setname)
rootfile_name = "rootfiles/%s_%s_Nminus1.root"%(setname,options.year)

# Perform selection and write out histograms if using --select
if options.select:
histgroup = select(setname,options.year)
outfile = ROOT.TFile.Open(rootfile_name,'RECREATE')
outfile.cd()
histgroup.Do('Write') # This will call TH1.Write() for all of the histograms in the group
outfile.Close()
del histgroup # Now that they are saved out, drop from memory

# Open histogram files that we saved
print ('Opening '+rootfile_name)
infile = ROOT.TFile.Open(rootfile_name)
# ... raise exception if we forgot to run with --select!
if infile == None:
raise TypeError(rootfile_name)
# Put histograms back into HistGroups
histgroups[setname] = HistGroup(setname)
for key in infile.GetListOfKeys(): # loop over histograms in the file
keyname = key.GetName()
inhist = infile.Get(key.GetName()) # get it from the file
inhist.SetDirectory(0) # set the directory so hist is stored in memory and not as reference to TFile (this way it doesn't get tossed by python garbage collection when infile changes)
histgroups[setname].Add(keyname,inhist) # add to our group
if keyname not in varnames:
varnames.append(keyname)

# For each variable to plot...
for varname in varnames:
if varname == 'deltaY': continue # deltaY optimization requires cuts on mtw to make sense so skipping
plot_filename = plotdir+'/%s_%s_Nminus1.png'%(varname,options.year)

# Setup ordered dictionaries so processes plot in the order we specify
bkg_hists,signal_hists = OrderedDict(),OrderedDict()
# Get the backgrounds
for bkg in bkg_names:
histgroups[bkg][varname].SetTitle('%s N-1 20%s'%(varname, options.year)) # empty title
if 'QCD' in bkg: # Add the QCD HT bins together for one QCD sample
if 'QCD' not in bkg_hists.keys():
bkg_hists['QCD'] = histgroups[bkg][varname].Clone('QCD_'+varname)
else:
bkg_hists['QCD'].Add(histgroups[bkg][varname])
elif 'singletop' in bkg: # Add the single top channels together for one single top sample
if 'singletop' not in bkg_hists.keys():
bkg_hists['singletop'] = histgroups[bkg][varname].Clone('singletop_'+varname)
else:
bkg_hists['singletop'].Add(histgroups[bkg][varname])
else: # add everything else normally (ttbar)
bkg_hists[bkg] = histgroups[bkg][varname]#all_hists[bkg] = histgroups[bkg][varname]#

# Add the signals
for sig in signal_names: signal_hists[sig] = histgroups[sig][varname]#all_hists[sig] = histgroups[sig][varname]#

# Plot everything together!
print ('Plotting %s'%(plot_filename))
helpers.CompareShapesWithSoverB(plot_filename,options.year,prettynames[varname],bkgs=bkg_hists,signals=signal_hists,colors=colors,names=names,stackBkg=True)

print ("Total time: "+str((time.time()-start_time)/60.) + ' min')

0 comments on commit afe5092

Please sign in to comment.