Skip to content

Commit

Permalink
fix: b-tagging threshold comparison (#182)
Browse files: browse the repository at this point in the history
* fix b-tagging threshold
* improve event number formatting
  • Loading branch information
alexander-held authored Jul 25, 2023
1 parent db4890c commit 15cd83c
Show file tree
Hide file tree
Showing 2 changed files with 75 additions and 55 deletions.
76 changes: 48 additions & 28 deletions analyses/cms-open-data-ttbar/ttbar_analysis_pipeline.ipynb
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
{
"cells": [
{
"attachments": {},
"cell_type": "markdown",
"id": "0dc37683",
"metadata": {},
Expand All @@ -24,6 +25,7 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"id": "404927fe",
"metadata": {},
Expand All @@ -35,6 +37,7 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"id": "bd72d323",
"metadata": {},
Expand Down Expand Up @@ -75,6 +78,7 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"id": "cd573bb1",
"metadata": {},
Expand All @@ -89,15 +93,15 @@
"\n",
"| setting | number of files | total size | number of events |\n",
"| --- | --- | --- | --- |\n",
"| `1` | 9 | 22.9 GB | 10455719 |\n",
"| `2` | 18 | 42.8 GB | 19497435 |\n",
"| `5` | 43 | 105 GB | 47996231 |\n",
"| `10` | 79 | 200 GB | 90546458 |\n",
"| `20` | 140 | 359 GB | 163123242 |\n",
"| `50` | 255 | 631 GB | 297247463 |\n",
"| `100` | 395 | 960 GB | 470397795 |\n",
"| `200` | 595 | 1.40 TB | 705273291 |\n",
"| `-1` | 787 | 1.78 TB | 940160174 |\n",
"| `1` | 9 | 22.9 GB | 10,455,719 |\n",
"| `2` | 18 | 42.8 GB | 19,497,435 |\n",
"| `5` | 43 | 105 GB | 47,996,231 |\n",
"| `10` | 79 | 200 GB | 90,546,458 |\n",
"| `20` | 140 | 359 GB | 163,123,242 |\n",
"| `50` | 255 | 631 GB | 297,247,463 |\n",
"| `100` | 395 | 960 GB | 470,397,795 |\n",
"| `200` | 595 | 1.40 TB | 705,273,291 |\n",
"| `-1` | 787 | 1.78 TB | 940,160,174 |\n",
"\n",
"The input files are all in the 1–3 GB range."
]
Expand Down Expand Up @@ -131,6 +135,7 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"id": "141d6520",
"metadata": {},
Expand Down Expand Up @@ -288,7 +293,7 @@
" # Basic selection criteria\n",
" selections.add(\"exactly_1l\", (ak.num(elecs) + ak.num(muons)) == 1)\n",
" selections.add(\"atleast_4j\", ak.num(jets) >= 4)\n",
" selections.add(\"exactly_1b\", ak.sum(jets.btagCSVV2 >= B_TAG_THRESHOLD, axis=1) == 1)\n",
" selections.add(\"exactly_1b\", ak.sum(jets.btagCSVV2 > B_TAG_THRESHOLD, axis=1) == 1)\n",
" selections.add(\"atleast_2b\", ak.sum(jets.btagCSVV2 > B_TAG_THRESHOLD, axis=1) >= 2)\n",
" # Complex selection criteria\n",
" selections.add(\"4j1b\", selections.all(\"exactly_1l\", \"atleast_4j\", \"exactly_1b\"))\n",
Expand Down Expand Up @@ -402,6 +407,7 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"id": "90dd4c9e",
"metadata": {},
Expand Down Expand Up @@ -446,6 +452,7 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"id": "b910d3d5",
"metadata": {},
Expand All @@ -465,40 +472,40 @@
"outputs": [],
"source": [
"def get_query(source: ObjectStream) -> ObjectStream:\n",
" \"\"\"Query for event / column selection: >=4j >=1b, ==1 lep with pT>30 GeV + additional cuts, \n",
" \"\"\"Query for event / column selection: >=4j >=1b, ==1 lep with pT>30 GeV + additional cuts,\n",
" return relevant columns\n",
" *NOTE* jet pT cut is set lower to account for systematic variations to jet pT\n",
" \"\"\"\n",
" cuts = source.Where(lambda e: {\"pt\": e.Electron_pt, \n",
" \"eta\": e.Electron_eta, \n",
" \"cutBased\": e.Electron_cutBased, \n",
" cuts = source.Where(lambda e: {\"pt\": e.Electron_pt,\n",
" \"eta\": e.Electron_eta,\n",
" \"cutBased\": e.Electron_cutBased,\n",
" \"sip3d\": e.Electron_sip3d,}.Zip()\\\n",
" .Where(lambda electron: (electron.pt > 30\n",
" and abs(electron.eta) < 2.1 \n",
" and abs(electron.eta) < 2.1\n",
" and electron.cutBased == 4\n",
" and electron.sip3d < 4)).Count() \n",
" + {\"pt\": e.Muon_pt, \n",
" and electron.sip3d < 4)).Count()\n",
" + {\"pt\": e.Muon_pt,\n",
" \"eta\": e.Muon_eta,\n",
" \"tightId\": e.Muon_tightId,\n",
" \"sip3d\": e.Muon_sip3d,\n",
" \"pfRelIso04_all\": e.Muon_pfRelIso04_all}.Zip()\\\n",
" .Where(lambda muon: (muon.pt > 30 \n",
" and abs(muon.eta) < 2.1 \n",
" and muon.tightId \n",
" .Where(lambda muon: (muon.pt > 30\n",
" and abs(muon.eta) < 2.1\n",
" and muon.tightId\n",
" and muon.pfRelIso04_all < 0.15)).Count()== 1)\\\n",
" .Where(lambda f: {\"pt\": f.Jet_pt, \n",
" .Where(lambda f: {\"pt\": f.Jet_pt,\n",
" \"eta\": f.Jet_eta,\n",
" \"jetId\": f.Jet_jetId}.Zip()\\\n",
" .Where(lambda jet: (jet.pt > 25 \n",
" and abs(jet.eta) < 2.4 \n",
" .Where(lambda jet: (jet.pt > 25\n",
" and abs(jet.eta) < 2.4\n",
" and jet.jetId == 6)).Count() >= 4)\\\n",
" .Where(lambda g: {\"pt\": g.Jet_pt, \n",
" .Where(lambda g: {\"pt\": g.Jet_pt,\n",
" \"eta\": g.Jet_eta,\n",
" \"btagCSVV2\": g.Jet_btagCSVV2,\n",
" \"jetId\": g.Jet_jetId}.Zip()\\\n",
" .Where(lambda jet: (jet.btagCSVV2 >= 0.5 \n",
" .Where(lambda jet: (jet.btagCSVV2 > 0.5\n",
" and jet.pt > 25\n",
" and abs(jet.eta) < 2.4) \n",
" and abs(jet.eta) < 2.4)\n",
" and jet.jetId == 6).Count() >= 1)\n",
" selection = cuts.Select(lambda h: {\"Electron_pt\": h.Electron_pt,\n",
" \"Electron_eta\": h.Electron_eta,\n",
Expand All @@ -524,7 +531,7 @@
" })\n",
" if USE_INFERENCE:\n",
" return selection\n",
" \n",
"\n",
" # some branches are only needed if USE_INFERENCE is turned on\n",
" return selection.Select(lambda h: {\"Electron_pt\": h.Electron_pt,\n",
" \"Electron_eta\": h.Electron_eta,\n",
Expand All @@ -545,6 +552,7 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"id": "d8f08fc1",
"metadata": {},
Expand Down Expand Up @@ -587,6 +595,7 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"id": "c28a9e49",
"metadata": {},
Expand Down Expand Up @@ -694,6 +703,7 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"id": "d7bb4428",
"metadata": {},
Expand Down Expand Up @@ -759,6 +769,7 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"id": "bed3df8b",
"metadata": {},
Expand Down Expand Up @@ -875,6 +886,7 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"id": "7c334dd3",
"metadata": {},
Expand Down Expand Up @@ -907,6 +919,7 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"id": "e904cd3c",
"metadata": {},
Expand Down Expand Up @@ -934,6 +947,7 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"id": "f36dc601",
"metadata": {},
Expand Down Expand Up @@ -981,6 +995,7 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"id": "c74e4361",
"metadata": {},
Expand Down Expand Up @@ -1018,6 +1033,7 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"id": "bd480eec",
"metadata": {},
Expand Down Expand Up @@ -1048,6 +1064,7 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"id": "3a293479",
"metadata": {},
Expand Down Expand Up @@ -1103,6 +1120,7 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"id": "14dc4b23",
"metadata": {},
Expand Down Expand Up @@ -1150,6 +1168,7 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"id": "7f60c316",
"metadata": {},
Expand Down Expand Up @@ -1293,6 +1312,7 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"id": "a2ce2d14",
"metadata": {},
Expand Down Expand Up @@ -1329,7 +1349,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.16"
"version": "3.9.16"
}
},
"nbformat": 4,
Expand Down
54 changes: 27 additions & 27 deletions analyses/cms-open-data-ttbar/ttbar_analysis_pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,15 +74,15 @@
#
# | setting | number of files | total size | number of events |
# | --- | --- | --- | --- |
# | `1` | 9 | 22.9 GB | 10455719 |
# | `2` | 18 | 42.8 GB | 19497435 |
# | `5` | 43 | 105 GB | 47996231 |
# | `10` | 79 | 200 GB | 90546458 |
# | `20` | 140 | 359 GB | 163123242 |
# | `50` | 255 | 631 GB | 297247463 |
# | `100` | 395 | 960 GB | 470397795 |
# | `200` | 595 | 1.40 TB | 705273291 |
# | `-1` | 787 | 1.78 TB | 940160174 |
# | `1` | 9 | 22.9 GB | 10,455,719 |
# | `2` | 18 | 42.8 GB | 19,497,435 |
# | `5` | 43 | 105 GB | 47,996,231 |
# | `10` | 79 | 200 GB | 90,546,458 |
# | `20` | 140 | 359 GB | 163,123,242 |
# | `50` | 255 | 631 GB | 297,247,463 |
# | `100` | 395 | 960 GB | 470,397,795 |
# | `200` | 595 | 1.40 TB | 705,273,291 |
# | `-1` | 787 | 1.78 TB | 940,160,174 |
#
# The input files are all in the 1–3 GB range.

Expand Down Expand Up @@ -251,7 +251,7 @@ def process(self, events):
# Basic selection criteria
selections.add("exactly_1l", (ak.num(elecs) + ak.num(muons)) == 1)
selections.add("atleast_4j", ak.num(jets) >= 4)
selections.add("exactly_1b", ak.sum(jets.btagCSVV2 >= B_TAG_THRESHOLD, axis=1) == 1)
selections.add("exactly_1b", ak.sum(jets.btagCSVV2 > B_TAG_THRESHOLD, axis=1) == 1)
selections.add("atleast_2b", ak.sum(jets.btagCSVV2 > B_TAG_THRESHOLD, axis=1) >= 2)
# Complex selection criteria
selections.add("4j1b", selections.all("exactly_1l", "atleast_4j", "exactly_1b"))
Expand Down Expand Up @@ -387,40 +387,40 @@ def postprocess(self, accumulator):

# %% tags=[]
def get_query(source: ObjectStream) -> ObjectStream:
"""Query for event / column selection: >=4j >=1b, ==1 lep with pT>30 GeV + additional cuts,
"""Query for event / column selection: >=4j >=1b, ==1 lep with pT>30 GeV + additional cuts,
return relevant columns
*NOTE* jet pT cut is set lower to account for systematic variations to jet pT
"""
cuts = source.Where(lambda e: {"pt": e.Electron_pt,
"eta": e.Electron_eta,
"cutBased": e.Electron_cutBased,
cuts = source.Where(lambda e: {"pt": e.Electron_pt,
"eta": e.Electron_eta,
"cutBased": e.Electron_cutBased,
"sip3d": e.Electron_sip3d,}.Zip()\
.Where(lambda electron: (electron.pt > 30
and abs(electron.eta) < 2.1
and abs(electron.eta) < 2.1
and electron.cutBased == 4
and electron.sip3d < 4)).Count()
+ {"pt": e.Muon_pt,
and electron.sip3d < 4)).Count()
+ {"pt": e.Muon_pt,
"eta": e.Muon_eta,
"tightId": e.Muon_tightId,
"sip3d": e.Muon_sip3d,
"pfRelIso04_all": e.Muon_pfRelIso04_all}.Zip()\
.Where(lambda muon: (muon.pt > 30
and abs(muon.eta) < 2.1
and muon.tightId
.Where(lambda muon: (muon.pt > 30
and abs(muon.eta) < 2.1
and muon.tightId
and muon.pfRelIso04_all < 0.15)).Count()== 1)\
.Where(lambda f: {"pt": f.Jet_pt,
.Where(lambda f: {"pt": f.Jet_pt,
"eta": f.Jet_eta,
"jetId": f.Jet_jetId}.Zip()\
.Where(lambda jet: (jet.pt > 25
and abs(jet.eta) < 2.4
.Where(lambda jet: (jet.pt > 25
and abs(jet.eta) < 2.4
and jet.jetId == 6)).Count() >= 4)\
.Where(lambda g: {"pt": g.Jet_pt,
.Where(lambda g: {"pt": g.Jet_pt,
"eta": g.Jet_eta,
"btagCSVV2": g.Jet_btagCSVV2,
"jetId": g.Jet_jetId}.Zip()\
.Where(lambda jet: (jet.btagCSVV2 >= 0.5
.Where(lambda jet: (jet.btagCSVV2 > 0.5
and jet.pt > 25
and abs(jet.eta) < 2.4)
and abs(jet.eta) < 2.4)
and jet.jetId == 6).Count() >= 1)
selection = cuts.Select(lambda h: {"Electron_pt": h.Electron_pt,
"Electron_eta": h.Electron_eta,
Expand All @@ -446,7 +446,7 @@ def get_query(source: ObjectStream) -> ObjectStream:
})
if USE_INFERENCE:
return selection

# some branches are only needed if USE_INFERENCE is turned on
return selection.Select(lambda h: {"Electron_pt": h.Electron_pt,
"Electron_eta": h.Electron_eta,
Expand Down

0 comments on commit 15cd83c

Please sign in to comment.