Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix: b-tagging threshold comparison #182

Merged
merged 2 commits into from
Jul 25, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
76 changes: 48 additions & 28 deletions analyses/cms-open-data-ttbar/ttbar_analysis_pipeline.ipynb
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
{
"cells": [
{
"attachments": {},
"cell_type": "markdown",
"id": "0dc37683",
"metadata": {},
Expand All @@ -24,6 +25,7 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"id": "404927fe",
"metadata": {},
Expand All @@ -35,6 +37,7 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"id": "bd72d323",
"metadata": {},
Expand Down Expand Up @@ -75,6 +78,7 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"id": "cd573bb1",
"metadata": {},
Expand All @@ -89,15 +93,15 @@
"\n",
"| setting | number of files | total size | number of events |\n",
"| --- | --- | --- | --- |\n",
"| `1` | 9 | 22.9 GB | 10455719 |\n",
"| `2` | 18 | 42.8 GB | 19497435 |\n",
"| `5` | 43 | 105 GB | 47996231 |\n",
"| `10` | 79 | 200 GB | 90546458 |\n",
"| `20` | 140 | 359 GB | 163123242 |\n",
"| `50` | 255 | 631 GB | 297247463 |\n",
"| `100` | 395 | 960 GB | 470397795 |\n",
"| `200` | 595 | 1.40 TB | 705273291 |\n",
"| `-1` | 787 | 1.78 TB | 940160174 |\n",
"| `1` | 9 | 22.9 GB | 10,455,719 |\n",
"| `2` | 18 | 42.8 GB | 19,497,435 |\n",
"| `5` | 43 | 105 GB | 47,996,231 |\n",
"| `10` | 79 | 200 GB | 90,546,458 |\n",
"| `20` | 140 | 359 GB | 163,123,242 |\n",
"| `50` | 255 | 631 GB | 297,247,463 |\n",
"| `100` | 395 | 960 GB | 470,397,795 |\n",
"| `200` | 595 | 1.40 TB | 705,273,291 |\n",
"| `-1` | 787 | 1.78 TB | 940,160,174 |\n",
"\n",
"The input files are all in the 1–3 GB range."
]
Expand Down Expand Up @@ -131,6 +135,7 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"id": "141d6520",
"metadata": {},
Expand Down Expand Up @@ -288,7 +293,7 @@
" # Basic selection criteria\n",
" selections.add(\"exactly_1l\", (ak.num(elecs) + ak.num(muons)) == 1)\n",
" selections.add(\"atleast_4j\", ak.num(jets) >= 4)\n",
" selections.add(\"exactly_1b\", ak.sum(jets.btagCSVV2 >= B_TAG_THRESHOLD, axis=1) == 1)\n",
" selections.add(\"exactly_1b\", ak.sum(jets.btagCSVV2 > B_TAG_THRESHOLD, axis=1) == 1)\n",
" selections.add(\"atleast_2b\", ak.sum(jets.btagCSVV2 > B_TAG_THRESHOLD, axis=1) >= 2)\n",
" # Complex selection criteria\n",
" selections.add(\"4j1b\", selections.all(\"exactly_1l\", \"atleast_4j\", \"exactly_1b\"))\n",
Expand Down Expand Up @@ -402,6 +407,7 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"id": "90dd4c9e",
"metadata": {},
Expand Down Expand Up @@ -446,6 +452,7 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"id": "b910d3d5",
"metadata": {},
Expand All @@ -465,40 +472,40 @@
"outputs": [],
"source": [
"def get_query(source: ObjectStream) -> ObjectStream:\n",
" \"\"\"Query for event / column selection: >=4j >=1b, ==1 lep with pT>30 GeV + additional cuts, \n",
" \"\"\"Query for event / column selection: >=4j >=1b, ==1 lep with pT>30 GeV + additional cuts,\n",
" return relevant columns\n",
" *NOTE* jet pT cut is set lower to account for systematic variations to jet pT\n",
" \"\"\"\n",
" cuts = source.Where(lambda e: {\"pt\": e.Electron_pt, \n",
" \"eta\": e.Electron_eta, \n",
" \"cutBased\": e.Electron_cutBased, \n",
" cuts = source.Where(lambda e: {\"pt\": e.Electron_pt,\n",
" \"eta\": e.Electron_eta,\n",
" \"cutBased\": e.Electron_cutBased,\n",
" \"sip3d\": e.Electron_sip3d,}.Zip()\\\n",
" .Where(lambda electron: (electron.pt > 30\n",
" and abs(electron.eta) < 2.1 \n",
" and abs(electron.eta) < 2.1\n",
" and electron.cutBased == 4\n",
" and electron.sip3d < 4)).Count() \n",
" + {\"pt\": e.Muon_pt, \n",
" and electron.sip3d < 4)).Count()\n",
" + {\"pt\": e.Muon_pt,\n",
" \"eta\": e.Muon_eta,\n",
" \"tightId\": e.Muon_tightId,\n",
" \"sip3d\": e.Muon_sip3d,\n",
" \"pfRelIso04_all\": e.Muon_pfRelIso04_all}.Zip()\\\n",
" .Where(lambda muon: (muon.pt > 30 \n",
" and abs(muon.eta) < 2.1 \n",
" and muon.tightId \n",
" .Where(lambda muon: (muon.pt > 30\n",
" and abs(muon.eta) < 2.1\n",
" and muon.tightId\n",
" and muon.pfRelIso04_all < 0.15)).Count()== 1)\\\n",
" .Where(lambda f: {\"pt\": f.Jet_pt, \n",
" .Where(lambda f: {\"pt\": f.Jet_pt,\n",
" \"eta\": f.Jet_eta,\n",
" \"jetId\": f.Jet_jetId}.Zip()\\\n",
" .Where(lambda jet: (jet.pt > 25 \n",
" and abs(jet.eta) < 2.4 \n",
" .Where(lambda jet: (jet.pt > 25\n",
" and abs(jet.eta) < 2.4\n",
" and jet.jetId == 6)).Count() >= 4)\\\n",
" .Where(lambda g: {\"pt\": g.Jet_pt, \n",
" .Where(lambda g: {\"pt\": g.Jet_pt,\n",
" \"eta\": g.Jet_eta,\n",
" \"btagCSVV2\": g.Jet_btagCSVV2,\n",
" \"jetId\": g.Jet_jetId}.Zip()\\\n",
" .Where(lambda jet: (jet.btagCSVV2 >= 0.5 \n",
" .Where(lambda jet: (jet.btagCSVV2 > 0.5\n",
" and jet.pt > 25\n",
" and abs(jet.eta) < 2.4) \n",
" and abs(jet.eta) < 2.4)\n",
" and jet.jetId == 6).Count() >= 1)\n",
" selection = cuts.Select(lambda h: {\"Electron_pt\": h.Electron_pt,\n",
" \"Electron_eta\": h.Electron_eta,\n",
Expand All @@ -524,7 +531,7 @@
" })\n",
" if USE_INFERENCE:\n",
" return selection\n",
" \n",
"\n",
" # some branches are only needed if USE_INFERENCE is turned on\n",
" return selection.Select(lambda h: {\"Electron_pt\": h.Electron_pt,\n",
" \"Electron_eta\": h.Electron_eta,\n",
Expand All @@ -545,6 +552,7 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"id": "d8f08fc1",
"metadata": {},
Expand Down Expand Up @@ -587,6 +595,7 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"id": "c28a9e49",
"metadata": {},
Expand Down Expand Up @@ -694,6 +703,7 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"id": "d7bb4428",
"metadata": {},
Expand Down Expand Up @@ -759,6 +769,7 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"id": "bed3df8b",
"metadata": {},
Expand Down Expand Up @@ -875,6 +886,7 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"id": "7c334dd3",
"metadata": {},
Expand Down Expand Up @@ -907,6 +919,7 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"id": "e904cd3c",
"metadata": {},
Expand Down Expand Up @@ -934,6 +947,7 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"id": "f36dc601",
"metadata": {},
Expand Down Expand Up @@ -981,6 +995,7 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"id": "c74e4361",
"metadata": {},
Expand Down Expand Up @@ -1018,6 +1033,7 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"id": "bd480eec",
"metadata": {},
Expand Down Expand Up @@ -1048,6 +1064,7 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"id": "3a293479",
"metadata": {},
Expand Down Expand Up @@ -1103,6 +1120,7 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"id": "14dc4b23",
"metadata": {},
Expand Down Expand Up @@ -1150,6 +1168,7 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"id": "7f60c316",
"metadata": {},
Expand Down Expand Up @@ -1293,6 +1312,7 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"id": "a2ce2d14",
"metadata": {},
Expand Down Expand Up @@ -1329,7 +1349,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.16"
"version": "3.9.16"
}
},
"nbformat": 4,
Expand Down
54 changes: 27 additions & 27 deletions analyses/cms-open-data-ttbar/ttbar_analysis_pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,15 +74,15 @@
#
# | setting | number of files | total size | number of events |
# | --- | --- | --- | --- |
# | `1` | 9 | 22.9 GB | 10455719 |
# | `2` | 18 | 42.8 GB | 19497435 |
# | `5` | 43 | 105 GB | 47996231 |
# | `10` | 79 | 200 GB | 90546458 |
# | `20` | 140 | 359 GB | 163123242 |
# | `50` | 255 | 631 GB | 297247463 |
# | `100` | 395 | 960 GB | 470397795 |
# | `200` | 595 | 1.40 TB | 705273291 |
# | `-1` | 787 | 1.78 TB | 940160174 |
# | `1` | 9 | 22.9 GB | 10,455,719 |
# | `2` | 18 | 42.8 GB | 19,497,435 |
# | `5` | 43 | 105 GB | 47,996,231 |
# | `10` | 79 | 200 GB | 90,546,458 |
# | `20` | 140 | 359 GB | 163,123,242 |
# | `50` | 255 | 631 GB | 297,247,463 |
# | `100` | 395 | 960 GB | 470,397,795 |
# | `200` | 595 | 1.40 TB | 705,273,291 |
# | `-1` | 787 | 1.78 TB | 940,160,174 |
#
# The input files are all in the 1–3 GB range.

Expand Down Expand Up @@ -251,7 +251,7 @@ def process(self, events):
# Basic selection criteria
selections.add("exactly_1l", (ak.num(elecs) + ak.num(muons)) == 1)
selections.add("atleast_4j", ak.num(jets) >= 4)
selections.add("exactly_1b", ak.sum(jets.btagCSVV2 >= B_TAG_THRESHOLD, axis=1) == 1)
selections.add("exactly_1b", ak.sum(jets.btagCSVV2 > B_TAG_THRESHOLD, axis=1) == 1)
selections.add("atleast_2b", ak.sum(jets.btagCSVV2 > B_TAG_THRESHOLD, axis=1) >= 2)
# Complex selection criteria
selections.add("4j1b", selections.all("exactly_1l", "atleast_4j", "exactly_1b"))
Expand Down Expand Up @@ -387,40 +387,40 @@ def postprocess(self, accumulator):

# %% tags=[]
def get_query(source: ObjectStream) -> ObjectStream:
"""Query for event / column selection: >=4j >=1b, ==1 lep with pT>30 GeV + additional cuts,
"""Query for event / column selection: >=4j >=1b, ==1 lep with pT>30 GeV + additional cuts,
return relevant columns
*NOTE* jet pT cut is set lower to account for systematic variations to jet pT
"""
cuts = source.Where(lambda e: {"pt": e.Electron_pt,
"eta": e.Electron_eta,
"cutBased": e.Electron_cutBased,
cuts = source.Where(lambda e: {"pt": e.Electron_pt,
"eta": e.Electron_eta,
"cutBased": e.Electron_cutBased,
"sip3d": e.Electron_sip3d,}.Zip()\
.Where(lambda electron: (electron.pt > 30
and abs(electron.eta) < 2.1
and abs(electron.eta) < 2.1
and electron.cutBased == 4
and electron.sip3d < 4)).Count()
+ {"pt": e.Muon_pt,
and electron.sip3d < 4)).Count()
+ {"pt": e.Muon_pt,
"eta": e.Muon_eta,
"tightId": e.Muon_tightId,
"sip3d": e.Muon_sip3d,
"pfRelIso04_all": e.Muon_pfRelIso04_all}.Zip()\
.Where(lambda muon: (muon.pt > 30
and abs(muon.eta) < 2.1
and muon.tightId
.Where(lambda muon: (muon.pt > 30
and abs(muon.eta) < 2.1
and muon.tightId
and muon.pfRelIso04_all < 0.15)).Count()== 1)\
.Where(lambda f: {"pt": f.Jet_pt,
.Where(lambda f: {"pt": f.Jet_pt,
"eta": f.Jet_eta,
"jetId": f.Jet_jetId}.Zip()\
.Where(lambda jet: (jet.pt > 25
and abs(jet.eta) < 2.4
.Where(lambda jet: (jet.pt > 25
and abs(jet.eta) < 2.4
and jet.jetId == 6)).Count() >= 4)\
.Where(lambda g: {"pt": g.Jet_pt,
.Where(lambda g: {"pt": g.Jet_pt,
"eta": g.Jet_eta,
"btagCSVV2": g.Jet_btagCSVV2,
"jetId": g.Jet_jetId}.Zip()\
.Where(lambda jet: (jet.btagCSVV2 >= 0.5
.Where(lambda jet: (jet.btagCSVV2 > 0.5
and jet.pt > 25
and abs(jet.eta) < 2.4)
and abs(jet.eta) < 2.4)
and jet.jetId == 6).Count() >= 1)
selection = cuts.Select(lambda h: {"Electron_pt": h.Electron_pt,
"Electron_eta": h.Electron_eta,
Expand All @@ -446,7 +446,7 @@ def get_query(source: ObjectStream) -> ObjectStream:
})
if USE_INFERENCE:
return selection

# some branches are only needed if USE_INFERENCE is turned on
return selection.Select(lambda h: {"Electron_pt": h.Electron_pt,
"Electron_eta": h.Electron_eta,
Expand Down