forked from mdmorris/JMECoffea
-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathLHE_flavour.py
171 lines (144 loc) · 7.04 KB
/
LHE_flavour.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
# LHE_flavour.py
import awkward as ak
import numpy as np
import numba as nb
# ''' numba implementation of a function similar to ak.count that works on 2d arrays
# counts the number of times each element appears in the each subarray.
# Output: the list of the same size as 'data', but each element is replaced by the number of times it is repeated in the subarray.
# numba+awkward array example emplementation taken from
# https://github.com/scikit-hep/awkward/discussions/902#discussioncomment-844323
# '''
# ### The wrapper to make numba+awkward work
# def njit_at_dim(dim=1):
# def wrapper(impl_dim):
# def token(data, builder):
# pass
# def impl_nd(data, builder):
# for inner in data:
# builder.begin_list()
# token(inner, builder)
# builder.end_list()
# return builder
# @nb.extending.overload(token)
# def dispatch(data, builder):
# if data.type.ndim == dim:
# return impl_dim
# else:
# return impl_nd
# @nb.njit
# def jitted(data, builder):
# return token(data, builder)
# return jitted
# return wrapper
# ### The implementation part
# @njit_at_dim()
# def count_2d(data, builder):
# for ii in range(len(data)):
# count = 0
# a = data[ii]
# for jj in range(len(data)):
# if a==data[jj]:
# count+=1
# builder.integer(count)
# return builder
@nb.njit
def count_2d(array, builder):
''' numba implementation of a function similar to ak.count that works on 2d arrays
counts the number of times each element appears in the each subarray.
Output: the list of the same size as 'data', but each element is replaced by the number of times it is repeated in the subarray.
'''
for row in array:
builder.begin_list()
for x in row:
count = 0
for jj in range(len(row)):
if x==row[jj]:
count+=1
builder.integer(count)
builder.end_list()
return builder
def get_LHE_flavour2(jets, events):
''' Algorithm of LHE_Flavour2 for the jet:
Cuts all the outgoing LHE particles that have pdgId as quarks (except top) and gluons.
For each LHE particle finds the closest jet and gives the jet its flavour.
If a jet is marked by two or more LHE particles: assign -999
Using the numba compiled `count_2d` funciton.
The difference with the LHE flavour is that here we start from the LHE particle and match to the jet.
'''
LHE_flavour2 = ak.zeros_like(jets.hadronFlavour)
n_jets_per_event = ak.num(jets)
## have to work with flattened objects as awkwards doesn not allow to modify it's entries
LHE_flavour_np = ak.flatten(LHE_flavour2).to_numpy().copy()
LHEPart = events.LHEPart
absLHEid = np.abs(LHEPart.pdgId)
LHE_outgoing = LHEPart[(LHEPart.status==1) & ((absLHEid < 6) | (absLHEid == 21))]
drs, [LHE_match, jets_match] = LHE_outgoing.metric_table(jets, return_combinations=True, axis=1)
arms = ak.argmin(drs, axis=2) ## for each event, for each LHE particle, the closest jet index
cums = np.cumsum(n_jets_per_event)[:-1]
cums = np.append(0,cums)
arms_flat = arms + cums ### positions of the matchet jets in the flattened list
arms_np = ak.flatten(arms_flat).to_numpy().data
LHE_match_flat = ak.flatten(LHE_match[:,:,:1].pdgId,axis=1)
### For each jet, count the number of LHEpartons that match to it. Leave those jets unmatched that have more than one LHE particle pointing to them.
n_matches_LHEpart_jet = count_2d(arms, ak.ArrayBuilder())
n_matches_np = ak.flatten(n_matches_LHEpart_jet).to_numpy()
LHE_flavour_np = ak.flatten(LHE_flavour2).to_numpy().copy()
LHE_flavour_np[arms_np[ak.num(LHE_match_flat)>0][n_matches_np==1]] = ak.flatten(LHE_match_flat)[n_matches_np==1]
### Some LHE particles might point to the same jets. Those are kept unmatched.
LHE_flavour_np[arms_np[ak.num(LHE_match_flat)>0][n_matches_np>1]] = -999
jets["LHE_flavour2"] = ak.unflatten(LHE_flavour_np, n_jets_per_event)
return jets
def get_LHE_flavour(reco_jets, events):
""" Algorithm for the flavour derivation:
- Find all the matched outgoing LHE particles within dR<0.4
- If there is at least one LHE particle with b flavour (bbar flavour), set LHE_flavour to 5 (-5). If both b and bbar are found, set LHE_flavour=0
- If there is no b quark then:
If there is at least one LHE particle with c flavour (cbar flavour), set LHE_flavour to 4 (-4).
If both are found, set LHE_flavour=0.
- If none of the above:
Assign the flavour of the hardest selected LHE particle.
The difference with the LHE flavor2 is that here we start from the jet and match to the LHE particle.
"""
LHE_flavour = ak.zeros_like(reco_jets.hadronFlavour)
n_jets_per_event = ak.num(reco_jets)
LHE_flavour_np = ak.flatten(LHE_flavour).to_numpy()
LHEPart = events.LHEPart
absLHEid = np.abs(LHEPart.pdgId)
LHE_outgoing = LHEPart[(LHEPart.status==1) & ((absLHEid < 6) | (absLHEid == 21))]
drs, [_, LHE_match] = reco_jets.metric_table(LHE_outgoing, return_combinations=True, axis=1)
LHE_match = LHE_match[drs<0.4]
b_criteria = ak.any((LHE_match.pdgId==5),axis=2)
bbar_criteria = ak.any((LHE_match.pdgId==-5),axis=2)
c_criteria = ak.any((LHE_match.pdgId==4),axis=2)
cbar_criteria = ak.any((LHE_match.pdgId==-4),axis=2)
rest_crit = ((LHE_match.pdgId==1) | (LHE_match.pdgId==2) | (LHE_match.pdgId==3) | (LHE_match.pdgId==-1)
| (LHE_match.pdgId==-2) | (LHE_match.pdgId==-3) | (LHE_match.pdgId==21))
rest_match_candidates = LHE_match[rest_crit]
rest_match = rest_match_candidates[ak.argmax(rest_match_candidates.pt, axis=2, keepdims=True )]
#for some reason it does not work with just one ak.flatten
rest_flav_ids = ak.flatten(ak.flatten(rest_match.pdgId, axis=-1 )).to_numpy()
LHE_flavour_np[~rest_flav_ids.mask] = rest_flav_ids[~rest_flav_ids.mask]
c_cri_np = ak.flatten(c_criteria & ~cbar_criteria).to_numpy()
LHE_flavour_np[c_cri_np] = 4
cbar_cri_np = ak.flatten(cbar_criteria & ~c_criteria).to_numpy()
LHE_flavour_np[cbar_cri_np] = -4
c_criteria_unknown = ak.flatten(cbar_criteria & c_criteria).to_numpy()
LHE_flavour_np[c_criteria_unknown] = 0
b_criteria_np = ak.flatten(b_criteria & ~bbar_criteria ).to_numpy()
LHE_flavour_np[b_criteria_np] = 5
bbar_criteria_np = ak.flatten(bbar_criteria & ~b_criteria).to_numpy()
LHE_flavour_np[bbar_criteria_np] = -5
b_criteria_unknown = ak.flatten(bbar_criteria & b_criteria).to_numpy()
LHE_flavour_np[b_criteria_unknown] = 0
reco_jets["LHE_flavour"] = ak.unflatten(LHE_flavour_np, n_jets_per_event)
return reco_jets
def is_bhad(pdgId):
pdgId_str = str(np.abs(pdgId))
if len(pdgId_str)<3:
return False
elif pdgId_str[0]=='5':
return True
elif len(pdgId_str)>4 and pdgId_str[-3]=='5':
return True
else:
return False