Skip to content

Commit

Permalink
extend Poisson loss 2
Browse files Browse the repository at this point in the history
  • Loading branch information
mieskolainen committed Nov 2, 2024
1 parent 718b5af commit d3fac0a
Show file tree
Hide file tree
Showing 3 changed files with 49 additions and 20 deletions.
48 changes: 31 additions & 17 deletions icefit/icepeak.py
Original file line number Diff line number Diff line change
Expand Up @@ -515,7 +515,7 @@ def binned_1D_fit(hist: dict, param: dict, fitfunc: dict, techno: dict, par_fixe
### [Chi2 loss function]
def chi2_loss(par):

total = 0
tot = 0

# Over histograms
for key in counts.keys():
Expand All @@ -529,14 +529,14 @@ def chi2_loss(par):

residual = (y_pred[fmask] - counts[key][rmask][fmask]) / errors[key][rmask][fmask]

total += np.sum(residual**2)
tot += np.sum(residual**2)

return total
return tot

### [Huber loss function]
def huber_loss(par):

total = 0
tot = 0
delta = techno['huber_delta']

# Over histograms
Expand All @@ -552,14 +552,15 @@ def huber_loss(par):
T = huber_lossfunc(y_true=counts[key][rmask][fmask], y_pred=y_pred[fmask],
sigma=errors[key][rmask][fmask], delta=delta)

total += T
tot += T

return total
return tot

### [Poissonian negative log-likelihood loss function]
### [Poissonian negative delta log-likelihood loss function]
def poiss_nll_loss(par):

total = 0
EPS = 1e-9
nll = 0

# Over histograms
for key in counts.keys():
Expand All @@ -571,16 +572,27 @@ def poiss_nll_loss(par):
# ** Note use x=cbins[range_mask] here, due to trapz integral in fitfunc ! **
y_pred = fitfunc[key](cbins[key][rmask], par, par_fixed)

valid = (y_pred > 0) & fmask
valid = (y_pred > 0)
if np.sum(valid) == 0: return 1e9

T1 = counts[key][rmask][valid] * np.log(y_pred[valid])
T2 = y_pred[valid]

total += (-1)*(np.sum(T1) - np.sum(T2))

return total

y_pred[~valid] = EPS

# Bohm-Zech scale transform for weighted events (https://arxiv.org/abs/1309.1287)
# https://scikit-hep.org/iminuit/notebooks/weighted_histograms.html
s = counts[key][rmask][fmask] / (errors[key][rmask][fmask]**2) # Per bin

n = counts[key][rmask][fmask] # Observed
mu = y_pred[fmask] # Predicted

# Factor 2 x taken into account by setting `errordef = iminuit.Minuit.LIKELIHOOD`

# Delta log-likelihood with Bohm-Zech scale
nll += np.sum( s * (n * (np.log(n) - np.log(mu)) - (n - mu)) )

# Simple log-likelihood (NB. x -1)
#nll += (-1) * np.sum(n * np.log(mu) - mu)

return nll

# --------------------------------------------------------------------
loss_type = techno['loss_type']
Expand Down Expand Up @@ -739,9 +751,11 @@ def optimizer_execute(trials, loss, param, techno):
m1.fixed[k] = param['fixed'][k]
# --------------------------------------------------------------------

if techno['loss_type'] == 'nll':
if 'nll' in techno['loss_type']:
cprint('Setting errordef "LIKELIHOOD" ', 'yellow')
m1.errordef = iminuit.Minuit.LIKELIHOOD
else:
cprint('Setting errordef "LEAST_SQUARES" ', 'yellow')
m1.errordef = iminuit.Minuit.LEAST_SQUARES

# Set parameter bounds
Expand Down
10 changes: 7 additions & 3 deletions icefit/peakfit.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,12 @@
# - Keep all pdf functions normalized in the steering yml (norm: True),
# otherwise fit stability problems and uncertainty estimation is not consistent.
#
# - Use 'chi2' loss if using weighted event histograms (either MC or data)
# Use 'nll' for unweighted Poisson count histograms (both MC and data)
# - Use 'chi2' or 'huber' loss if using weighted event histograms (either MC or data)
# Use 'nll' for unweighted Poisson count histograms
# and a weighted count histograms via a scale transform (experimental)
#
# - For different fit types see: /docs/pdf/peakfit.pdf
#
#
# Run with: python icefit/peakfit.py --analyze --group (--test_mode)
#
Expand All @@ -34,7 +38,7 @@

import ray

__VERSION__ = 0.02
__VERSION__ = 0.03
__AUTHOR__ = '[email protected]'

# ========================================================================
Expand Down
11 changes: 11 additions & 0 deletions tests/test_peakfit.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
#!/bin/sh
#
# Test peakfit

git clone https://github.com/mieskolainen/actions-stash.git

for L in chi2 huber nll; do
for T in single dual dual-unitary-I dual-unitary-II; do
python icefit/peakfit.py --num_cpus 1 --test_mode --analyze --group --loss_type "$L" --fit_type "$T" --output_name "test_${L}_${T}" > "peakfit_${L}_${T}.log"
done
done

0 comments on commit d3fac0a

Please sign in to comment.