update icezee model param
mieskolainen committed Jul 22, 2024
1 parent e36c840 commit 90bb5ad
Showing 2 changed files with 51 additions and 37 deletions.
61 changes: 30 additions & 31 deletions configs/zee/models.yml
@@ -77,9 +77,9 @@ xgb0:
tree_method: 'hist'
device: 'auto' # 'auto', 'cpu', 'cuda'

learning_rate: 0.05
learning_rate: 0.08
gamma: 1.5
max_depth: 15
max_depth: 13
min_child_weight: 1.0
max_delta_step: 1.0
subsample: 1
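The updated xgb0 hyperparameters above (learning_rate 0.05 -> 0.08, max_depth 15 -> 13) are standard XGBoost booster parameters. A hedged sketch of how they could be passed to XGBoost's native API follows; the objective, the placeholder data, and the resolution of device: 'auto' are assumptions here, not icenet's actual training loop.

import numpy as np
import xgboost as xgb

# placeholder data, only to make the sketch runnable
X = np.random.rand(1000, 10)
y = np.random.randint(0, 2, size=1000)
dtrain = xgb.DMatrix(X, label=y)

params = {
    'objective':        'binary:logistic',  # assumed; set elsewhere in the icenet config
    'tree_method':      'hist',
    'device':           'cpu',               # the config's 'auto' is presumably resolved upstream (XGBoost >= 2.0)
    'learning_rate':    0.08,                # was 0.05
    'gamma':            1.5,
    'max_depth':        13,                  # was 15
    'min_child_weight': 1.0,
    'max_delta_step':   1.0,
    'subsample':        1,
}
booster = xgb.train(params, dtrain, num_boost_round=100)

Read together, the change trades per-tree depth for a larger step size per boosting round.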
@@ -123,9 +123,9 @@ iceboost0: &ICEBOOST0
tree_method: 'hist'
device: 'auto' # 'auto', 'cpu', 'cuda'

learning_rate: 0.05
learning_rate: 0.08
gamma: 1.5
max_depth: 15
max_depth: 13
min_child_weight: 1.0
max_delta_step: 1.0
subsample: 1
@@ -154,8 +154,8 @@ iceboost0: &ICEBOOST0
plot_trees: false

# Read/Write of epochs
evalmode: 10 # Evaluation and saving of the model every n-th epoch (int) during training
readmode: -1 # -1 takes the minimum loss model
evalmode: 10 # Evaluation and saving of the model every n-th epoch (int) during training
readmode: -1 # -1 takes the minimum loss model
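A toy illustration of the readmode convention described in the comments (hypothetical variable names; the actual checkpoint bookkeeping lives in icenet): a non-negative value picks that saved epoch, while -1 picks the checkpoint with the lowest recorded loss.

losses = {10: 0.52, 20: 0.47, 30: 0.49}   # epoch -> evaluated loss, saved every `evalmode`-th epoch (toy values)
readmode = -1
best_epoch = min(losses, key=losses.get) if readmode == -1 else readmode
print(best_epoch)   # -> 20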


# ICEBOOST with custom loss [BCE + Sliced Wasserstein]
@@ -172,15 +172,15 @@ iceboost_swd:
beta: 1.0
classes: [0,1]
#set_filter: *MAIN_DOMAIN_FILTER # Comment out for 'inclusive'
label_eps: 0.0 # label smoothing epsilon (regularization)
label_eps: 0.0 # label smoothing epsilon (regularization)

# Sliced Wasserstein distance [use with custom:binary_cross_entropy and custom:sliced_wasserstein]
SWD_param:
beta: 0.2
p: 1 # p-norm (1,2, ...)
num_slices: 4000 # Number of MC projections (Higher the better)
mode: 'EBSW'
max_N: 30000 # Max events limit (30k & 4000 slices works with 32 GB Nvidia V100)
beta: 0.01
p: 1 # p-norm (1,2, ...)
num_slices: 500 # Number of MC projections (Higher the better)
mode: 'SWD' # 'SWD' (basic), 'EBSW' (see icefit/transport.py)
max_N: 500000 # Max events limit (500k & 500 slices works with 32 GB Nvidia V100)
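The retuned SWD_param block switches from the 'EBSW' estimator to the basic 'SWD' mode with fewer slices, a smaller beta, and a larger event cap. Below is a minimal sketch of the basic sliced Wasserstein distance these knobs control, assuming two equally sized, equally weighted samples; the actual implementation (the 'EBSW' variant, per-event weights, and the max_N cap) is in icefit/transport.py.

import torch

def sliced_wasserstein(x0, x1, p=1, num_slices=500):
    """Monte Carlo sliced p-Wasserstein distance between two same-size samples."""
    D = x0.shape[1]
    theta = torch.randn(num_slices, D)
    theta = theta / theta.norm(dim=1, keepdim=True)    # random unit projection directions
    proj0 = torch.sort(x0 @ theta.T, dim=0).values     # sorted 1D projections, shape (N, num_slices)
    proj1 = torch.sort(x1 @ theta.T, dim=0).values
    return (proj0 - proj1).abs().pow(p).mean().pow(1.0 / p)

x0 = torch.randn(1024, 3)         # e.g. sample from class 0
x1 = torch.randn(1024, 3) + 0.5   # e.g. sample from class 1, shifted
print(sliced_wasserstein(x0, x1, p=1, num_slices=500))

More slices reduce the Monte Carlo error of the projection average, but the (max_N x num_slices) projection matrix has to fit in GPU memory, which is the trade-off the max_N comment refers to.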


# ICEBOOST with an additional re-weighting in-the-loop regularization
@@ -242,14 +242,14 @@ lzmlp0: &LZMLP

# Optimization
opt_param:
#lossfunc: 'binary_cross_entropy' # binary_cross_entropy, cross_entropy, focal_entropy, logit_norm_cross_entropy
#lossfunc: 'binary_Lq_entropy' # binary_cross_entropy, cross_entropy, focal_entropy, logit_norm_cross_entropy
#q: 0.8 # Lq exponent (q < 1 -> high density vals emphasized, q > 1 then low emphasized)
lossfunc: 'binary_cross_entropy' # binary_cross_entropy, cross_entropy, focal_entropy, logit_norm_cross_entropy
#lossfunc: 'binary_Lq_entropy'
#q: 0.8 # Lq exponent (q < 1 -> high density vals emphasized, q > 1 then low emphasized)

lossfunc: 'SWD' # Sliced Wasserstein
SWD_beta: 0.01 # Sliced Wasserstein [reweighting regularization]
SWD_p: 1 # p-norm (1,2,..), 1 perhaps more robust
SWD_num_slices: 10000 # Number of MC projections (higher the better)
SWD_mode: 'EBSW'
SWD_num_slices: 1000 # Number of MC projections (higher the better)
SWD_mode: 'SWD' # 'SWD' (basic), 'EBSW' (see icefit/transport.py)

lipschitz_beta: 5.0e-5 # lipschitz regularization (use with 'lzmlp')
#logit_L1_beta: 1.0e-2 # logit norm reg. ~ beta * torch.sum(|logits|)
@@ -262,7 +262,7 @@ lzmlp0: &LZMLP
clip_norm: 1.0

epochs: 300
batch_size: 16384
batch_size: 8096
lr: 5.0e-4
weight_decay: 1.0e-4 # L2-regularization
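A hedged sketch of how the optimization values above (lr, weight_decay, clip_norm) map to PyTorch; the optimizer choice (AdamW here) and the placeholder model are assumptions, and the real training loop lives in icenet/deep.

import torch

model = torch.nn.Linear(4, 1)                                    # placeholder model
x, y = torch.randn(8, 4), torch.randint(0, 2, (8, 1)).float()    # placeholder batch

optimizer = torch.optim.AdamW(model.parameters(), lr=5.0e-4, weight_decay=1.0e-4)

loss = torch.nn.functional.binary_cross_entropy_with_logits(model(x), y)
loss.backward()
torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)  # clip_norm: 1.0
optimizer.step()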

@@ -334,10 +334,10 @@ fastkan0: &FASTKAN
#lossfunc: 'binary_Lq_entropy' # binary_cross_entropy, cross_entropy, focal_entropy, logit_norm_cross_entropy
#q: 0.8 # Lq exponent (q < 1 -> high density vals emphasized, q > 1 then low emphasized)

#lossfunc: 'SWD' # Sliced Wasserstein
#SWD_p: 1 # p-norm (1,2,..), 1 perhaps more robust
#SWD_num_slices: 10000 # Number of MC projections (higher the better)
#SWD_mode: 'EBSW'
SWD_beta: 0.01 # Sliced Wasserstein [reweighting regularization]
SWD_p: 1 # p-norm (1,2,..), 1 perhaps more robust
SWD_num_slices: 1000 # Number of MC projections (higher the better)
SWD_mode: 'SWD' # 'SWD' (basic), 'EBSW' (see icefit/transport.py)

#lipschitz_beta: 1.0e-4 # Lipschitz regularization (use with 'lzmlp')
#logit_L1_beta: 1.0e-2 # logit norm reg. ~ beta * torch.sum(|logits|)
@@ -419,18 +419,17 @@ dmlp0: &DMLP
skip_connections: False
last_tanh: True # Extra tanh layer
last_tanh_scale: 10.0 # Scale after tanh()

# Optimization
opt_param:
#lossfunc: 'binary_cross_entropy' # binary_cross_entropy, cross_entropy, focal_entropy, logit_norm_cross_entropy

#lossfunc: 'binary_Lq_entropy' # binary_cross_entropy, cross_entropy, focal_entropy, logit_norm_cross_entropy
lossfunc: 'binary_cross_entropy' # binary_cross_entropy, cross_entropy, focal_entropy, logit_norm_cross_entropy
#lossfunc: 'binary_Lq_entropy'
#q: 0.8 # Lq exponent (q < 1 -> high density vals emphasized, q > 1 then low emphasized)

lossfunc: 'SWD' # Sliced Wasserstein
SWD_beta: 0.01 # Sliced Wasserstein [reweighting regularization]
SWD_p: 1 # p-norm (1,2,..), 1 perhaps more robust
SWD_num_slices: 10000 # Number of MC projections (higher the better)
SWD_mode: 'EBSW'
SWD_num_slices: 1000 # Number of MC projections (higher the better)
SWD_mode: 'SWD' # 'SWD' (basic), 'EBSW' (see icefit/transport.py)

#logit_L1_beta: 1.0e-2 # logit norm reg. ~ lambda * torch.sum(|logits|)
logit_L2_beta: 5.0e-3 # logit norm reg. ~ lambda * torch.sum(logits**2)
@@ -442,7 +441,7 @@ dmlp0: &DMLP
clip_norm: 1.0

epochs: 300
batch_size: 16384
batch_size: 8096
lr: 5.0e-4
weight_decay: 1.0e-4 # L2-regularization

27 changes: 21 additions & 6 deletions icenet/deep/losstools.py
@@ -103,6 +103,21 @@ def loss_wrapper(model, x, y, num_classes, weights, param, y_DA=None, w_DA=None,
weights = None # TBD. Could re-compute a new set of edge weights
# --------------------------------------------

def SWD_helper(logits):
"""
Sliced Wasserstein reweight regularization
"""
if 'SWD_beta' in param and param['SWD_beta'] > 0:

beta = param['SWD_beta']
value = beta * SWD_reweight_loss(logits=logits, x=x, y=y, weights=weights,
p=param['SWD_p'], num_slices=param['SWD_num_slices'],
mode=param['SWD_mode'])

return {f'SWD x $\\beta = {beta}$': value}
else:
return {}
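# --- Illustration only (not part of losstools.py) ---------------------------
# SWD_helper reads SWD_beta, SWD_p, SWD_num_slices and SWD_mode from the model's
# opt_param block (e.g. 0.01 / 1 / 1000 / 'SWD' for lzmlp0, fastkan0 and dmlp0 in
# configs/zee/models.yml) and returns one extra named loss term. The named terms
# are presumably reduced to a single scalar downstream, roughly like this toy sketch:
example_terms = {'BCE': torch.tensor(0.70, requires_grad=True),          # toy values
                 'SWD x $\\beta = 0.01$': torch.tensor(0.02, requires_grad=True)}
example_total = sum(example_terms.values())   # single scalar objective for backprop
example_total.backward()
# -----------------------------------------------------------------------------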

def MI_helper(output):
"""
Mutual Information regularization
@@ -159,21 +174,21 @@ def LM_helper(logits):
logits = model.forward(x)
loss = BCE_loss(logits=logits, y=y, weights=weights)

loss = {'BCE': loss, **LZ_helper(), **LM_helper(logits), **MI_helper(torch.sigmoid(logits))}
loss = {'BCE': loss, **SWD_helper(logits), **LZ_helper(), **LM_helper(logits), **MI_helper(torch.sigmoid(logits))}

elif param['lossfunc'] == 'binary_focal_entropy':

logits = model.forward(x)
loss = binary_focal_loss(logits=logits, y=y, gamma=param['gamma'], weights=weights)

loss = {f"FE ($\\gamma = {param['gamma']}$)": loss, **LZ_helper(), **LM_helper(logits), **MI_helper(torch.sigmoid(logits))}
loss = {f"FE ($\\gamma = {param['gamma']}$)": loss, **SWD_helper(logits), **LZ_helper(), **LM_helper(logits), **MI_helper(torch.sigmoid(logits))}

elif param['lossfunc'] == 'binary_Lq_entropy':

logits = model.forward(x)
loss = Lq_binary_loss(logits=logits, y=y, q=param['q'], weights=weights)

loss = {f"LQ ($\\gamma = {param['q']}$)": loss, **LZ_helper(), **LM_helper(logits), **MI_helper(torch.sigmoid(logits))}
loss = {f"LQ ($\\gamma = {param['q']}$)": loss, **SWD_helper(logits), **LZ_helper(), **LM_helper(logits), **MI_helper(torch.sigmoid(logits))}

elif param['lossfunc'] == 'SWD':

@@ -192,22 +207,22 @@ def LM_helper(logits):
y_hat = model.forward(x)
loss = MSE_loss(y_hat=y_hat, y=y, weights=weights)

loss = {'MSE': loss, **LZ_helper(), **LM_helper(y_hat), **MI_helper(y_hat)}
loss = {'MSE': loss, **SWD_helper(y_hat), **LZ_helper(), **LM_helper(y_hat), **MI_helper(y_hat)}

elif param['lossfunc'] == 'MSE_prob':

logits = model.forward(x)
y_hat = torch.sigmoid(logits)
loss = MSE_loss(y_hat=y_hat, y=y, weights=weights)

loss = {'MSE': loss, **LZ_helper(), **LM_helper(logits), **MI_helper(y_hat)}
loss = {'MSE': loss, **SWD_helper(logits), **LZ_helper(), **LM_helper(logits), **MI_helper(y_hat)}

elif param['lossfunc'] == 'MAE':

y_hat = model.forward(x)
loss = MSE_loss(y_hat=y_hat, y=y, weights=weights)

loss = {'MAE': loss, **LZ_helper(), **LM_helper(y_hat), **MI_helper(y_hat)}
loss = {'MAE': loss, **SWD_helper(y_hat), **LZ_helper(), **LM_helper(y_hat), **MI_helper(y_hat)}

elif param['lossfunc'] == 'cross_entropy':
"""
