Skip to content

Commit

Permalink
Fix cosine annealing scheduling in training parts
Browse files Browse the repository at this point in the history
  • Loading branch information
mittagessen committed Apr 3, 2024
1 parent 5263797 commit f7fb622
Show file tree
Hide file tree
Showing 6 changed files with 45 additions and 15 deletions.
7 changes: 6 additions & 1 deletion kraken/ketos/pretrain.py
Original file line number Diff line number Diff line change
Expand Up @@ -123,6 +123,10 @@
show_default=True,
default=RECOGNITION_PRETRAIN_HYPER_PARAMS['cos_t_max'],
help='Epoch of minimal learning rate for cosine LR scheduler.')
@click.option('--cos-min-lr',
show_default=True,
default=RECOGNITION_PRETRAIN_HYPER_PARAMS['cos_min_lr'],
help='Minimal final learning rate for cosine LR scheduler.')
@click.option('-p', '--partition', show_default=True, default=0.9,
help='Ground truth data partition ratio between train/validation set')
@click.option('--fixed-splits/--ignore-fixed-splits', show_default=True, default=False,
Expand Down Expand Up @@ -183,7 +187,7 @@
def pretrain(ctx, batch_size, pad, output, spec, load, freq, quit, epochs,
min_epochs, lag, min_delta, device, precision, optimizer, lrate, momentum,
weight_decay, warmup, schedule, gamma, step_size, sched_patience,
cos_max, partition, fixed_splits, training_files,
cos_max, cos_min_lr, partition, fixed_splits, training_files,
evaluation_files, workers, threads, load_hyper_parameters, repolygonize,
force_binarization, format_type, augment,
mask_probability, mask_width, num_negatives, logit_temp,
Expand Down Expand Up @@ -227,6 +231,7 @@ def pretrain(ctx, batch_size, pad, output, spec, load, freq, quit, epochs,
'step_size': step_size,
'rop_patience': sched_patience,
'cos_t_max': cos_max,
'cos_min_lr': cos_min_lr,
'augment': augment,
'mask_prob': mask_probability,
'mask_width': mask_width,
Expand Down
20 changes: 13 additions & 7 deletions kraken/ketos/recognition.py
Original file line number Diff line number Diff line change
Expand Up @@ -127,6 +127,10 @@
show_default=True,
default=RECOGNITION_HYPER_PARAMS['cos_t_max'],
help='Epoch of minimal learning rate for cosine LR scheduler.')
@click.option('--cos-min-lr',
show_default=True,
default=RECOGNITION_HYPER_PARAMS['cos_min_lr'],
help='Minimal final learning rate for cosine LR scheduler.')
@click.option('-p', '--partition', show_default=True, default=0.9,
help='Ground truth data partition ratio between train/validation set')
@click.option('--fixed-splits/--ignore-fixed-split', show_default=True, default=False,
Expand Down Expand Up @@ -194,13 +198,14 @@
@click.argument('ground_truth', nargs=-1, callback=_expand_gt, type=click.Path(exists=False, dir_okay=False))
@click.option('--legacy-polygons', show_default=True, default=False, is_flag=True, help='Use the legacy polygon extractor.')
def train(ctx, batch_size, pad, output, spec, append, load, freq, quit, epochs,
min_epochs, lag, min_delta, device, precision, optimizer, lrate, momentum,
weight_decay, warmup, freeze_backbone, schedule, gamma, step_size,
sched_patience, cos_max, partition, fixed_splits, normalization,
normalize_whitespace, codec, resize, reorder, base_dir,
training_files, evaluation_files, workers, threads, load_hyper_parameters,
repolygonize, force_binarization, format_type, augment,
pl_logger, log_dir, ground_truth, legacy_polygons):
min_epochs, lag, min_delta, device, precision, optimizer, lrate,
momentum, weight_decay, warmup, freeze_backbone, schedule, gamma,
step_size, sched_patience, cos_max, cos_min_lr, partition,
fixed_splits, normalization, normalize_whitespace, codec, resize,
reorder, base_dir, training_files, evaluation_files, workers,
threads, load_hyper_parameters, repolygonize, force_binarization,
format_type, augment, pl_logger, log_dir, ground_truth,
legacy_polygons):
"""
Trains a model from image-text pairs.
"""
Expand Down Expand Up @@ -253,6 +258,7 @@ def train(ctx, batch_size, pad, output, spec, append, load, freq, quit, epochs,
'step_size': step_size,
'rop_patience': sched_patience,
'cos_t_max': cos_max,
'cos_min_lr': cos_min_lr,
'normalization': normalization,
'normalize_whitespace': normalize_whitespace,
'augment': augment,
Expand Down
11 changes: 8 additions & 3 deletions kraken/ketos/ro.py
Original file line number Diff line number Diff line change
Expand Up @@ -115,6 +115,10 @@
show_default=True,
default=READING_ORDER_HYPER_PARAMS['cos_t_max'],
help='Epoch of minimal learning rate for cosine LR scheduler.')
@click.option('--cos-min-lr',
show_default=True,
default=READING_ORDER_HYPER_PARAMS['cos_min_lr'],
help='Minimal final learning rate for cosine LR scheduler.')
@click.option('-p', '--partition', show_default=True, default=0.9,
help='Ground truth data partition ratio between train/validation set')
@click.option('-t', '--training-files', show_default=True, default=None, multiple=True,
Expand Down Expand Up @@ -143,9 +147,9 @@
def rotrain(ctx, batch_size, output, load, freq, quit, epochs, min_epochs, lag,
min_delta, device, precision, optimizer, lrate, momentum,
weight_decay, warmup, schedule, gamma, step_size, sched_patience,
cos_max, partition, training_files, evaluation_files, workers,
threads, load_hyper_parameters, format_type, pl_logger, log_dir,
level, reading_order, ground_truth):
cos_max, cos_min_lr, partition, training_files, evaluation_files,
workers, threads, load_hyper_parameters, format_type, pl_logger,
log_dir, level, reading_order, ground_truth):
"""
Trains a reading order model.
"""
Expand Down Expand Up @@ -189,6 +193,7 @@ def rotrain(ctx, batch_size, output, load, freq, quit, epochs, min_epochs, lag,
'step_size': step_size,
'rop_patience': sched_patience,
'cos_t_max': cos_max,
'cos_min_lr': cos_min_lr,
'pl_logger': pl_logger,
}
)
Expand Down
11 changes: 8 additions & 3 deletions kraken/ketos/segmentation.py
Original file line number Diff line number Diff line change
Expand Up @@ -151,6 +151,10 @@ def _validate_merging(ctx, param, value):
show_default=True,
default=SEGMENTATION_HYPER_PARAMS['cos_t_max'],
help='Epoch of minimal learning rate for cosine LR scheduler.')
@click.option('--cos-min-lr',
show_default=True,
default=RECOGNITION_HYPER_PARAMS['cos_min_lr'],
help='Minimal final learning rate for cosine LR scheduler.')
@click.option('-p', '--partition', show_default=True, default=0.9,
help='Ground truth data partition ratio between train/validation set')
@click.option('-t', '--training-files', show_default=True, default=None, multiple=True,
Expand Down Expand Up @@ -226,12 +230,12 @@ def _validate_merging(ctx, param, value):
def segtrain(ctx, output, spec, line_width, pad, load, freq, quit, epochs,
min_epochs, lag, min_delta, device, precision, optimizer, lrate,
momentum, weight_decay, warmup, schedule, gamma, step_size,
sched_patience, cos_max, partition, training_files,
sched_patience, cos_max, cos_min_lr, partition, training_files,
evaluation_files, workers, threads, load_hyper_parameters,
force_binarization, format_type, suppress_regions,
suppress_baselines, valid_regions, valid_baselines, merge_regions,
merge_baselines, bounding_regions,
augment, resize, topline, pl_logger, log_dir, ground_truth):
merge_baselines, bounding_regions, augment, resize, topline,
pl_logger, log_dir, ground_truth):
"""
Trains a baseline labeling model for layout analysis
"""
Expand Down Expand Up @@ -285,6 +289,7 @@ def segtrain(ctx, output, spec, line_width, pad, load, freq, quit, epochs,
'step_size': step_size,
'rop_patience': sched_patience,
'cos_t_max': cos_max,
'cos_min_lr': cos_min_lr,
})

# disable automatic partition when given evaluation set explicitly
Expand Down
4 changes: 4 additions & 0 deletions kraken/lib/default_specs.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@
'rop_patience': 5,
# cosine
'cos_t_max': 100,
'cos_min_lr': 0.001,
'warmup': 0,
}

Expand Down Expand Up @@ -67,6 +68,7 @@
'rop_patience': 5,
# cosine
'cos_t_max': 100,
'cos_min_lr': 1e-7,
# masking parameters
'mask_width': 4,
'mask_prob': 0.5,
Expand Down Expand Up @@ -101,6 +103,7 @@
'rop_patience': 5,
# cosine
'cos_t_max': 50,
'cos_min_lr': 1e-4,
'warmup': 0,
'freeze_backbone': 0,
}
Expand Down Expand Up @@ -129,5 +132,6 @@
'rop_patience': 5,
# cosine
'cos_t_max': 50,
'cos_min_lr': 2e-5,
'warmup': 0,
}
7 changes: 6 additions & 1 deletion kraken/lib/train.py
Original file line number Diff line number Diff line change
Expand Up @@ -1129,6 +1129,8 @@ def _configure_optimizer_and_lr_scheduler(hparams, params, len_train_set=None, l
weight_decay = hparams.get("weight_decay")
schedule = hparams.get("schedule")
gamma = hparams.get("gamma")
cos_t_max = hparams.get("cos_t_max")
cos_min_lr = hparams.get("cos_min_lr")
step_size = hparams.get("step_size")
rop_factor = hparams.get("rop_factor")
rop_patience = hparams.get("rop_patience")
Expand All @@ -1149,7 +1151,10 @@ def _configure_optimizer_and_lr_scheduler(hparams, params, len_train_set=None, l
lr_sched = {'scheduler': lr_scheduler.ExponentialLR(optim, gamma, last_epoch=completed_epochs-1),
'interval': 'step'}
elif schedule == 'cosine':
lr_sched = {'scheduler': lr_scheduler.CosineAnnealingLR(optim, gamma, last_epoch=completed_epochs-1),
lr_sched = {'scheduler': lr_scheduler.CosineAnnealingLR(optim,
cos_t_max,
cos_min_lr,
last_epoch=completed_epochs-1),
'interval': 'step'}
elif schedule == 'step':
lr_sched = {'scheduler': lr_scheduler.StepLR(optim, step_size, gamma, last_epoch=completed_epochs-1),
Expand Down

0 comments on commit f7fb622

Please sign in to comment.