From f7fb6222aaefb9eee9681cd92b5f5d68a5aabbc7 Mon Sep 17 00:00:00 2001 From: Benjamin Kiessling Date: Thu, 4 Apr 2024 00:54:21 +0200 Subject: [PATCH] Fix cosine annealing scheduling in training parts --- kraken/ketos/pretrain.py | 7 ++++++- kraken/ketos/recognition.py | 20 +++++++++++++------- kraken/ketos/ro.py | 11 ++++++++--- kraken/ketos/segmentation.py | 11 ++++++++--- kraken/lib/default_specs.py | 4 ++++ kraken/lib/train.py | 7 ++++++- 6 files changed, 45 insertions(+), 15 deletions(-) diff --git a/kraken/ketos/pretrain.py b/kraken/ketos/pretrain.py index 5d6055849..27eabfd1f 100644 --- a/kraken/ketos/pretrain.py +++ b/kraken/ketos/pretrain.py @@ -123,6 +123,10 @@ show_default=True, default=RECOGNITION_PRETRAIN_HYPER_PARAMS['cos_t_max'], help='Epoch of minimal learning rate for cosine LR scheduler.') +@click.option('--cos-min-lr', + show_default=True, + default=RECOGNITION_PRETRAIN_HYPER_PARAMS['cos_min_lr'], + help='Minimal final learning rate for cosine LR scheduler.') @click.option('-p', '--partition', show_default=True, default=0.9, help='Ground truth data partition ratio between train/validation set') @click.option('--fixed-splits/--ignore-fixed-splits', show_default=True, default=False, @@ -183,7 +187,7 @@ def pretrain(ctx, batch_size, pad, output, spec, load, freq, quit, epochs, min_epochs, lag, min_delta, device, precision, optimizer, lrate, momentum, weight_decay, warmup, schedule, gamma, step_size, sched_patience, - cos_max, partition, fixed_splits, training_files, + cos_max, cos_min_lr, partition, fixed_splits, training_files, evaluation_files, workers, threads, load_hyper_parameters, repolygonize, force_binarization, format_type, augment, mask_probability, mask_width, num_negatives, logit_temp, @@ -227,6 +231,7 @@ def pretrain(ctx, batch_size, pad, output, spec, load, freq, quit, epochs, 'step_size': step_size, 'rop_patience': sched_patience, 'cos_t_max': cos_max, + 'cos_min_lr': cos_min_lr, 'augment': augment, 'mask_prob': mask_probability, 'mask_width': 
mask_width, diff --git a/kraken/ketos/recognition.py b/kraken/ketos/recognition.py index 559d86d3b..edc321f80 100644 --- a/kraken/ketos/recognition.py +++ b/kraken/ketos/recognition.py @@ -127,6 +127,10 @@ show_default=True, default=RECOGNITION_HYPER_PARAMS['cos_t_max'], help='Epoch of minimal learning rate for cosine LR scheduler.') +@click.option('--cos-min-lr', + show_default=True, + default=RECOGNITION_HYPER_PARAMS['cos_min_lr'], + help='Minimal final learning rate for cosine LR scheduler.') @click.option('-p', '--partition', show_default=True, default=0.9, help='Ground truth data partition ratio between train/validation set') @click.option('--fixed-splits/--ignore-fixed-split', show_default=True, default=False, @@ -194,13 +198,14 @@ @click.argument('ground_truth', nargs=-1, callback=_expand_gt, type=click.Path(exists=False, dir_okay=False)) @click.option('--legacy-polygons', show_default=True, default=False, is_flag=True, help='Use the legacy polygon extractor.') def train(ctx, batch_size, pad, output, spec, append, load, freq, quit, epochs, - min_epochs, lag, min_delta, device, precision, optimizer, lrate, momentum, - weight_decay, warmup, freeze_backbone, schedule, gamma, step_size, - sched_patience, cos_max, partition, fixed_splits, normalization, - normalize_whitespace, codec, resize, reorder, base_dir, - training_files, evaluation_files, workers, threads, load_hyper_parameters, - repolygonize, force_binarization, format_type, augment, - pl_logger, log_dir, ground_truth, legacy_polygons): + min_epochs, lag, min_delta, device, precision, optimizer, lrate, + momentum, weight_decay, warmup, freeze_backbone, schedule, gamma, + step_size, sched_patience, cos_max, cos_min_lr, partition, + fixed_splits, normalization, normalize_whitespace, codec, resize, + reorder, base_dir, training_files, evaluation_files, workers, + threads, load_hyper_parameters, repolygonize, force_binarization, + format_type, augment, pl_logger, log_dir, ground_truth, + legacy_polygons): 
""" Trains a model from image-text pairs. """ @@ -253,6 +258,7 @@ def train(ctx, batch_size, pad, output, spec, append, load, freq, quit, epochs, 'step_size': step_size, 'rop_patience': sched_patience, 'cos_t_max': cos_max, + 'cos_min_lr': cos_min_lr, 'normalization': normalization, 'normalize_whitespace': normalize_whitespace, 'augment': augment, diff --git a/kraken/ketos/ro.py b/kraken/ketos/ro.py index 33191d596..a8b7e4c9b 100644 --- a/kraken/ketos/ro.py +++ b/kraken/ketos/ro.py @@ -115,6 +115,10 @@ show_default=True, default=READING_ORDER_HYPER_PARAMS['cos_t_max'], help='Epoch of minimal learning rate for cosine LR scheduler.') +@click.option('--cos-min-lr', + show_default=True, + default=READING_ORDER_HYPER_PARAMS['cos_min_lr'], + help='Minimal final learning rate for cosine LR scheduler.') @click.option('-p', '--partition', show_default=True, default=0.9, help='Ground truth data partition ratio between train/validation set') @click.option('-t', '--training-files', show_default=True, default=None, multiple=True, @@ -143,9 +147,9 @@ def rotrain(ctx, batch_size, output, load, freq, quit, epochs, min_epochs, lag, min_delta, device, precision, optimizer, lrate, momentum, weight_decay, warmup, schedule, gamma, step_size, sched_patience, - cos_max, partition, training_files, evaluation_files, workers, - threads, load_hyper_parameters, format_type, pl_logger, log_dir, - level, reading_order, ground_truth): + cos_max, cos_min_lr, partition, training_files, evaluation_files, + workers, threads, load_hyper_parameters, format_type, pl_logger, + log_dir, level, reading_order, ground_truth): """ Trains a baseline labeling model for layout analysis """ @@ -189,6 +193,7 @@ def rotrain(ctx, batch_size, output, load, freq, quit, epochs, min_epochs, lag, 'step_size': step_size, 'rop_patience': sched_patience, 'cos_t_max': cos_max, + 'cos_min_lr': cos_min_lr, 'pl_logger': pl_logger, } ) diff --git a/kraken/ketos/segmentation.py b/kraken/ketos/segmentation.py index 
f1391e358..171e834f1 100644 --- a/kraken/ketos/segmentation.py +++ b/kraken/ketos/segmentation.py @@ -151,6 +151,10 @@ def _validate_merging(ctx, param, value): show_default=True, default=SEGMENTATION_HYPER_PARAMS['cos_t_max'], help='Epoch of minimal learning rate for cosine LR scheduler.') +@click.option('--cos-min-lr', + show_default=True, + default=SEGMENTATION_HYPER_PARAMS['cos_min_lr'], + help='Minimal final learning rate for cosine LR scheduler.') @click.option('-p', '--partition', show_default=True, default=0.9, help='Ground truth data partition ratio between train/validation set') @click.option('-t', '--training-files', show_default=True, default=None, multiple=True, @@ -226,12 +230,12 @@ def _validate_merging(ctx, param, value): def segtrain(ctx, output, spec, line_width, pad, load, freq, quit, epochs, min_epochs, lag, min_delta, device, precision, optimizer, lrate, momentum, weight_decay, warmup, schedule, gamma, step_size, - sched_patience, cos_max, partition, training_files, + sched_patience, cos_max, cos_min_lr, partition, training_files, evaluation_files, workers, threads, load_hyper_parameters, force_binarization, format_type, suppress_regions, suppress_baselines, valid_regions, valid_baselines, merge_regions, - merge_baselines, bounding_regions, - augment, resize, topline, pl_logger, log_dir, ground_truth): + merge_baselines, bounding_regions, augment, resize, topline, + pl_logger, log_dir, ground_truth): """ Trains a baseline labeling model for layout analysis """ @@ -285,6 +289,7 @@ def segtrain(ctx, output, spec, line_width, pad, load, freq, quit, epochs, 'step_size': step_size, 'rop_patience': sched_patience, 'cos_t_max': cos_max, + 'cos_min_lr': cos_min_lr, }) # disable automatic partition when given evaluation set explicitly diff --git a/kraken/lib/default_specs.py b/kraken/lib/default_specs.py index af08fd1e5..53d4bfb5d 100644 --- a/kraken/lib/default_specs.py +++ b/kraken/lib/default_specs.py @@ -40,6 +40,7 @@ 'rop_patience': 5, # cosine 
'cos_t_max': 100, + 'cos_min_lr': 0.001, 'warmup': 0, } @@ -67,6 +68,7 @@ 'rop_patience': 5, # cosine 'cos_t_max': 100, + 'cos_min_lr': 1e-7, # masking parameters 'mask_width': 4, 'mask_prob': 0.5, @@ -101,6 +103,7 @@ 'rop_patience': 5, # cosine 'cos_t_max': 50, + 'cos_min_lr': 1e-4, 'warmup': 0, 'freeze_backbone': 0, } @@ -129,5 +132,6 @@ 'rop_patience': 5, # cosine 'cos_t_max': 50, + 'cos_min_lr': 2e-5, 'warmup': 0, } diff --git a/kraken/lib/train.py b/kraken/lib/train.py index fb54d5791..65b65b1e1 100644 --- a/kraken/lib/train.py +++ b/kraken/lib/train.py @@ -1129,6 +1129,8 @@ def _configure_optimizer_and_lr_scheduler(hparams, params, len_train_set=None, l weight_decay = hparams.get("weight_decay") schedule = hparams.get("schedule") gamma = hparams.get("gamma") + cos_t_max = hparams.get("cos_t_max") + cos_min_lr = hparams.get("cos_min_lr") step_size = hparams.get("step_size") rop_factor = hparams.get("rop_factor") rop_patience = hparams.get("rop_patience") @@ -1149,7 +1151,10 @@ def _configure_optimizer_and_lr_scheduler(hparams, params, len_train_set=None, l lr_sched = {'scheduler': lr_scheduler.ExponentialLR(optim, gamma, last_epoch=completed_epochs-1), 'interval': 'step'} elif schedule == 'cosine': - lr_sched = {'scheduler': lr_scheduler.CosineAnnealingLR(optim, gamma, last_epoch=completed_epochs-1), + lr_sched = {'scheduler': lr_scheduler.CosineAnnealingLR(optim, + cos_t_max, + cos_min_lr, + last_epoch=completed_epochs-1), 'interval': 'step'} elif schedule == 'step': lr_sched = {'scheduler': lr_scheduler.StepLR(optim, step_size, gamma, last_epoch=completed_epochs-1),