[Feature] Support reduce_lr_update #1183
Open
gengenkai wants to merge 1 commit into open-mmlab:master from gengenkai:ReduceLR
@@ -1,6 +1,7 @@
# Copyright (c) Open-MMLab. All rights reserved.
import numbers
from math import cos, pi
from typing import Optional

import mmcv
from .hook import HOOKS, Hook
@@ -614,3 +615,223 @@ def format_param(name, optim, param):
    if name not in param:
        raise KeyError(f'{name} is not found in {param.keys()}')
    return param[name]


@HOOKS.register_module()
class ReduceLrUpdateHook(LrUpdaterHook):
    """ReduceLROnPlateau Scheduler.

    Reduce learning rate when a metric has stopped improving. This scheduler
    reads a metric quantity and, if no improvement is seen for a `patience`
    number of epochs, the learning rate is reduced.

    Args:
        periods (list[int]): Periods at which the metric value is taken
            into account.
        val_metric (str, optional): Metric to be evaluated. If val_metric is
            None, the metric will be the loss value. Default: None.
        mode (str, optional): One of `min`, `max`. In `min` mode, lr will
            be reduced when the quantity monitored has stopped decreasing;
            in `max` mode it will be reduced when the quantity monitored has
            stopped increasing. Default: 'min'.
        factor (float, optional): Factor by which the learning rate will be
            reduced. new_lr = lr * factor. Default: 0.1.
        patience (int, optional): Number of epochs with no improvement after
            which the learning rate will be reduced. For example, if
            `patience = 2`, the first 2 epochs with no improvement are
            ignored and the LR is only decreased after the 3rd epoch if the
            loss still has not improved by then. Default: 10.
        threshold (float, optional): Threshold for measuring the new optimum,
            to only focus on significant changes. Default: 1e-4.
        threshold_mode (str, optional): One of `rel`, `abs`. In `rel` mode,
            dynamic_threshold = best * (1 + threshold) in `max` mode or
            best * (1 - threshold) in `min` mode. In `abs` mode,
            dynamic_threshold = best + threshold in `max` mode or
            best - threshold in `min` mode. Default: 'rel'.
        cooldown (int, optional): Number of epochs to wait before resuming
            normal operation after the lr has been reduced. Default: 0.
        min_lr (float, optional): Minimum LR value to keep. If the LR after
            decay is lower than `min_lr`, it will be clipped to this value.
            Default: 0.
        eps (float, optional): Minimal decay applied to lr. If the difference
            between the new and old lr is smaller than eps, the update is
            ignored. Default: 1e-8.
    """

    def __init__(self,
                 periods: list,
                 val_metric: Optional[str] = None,
                 mode: str = 'min',
                 factor: float = 0.1,
                 patience: int = 10,
                 threshold: float = 1e-4,
                 threshold_mode: str = 'rel',
                 cooldown: int = 0,
                 min_lr: float = 0.,
                 eps: float = 1e-8,
                 **kwargs):
        assert isinstance(periods, list), '"periods" must be a list'
        assert mmcv.is_list_of(periods, int) and all([s >= 0 for s in periods])
        self.periods = periods
        self.val_metric = val_metric
        if mode not in ['min', 'max']:
            raise ValueError(
                f'mode must be one of "min" or "max", instead got {mode}')
        self.mode = mode

        if factor >= 1.0:
            raise ValueError('Factor should be < 1.0')
        self.factor = factor

        self.patience = patience
        self.threshold = threshold

        if threshold_mode not in ['rel', 'abs']:
            raise ValueError(
                f'threshold_mode must be one of "rel" or "abs", '
                f'instead got {threshold_mode}')
        self.threshold_mode = threshold_mode

        self.cooldown = cooldown
        self.cooldown_counter = 0
        self.best = None
        self.num_bad_epochs = None
        self.mode_worse = None  # the worse value for the chosen mode
        self.min_lr = min_lr
        self.eps = eps
        self.last_epoch = 0
        self._init_is_better(self.mode)
        self._reset()
        super(ReduceLrUpdateHook, self).__init__(**kwargs)

    def get_lr(self, runner, regular_lr):
        if self.num_bad_epochs > self.patience:
            self.cooldown_counter = self.cooldown
            self.num_bad_epochs = 0
            if regular_lr - regular_lr * self.factor > self.eps:
                new_lr = max(regular_lr * self.factor, self.min_lr)
            else:
                new_lr = regular_lr
            return new_lr
        else:
            return regular_lr

    def get_regular_lr(self, runner):
        if not self.regular_lr:
            self.regular_lr = self.base_lr
        if isinstance(runner.optimizer, dict):
            lr_groups = {}
            for k in runner.optimizer.keys():
                _lr_group = [
                    self.get_lr(runner, _regular_lr)
                    for _regular_lr in self.regular_lr[k]
                ]
                lr_groups.update({k: _lr_group})
            return lr_groups
        else:
            return [
                self.get_lr(runner, _regular_lr)
                for _regular_lr in self.regular_lr
            ]

    def _init_is_better(self, mode):
        if mode == 'min':
            self.mode_worse = float('inf')
        else:
            self.mode_worse = float('-inf')

    def _reset(self):
        self.best = self.mode_worse
        self.cooldown_counter = 0
        self.num_bad_epochs = 0

    def is_better(self, a, best):
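        # Worked example (illustration only, not in the original diff): with
        # mode='min', threshold_mode='rel', threshold=1e-4 and best=0.5, a
        # new value counts as an improvement only if it is below
        # 0.5 * (1 - 1e-4) = 0.49995; with threshold_mode='abs' the cut-off
        # would instead be 0.5 - 1e-4 = 0.4999.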
        if self.mode == 'min' and self.threshold_mode == 'rel':
            rel_epsilon = 1. - self.threshold
            return a < best * rel_epsilon
        elif self.mode == 'min' and self.threshold_mode == 'abs':
            return a < best - self.threshold
        elif self.mode == 'max' and self.threshold_mode == 'rel':
            rel_epsilon = 1. + self.threshold
            return a > best * rel_epsilon
        else:
            return a > best + self.threshold

    @property
    def in_cooldown(self):
        return self.cooldown_counter > 0

    def after_train_epoch(self, runner):
        if not self.by_epoch:
            return
        cur_epoch = runner.epoch
        if self.warmup is not None and self.warmup_by_epoch:
            if cur_epoch <= self.warmup_epochs:
                return
        if cur_epoch in self.periods and self.val_metric is None:
            current = runner.outputs['loss']
            if self.is_better(current, self.best):
                self.best = current
                self.num_bad_epochs = 0
            else:
                self.num_bad_epochs += 1

            if self.in_cooldown:
                self.cooldown_counter -= 1
                self.num_bad_epochs = 0
            print('epoch--', cur_epoch, ' lr:', self.regular_lr)
Review comment (on the print line above): Shall we use

    def after_train_iter(self, runner):
        if self.by_epoch:
            return
        cur_iter = runner.iter
        if self.warmup_epochs is not None and cur_iter <= self.warmup_iters:
            return
        if cur_iter in self.periods and self.val_metric is None:
            current = runner.outputs['loss']
            if self.is_better(current, self.best):
                self.best = current
                self.num_bad_epochs = 0
            else:
                self.num_bad_epochs += 1

            if self.in_cooldown:
                self.cooldown_counter -= 1
                self.num_bad_epochs = 0

    def after_val_epoch(self, runner):
        if not self.by_epoch:
            return
        cur_epoch = runner.epoch
        if self.warmup is not None and self.warmup_by_epoch:
            if cur_epoch <= self.warmup_epochs:
                return
        if cur_epoch in self.periods and self.val_metric is not None:
            current = runner.outputs[self.val_metric]
            if self.is_better(current, self.best):
                self.best = current
                self.num_bad_epochs = 0
            else:
                self.num_bad_epochs += 1

            if self.in_cooldown:
                self.cooldown_counter -= 1
                self.num_bad_epochs = 0

    def after_val_iter(self, runner):
        if self.by_epoch:
            return
        cur_iter = runner.iter
        if self.warmup_epochs is not None and cur_iter <= self.warmup_iters:
            return
        if cur_iter in self.periods and self.val_metric is not None:
            current = runner.outputs[self.val_metric]
            if self.is_better(current, self.best):
                self.best = current
                self.num_bad_epochs = 0
            else:
                self.num_bad_epochs += 1

            if self.in_cooldown:
                self.cooldown_counter -= 1
                self.num_bad_epochs = 0
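For context, a rough usage sketch follows. It is not part of the PR: the runner instance, the import path, and the chosen argument values are all assumptions for illustration. Because the class name ReduceLrUpdateHook does not follow the usual `policy + 'LrUpdaterHook'` naming that the `lr_config = dict(policy=...)` shortcut relies on, the sketch instantiates and registers the hook directly.

# Hedged usage sketch, not part of this PR. Assumes an epoch-based runner
# named `runner` already exists and logs the training loss to runner.outputs.
from mmcv.runner.hooks import ReduceLrUpdateHook  # import path is an assumption

reduce_lr_hook = ReduceLrUpdateHook(
    periods=[10, 20, 30],   # epochs at which the monitored value is checked
    val_metric=None,        # None -> monitor runner.outputs['loss']
    mode='min',             # reduce lr when the loss stops decreasing
    factor=0.1,             # new_lr = lr * factor
    patience=2,             # allowed checked epochs without improvement
    cooldown=1,             # epochs to wait after each reduction
    min_lr=1e-6,            # never decay below this value
    by_epoch=True)          # handled by the parent LrUpdaterHook via **kwargs
runner.register_hook(reduce_lr_hook)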
Review comment: Please check the behavior when used in IterBasedRunner, or, if this lr scheduler does not suit IterBasedRunner, I think it would be better to assert self.by_epoch in __init__.
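If the hook is indeed meant only for epoch-based training, the reviewer's suggestion could look roughly like the sketch below. This is an assumption about the intended fix, not code from the PR; the check has to come after super().__init__() so that self.by_epoch has already been set by the parent class.

    # Inside ReduceLrUpdateHook (illustrative sketch only, not from the PR):
    def __init__(self, periods: list, **kwargs):
        # ... other arguments as in the PR, omitted here for brevity ...
        super(ReduceLrUpdateHook, self).__init__(**kwargs)
        # self.by_epoch is set by LrUpdaterHook.__init__, so check afterwards
        assert self.by_epoch, \
            'ReduceLrUpdateHook is intended for epoch-based training; ' \
            'IterBasedRunner is not supported'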