From 7023c2daf2e562d57dae3d46af837dee716cfc55 Mon Sep 17 00:00:00 2001 From: Louis Dupont Date: Sun, 27 Aug 2023 15:31:24 +0300 Subject: [PATCH 1/2] start --- documentation/source/LRScheduling.md | 54 ++++++++++++++++++++++++++-- 1 file changed, 52 insertions(+), 2 deletions(-) diff --git a/documentation/source/LRScheduling.md b/documentation/source/LRScheduling.md index 78a01b2227..8523b621cc 100644 --- a/documentation/source/LRScheduling.md +++ b/documentation/source/LRScheduling.md @@ -238,7 +238,9 @@ From `Trainer.train(...)` docs: train_epoch(...) scheduler.step() .... -For example: + +### Examples +Using `StepLR` ```python ... @@ -260,10 +262,34 @@ train_params = { "valid_metrics_list": [Accuracy()], "metric_to_watch": "Accuracy", "greater_metric_to_watch_is_better": True, - } +} trainer.train(model=model, training_params=train_params, train_loader=dataloader, valid_loader=dataloader) ``` +Equivalent in `.yaml` +```yaml +max_epochs: 2 +lr_mode: + StepLR: + gamma: 0.1 + step_size: 1 + phase: TRAIN_EPOCH_END +lr_warmup_epochs: 0 +initial_lr: 0.1 +loss: CrossEntropyLoss +optimizer: SGD +criterion_params: {} +optimizer_params: + weight_decay: 1e-4 + momentum: 0.9 +train_metrics_list: + - Accuracy +valid_metrics_list: + - Accuracy +metric_to_watch: Accuracy +greater_metric_to_watch_is_better: true +``` + And as stated above, for ReduceLROnPlateau we need to pass a "metric_name", which follows the same rules as the training parameter "metric_to_watch"(see [metrics guide](Metrics.md) when not familiar). For example: @@ -290,7 +316,31 @@ train_params = { "greater_metric_to_watch_is_better": True, } trainer.train(model=model, training_params=train_params, train_loader=dataloader, valid_loader=dataloader) +``` + +```yaml +trainer = Trainer("torch_ROP_Scheduler_example") +train_dataloader = ... +valid_dataloader = ... +model = ... +train_params = { + "max_epochs": 2, + "lr_decay_factor": 0.1, + "lr_mode": { + "ReduceLROnPlateau": {"patience": 0, "phase": Phase.TRAIN_EPOCH_END, "metric_name": "DummyMetric"}}, + "lr_warmup_epochs": 0, + "initial_lr": 0.1, + "loss": torch.nn.CrossEntropyLoss(), + "optimizer": "SGD", + "criterion_params": {}, + "optimizer_params": {"weight_decay": 1e-4, "momentum": 0.9}, + "train_metrics_list": [Accuracy()], + "valid_metrics_list": [Accuracy()], + "metric_to_watch": "DummyMetric", + "greater_metric_to_watch_is_better": True, +} +trainer.train(model=model, training_params=train_params, train_loader=dataloader, valid_loader=dataloader) ``` The scheduler's `state_dict` is saved under `torch_scheduler_state_dict` entry inside the checkpoint during training, From b8ac375e4c47820c1ffba0d550d9da0aefa7a6a3 Mon Sep 17 00:00:00 2001 From: Louis Dupont Date: Sun, 27 Aug 2023 21:55:25 +0300 Subject: [PATCH 2/2] draft --- documentation/source/LRScheduling.md | 201 ++++++++++++++++++--------- 1 file changed, 134 insertions(+), 67 deletions(-) diff --git a/documentation/source/LRScheduling.md b/documentation/source/LRScheduling.md index 8523b621cc..4912ffccb0 100644 --- a/documentation/source/LRScheduling.md +++ b/documentation/source/LRScheduling.md @@ -20,26 +20,44 @@ Learning rate scheduling type is controlled by the training parameter `lr_mode`. For example, the training code below will start with an initial learning rate of 0.1 and decay by 0.1 at epochs 100,150 and 200: ```python - from super_gradients.training import Trainer -... + trainer = Trainer("my_custom_scheduler_training_experiment") train_dataloader = ... valid_dataloader = ... model = ... -train_params = {... 
- "initial_lr": 0.1, - "lr_mode":"step", - "lr_updates": [100, 150, 200], - "lr_decay_factor": 0.1, - ...} +train_params = { + "initial_lr": 0.1, + "lr_mode":"step", + "lr_updates": [100, 150, 200], + "lr_decay_factor": 0.1, + ..., +} trainer.train(model=model, training_params=train_params, train_loader=train_dataloader, valid_loader=valid_dataloader) - ``` +
+Equivalent in a .yaml configuration file:
+
+```yaml
+training_hyperparams:
+  initial_lr: 0.1
+  lr_mode: step
+  lr_updates:
+    - 100
+    - 150
+    - 200
+  lr_decay_factor: 0.1
+  ...
+
+...
+```
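+
+The recipe and the `train_params` dictionary above describe the same configuration. As a standalone sanity
+check, the sketch below (which only assumes PyYAML is installed, and which inlines the recipe with the `...`
+placeholders removed) parses the YAML and compares it against the Python dictionary:
+
+```python
+import yaml  # PyYAML, used here only to illustrate the equivalence
+
+# The scheduling-related fields from the recipe above, with the `...` placeholders removed.
+recipe = """
+training_hyperparams:
+  initial_lr: 0.1
+  lr_mode: step
+  lr_updates:
+    - 100
+    - 150
+    - 200
+  lr_decay_factor: 0.1
+"""
+
+training_hyperparams = yaml.safe_load(recipe)["training_hyperparams"]
+
+# The parsed recipe matches the scheduling-related entries of the `train_params` dict shown above.
+assert training_hyperparams == {
+    "initial_lr": 0.1,
+    "lr_mode": "step",
+    "lr_updates": [100, 150, 200],
+    "lr_decay_factor": 0.1,
+}
+```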
+ + ## Using Custom LR Schedulers Prerequisites: [phase callbacks](PhaseCallbacks.md), [training with configuration files](configuration_files.md). @@ -182,26 +200,49 @@ from myscheduler import UserStepLRCallback # triggers registry, now we can pass And finally, use your new scheduler just as any other one supported by SG. ```python - - trainer = Trainer("my_custom_scheduler_training_experiment") +# The following code sections marked with '...' are placeholders +# indicating additional necessary code that is not shown for simplicity. train_dataloader = ... valid_dataloader = ... model = ... -train_params = {... - "initial_lr": 0.1, - "lr_mode": "user_step", - "user_lr_updates": [100, 150, 200], # WILL BE PASSED TO UserStepLRCallback CONSTRUCTOR - "user_lr_decay_factors": [0.1, 0.01, 0.001] # WILL BE PASSED TO UserStepLRCallback CONSTRUCTOR - ...} + +train_params = { + "initial_lr": 0.1, + "lr_mode": "user_step", + "user_lr_updates": [100, 150, 200], # WILL BE PASSED TO UserStepLRCallback CONSTRUCTOR + "user_lr_decay_factors": [0.1, 0.01, 0.001], # WILL BE PASSED TO UserStepLRCallback CONSTRUCTOR + ... +} trainer.train(model=model, training_params=train_params, train_loader=train_dataloader, valid_loader=valid_dataloader) - ``` Note that internally, Trainer unpacks [training_params to the scheduler callback constructor](https://github.com/Deci-AI/super-gradients/blob/537a0f0afe7bcf28d331fe2c0fa797fa10f54b99/src/super_gradients/training/sg_trainer/sg_trainer.py#L1078), so we pass scheduler related parameters through training_params as well. + +
+Equivalent in a .yaml configuration file: + +```yaml +training_hyperparams: + initial_lr: 0.1 + lr_mode: user_step + user_lr_updates: # WILL BE PASSED TO UserStepLRCallback CONSTRUCTOR + - 100 + - 150 + - 200 + user_lr_decay_factors: # WILL BE PASSED TO UserStepLRCallback CONSTRUCTOR + - 0.1 + - 0.01 + - 0.001 + ... + +... +``` +
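+
+To make the effect of these two lists concrete, here is a small dependency-free sketch of the step rule they
+describe. It assumes, as in the `UserStepLRCallback` defined earlier in this guide, that the i-th decay factor
+is applied once the current epoch reaches the i-th update epoch; the `user_step_lr` helper below is purely
+illustrative and is not part of SuperGradients:
+
+```python
+def user_step_lr(initial_lr, lr_updates, lr_decay_factors, epoch):
+    """Illustrative helper: the learning rate at `epoch` under the step rule described above."""
+    lr = initial_lr
+    for update_epoch, decay_factor in zip(lr_updates, lr_decay_factors):
+        if epoch >= update_epoch:
+            lr = initial_lr * decay_factor
+    return lr
+
+
+# With the configuration above: 0.1 until epoch 100, then 0.01, 0.001 and finally 0.0001.
+assert user_step_lr(0.1, [100, 150, 200], [0.1, 0.01, 0.001], epoch=50) == 0.1
+assert user_step_lr(0.1, [100, 150, 200], [0.1, 0.01, 0.001], epoch=120) == 0.1 * 0.1
+assert user_step_lr(0.1, [100, 150, 200], [0.1, 0.01, 0.001], epoch=250) == 0.1 * 0.001
+```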
+
 ### Using PyTorch's Native LR Schedulers (torch.optim.lr_scheduler)
 
 PyTorch offers a [wide variety of learning rate schedulers](https://pytorch.org/docs/stable/optim.html#how-to-adjust-learning-rate).
@@ -243,8 +284,10 @@ From `Trainer.train(...)` docs:
 Using `StepLR`
 
 ```python
-...
 trainer = Trainer("torch_Scheduler_example")
+
+# The following code sections marked with '...' are placeholders
+# indicating additional necessary code that is not shown for simplicity.
 train_dataloader = ...
 valid_dataloader = ...
 model = ...
@@ -266,60 +309,50 @@ train_params = {
 trainer.train(model=model, training_params=train_params, train_loader=train_dataloader, valid_loader=valid_dataloader)
 ```
 
-Equivalent in `.yaml`
+
+Equivalent in a .yaml configuration file: + ```yaml -max_epochs: 2 -lr_mode: - StepLR: - gamma: 0.1 - step_size: 1 - phase: TRAIN_EPOCH_END -lr_warmup_epochs: 0 -initial_lr: 0.1 -loss: CrossEntropyLoss -optimizer: SGD -criterion_params: {} -optimizer_params: - weight_decay: 1e-4 - momentum: 0.9 -train_metrics_list: - - Accuracy -valid_metrics_list: - - Accuracy -metric_to_watch: Accuracy -greater_metric_to_watch_is_better: true +training_hyperparams: + # Setting up LR Scheduler + lr_mode: + StepLR: + gamma: 0.1 + step_size: 1 + phase: TRAIN_EPOCH_END + + # Setting up other parameters + max_epochs: 2 + lr_warmup_epochs: 0 + initial_lr: 0.1 + loss: CrossEntropyLoss + optimizer: SGD + criterion_params: {} + optimizer_params: + weight_decay: 1e-4 + momentum: 0.9 + train_metrics_list: + - Accuracy + valid_metrics_list: + - Accuracy + metric_to_watch: Accuracy + greater_metric_to_watch_is_better: true + +... ``` +
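+
+Since `StepLR` here is PyTorch's own `torch.optim.lr_scheduler.StepLR`, stepped once per epoch at
+`TRAIN_EPOCH_END`, the schedule it produces can be previewed with plain PyTorch. The snippet below is a
+standalone sketch (a single dummy parameter stands in for a real model) and does not go through `Trainer`:
+
+```python
+import torch
+
+# A single dummy parameter so that a real optimizer can be constructed.
+param = torch.nn.Parameter(torch.zeros(1))
+optimizer = torch.optim.SGD([param], lr=0.1, momentum=0.9, weight_decay=1e-4)
+scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=1, gamma=0.1)
+
+for epoch in range(3):
+    # ... one epoch of training and validation would run here ...
+    optimizer.step()   # stands in for the weight updates performed during the epoch
+    scheduler.step()   # stepped once per epoch, i.e. at TRAIN_EPOCH_END
+    print(epoch, scheduler.get_last_lr())  # roughly [0.01], then [0.001], then [0.0001]
+```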
+ -And as stated above, for ReduceLROnPlateau we need to pass a "metric_name", which follows the same -rules as the training parameter "metric_to_watch"(see [metrics guide](Metrics.md) when not familiar). -For example: +**Using `ReduceLROnPlateau`** -```python -trainer = Trainer("torch_ROP_Scheduler_example") -train_dataloader = ... -valid_dataloader = ... -model = ... -train_params = { - "max_epochs": 2, - "lr_decay_factor": 0.1, - "lr_mode": { - "ReduceLROnPlateau": {"patience": 0, "phase": Phase.TRAIN_EPOCH_END, "metric_name": "DummyMetric"}}, - "lr_warmup_epochs": 0, - "initial_lr": 0.1, - "loss": torch.nn.CrossEntropyLoss(), - "optimizer": "SGD", - "criterion_params": {}, - "optimizer_params": {"weight_decay": 1e-4, "momentum": 0.9}, - "train_metrics_list": [Accuracy()], - "valid_metrics_list": [Accuracy()], - "metric_to_watch": "DummyMetric", - "greater_metric_to_watch_is_better": True, -} -trainer.train(model=model, training_params=train_params, train_loader=dataloader, valid_loader=dataloader) -``` +If you choose to use `ReduceLROnPlateau` as the learning rate scheduler, you need to specify a `metric_name`. +This parameter follows the same guidelines as `metric_to_watch`. +For an in-depth understanding of these metrics, +see the [metrics guide](Metrics.md). -```yaml + +```python trainer = Trainer("torch_ROP_Scheduler_example") train_dataloader = ... valid_dataloader = ... @@ -345,3 +378,37 @@ trainer.train(model=model, training_params=train_params, train_loader=dataloader The scheduler's `state_dict` is saved under `torch_scheduler_state_dict` entry inside the checkpoint during training, allowing us to resume from the same state of the scheduling. + +
+Equivalent in a .yaml configuration file: + +```yaml +training_hyperparams: + # Setting up LR Scheduler + lr_mode: + ReduceLROnPlateau: + patience: 0 + phase: TRAIN_EPOCH_END + metric_name: DummyMetric + + # Setting up other parameters + max_epochs: 2 + lr_decay_factor: 0.1 + lr_warmup_epochs: 0 + initial_lr: 0.1 + loss: CrossEntropyLoss + optimizer: SGD + criterion_params: {} + optimizer_params: + weight_decay: 1e-4 + momentum: 0.9 + train_metrics_list: + - Accuracy + valid_metrics_list: + - Accuracy + metric_to_watch: DummyMetric + greater_metric_to_watch_is_better: true + +... +``` +
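+
+As with `StepLR`, the underlying scheduler is PyTorch's `torch.optim.lr_scheduler.ReduceLROnPlateau`, driven by
+the value of `metric_name`. The standalone sketch below illustrates what `patience: 0` means (the learning rate
+is cut as soon as the monitored value stops improving) and prints the object that ends up under
+`torch_scheduler_state_dict` in the checkpoint. The metric values are made up, and `mode="max"` is an assumption
+made here because the monitored metric is treated as higher-is-better:
+
+```python
+import torch
+
+param = torch.nn.Parameter(torch.zeros(1))
+optimizer = torch.optim.SGD([param], lr=0.1, momentum=0.9, weight_decay=1e-4)
+scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode="max", patience=0, factor=0.1)
+
+# Made-up validation scores for four epochs; the metric stalls at the third one.
+for score in [0.50, 0.60, 0.60, 0.70]:
+    scheduler.step(score)  # conceptually, the value of `metric_name` at the configured phase
+    print(optimizer.param_groups[0]["lr"])  # 0.1, 0.1, then reduced to roughly 0.01, 0.01
+
+# This is the object saved under `torch_scheduler_state_dict` inside the checkpoint.
+print(scheduler.state_dict())
+```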