-
Notifications
You must be signed in to change notification settings - Fork 2.7k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
* Add new script for finetuning asr models
* Update config for PTL 2.0
* style fix
* update jenkins run
* add doc strings
* improve code to support all decoder types
* add doc strings and support for char models
* typo fix

---------

Signed-off-by: Nithin Rao Koluguri <nithinraok>
Co-authored-by: Nithin Rao Koluguri <nithinraok>
- Loading branch information
1 parent
0dcc3c7
commit 68fea1a
Showing
5 changed files
with
377 additions
and
27 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
New file — 118 lines added:

```yaml
name: "Speech_To_Text_Finetuning"

# use `init_from_nemo_model` or `init_from_pretrained_model` to initialize the model
# We do not currently support `init_from_ptl_ckpt` to create a single script for all types of models.
init_from_nemo_model: null # path to nemo model

model:
  sample_rate: 16000
  compute_eval_loss: false # eval samples can be very long and exhaust memory. Disable computation of transducer loss during validation/testing with this flag.
  log_prediction: true # enables logging sample predictions in the output during training
  rnnt_reduction: 'mean_volume'
  skip_nan_grad: false

  train_ds:
    manifest_filepath: ???
    sample_rate: ${model.sample_rate}
    batch_size: 16 # you may increase batch_size if your memory allows
    shuffle: true
    num_workers: 8
    pin_memory: true
    max_duration: 20
    min_duration: 0.1
    # tarred datasets
    is_tarred: false
    tarred_audio_filepaths: null
    shuffle_n: 2048
    # bucketing params
    bucketing_strategy: "fully_randomized"
    bucketing_batch_size: null

  validation_ds:
    manifest_filepath: ???
    sample_rate: ${model.sample_rate}
    batch_size: 16
    shuffle: false
    use_start_end_token: false
    num_workers: 8
    pin_memory: true

  test_ds:
    manifest_filepath: null
    sample_rate: ${model.sample_rate}
    batch_size: 16
    shuffle: false
    use_start_end_token: false
    num_workers: 8
    pin_memory: true

  char_labels: # use for char based models
    update_labels: false
    labels: null # example list config: [' ', 'a', 'b', 'c']

  tokenizer: # use for spe/bpe based tokenizer models
    update_tokenizer: false
    dir: null # path to directory which contains either tokenizer.model (bpe) or vocab.txt (for wpe)
    type: bpe # Can be either bpe (SentencePiece tokenizer) or wpe (WordPiece tokenizer)

  spec_augment:
    _target_: nemo.collections.asr.modules.SpectrogramAugmentation
    freq_masks: 2 # set to zero to disable it
    time_masks: 10 # set to zero to disable it
    freq_width: 27
    time_width: 0.05

  optim:
    name: adamw
    lr: 1e-4
    # optimizer arguments
    betas: [0.9, 0.98]
    weight_decay: 1e-3

    # scheduler setup
    sched:
      name: CosineAnnealing
      # scheduler config override
      warmup_steps: 5000
      warmup_ratio: null
      min_lr: 5e-6

trainer:
  devices: -1 # number of GPUs, -1 would use all available GPUs
  num_nodes: 1
  max_epochs: 50
  max_steps: -1 # computed at runtime if not set
  val_check_interval: 1.0 # Set to 0.25 to check 4 times per epoch, or an int for number of iterations
  accelerator: auto
  strategy: ddp
  accumulate_grad_batches: 1
  gradient_clip_val: 0.0
  precision: 32 # 16, 32, or bf16
  log_every_n_steps: 10 # Interval of logging.
  enable_progress_bar: True
  num_sanity_val_steps: 0 # number of steps to perform validation steps for sanity check the validation process before starting the training, setting to 0 disables it
  check_val_every_n_epoch: 1 # number of evaluations on validation every n epochs
  sync_batchnorm: true
  enable_checkpointing: False # Provided by exp_manager
  logger: false # Provided by exp_manager
  benchmark: false # needs to be false for models with variable-length speech input as it slows down training

exp_manager:
  exp_dir: null
  name: ${name}
  create_tensorboard_logger: true
  create_checkpoint_callback: true
  checkpoint_callback_params:
    # in case of multiple validation sets, first one is used
    monitor: "val_wer"
    mode: "min"
    save_top_k: 5
    always_save_nemo: True # saves the checkpoints as nemo files along with PTL checkpoints
  resume_if_exists: false
  resume_ignore_no_checkpoint: false

  create_wandb_logger: false
  wandb_logger_kwargs:
    name: null
    project: null
```
Oops, something went wrong.