From 09a0d859543af014fc2a39328add03b540c3844a Mon Sep 17 00:00:00 2001 From: nithinraok Date: Mon, 24 Aug 2020 20:21:57 -0700 Subject: [PATCH 01/12] angular loss for 1.0 Signed-off-by: nithinraok --- nemo/collections/asr/data/audio_to_label.py | 8 ++- nemo/collections/asr/losses/angularloss.py | 62 +++++++++++++++++++++ nemo/collections/asr/models/label_models.py | 10 +++- nemo/collections/asr/modules/conv_asr.py | 25 ++++++++- 4 files changed, 98 insertions(+), 7 deletions(-) create mode 100644 nemo/collections/asr/losses/angularloss.py diff --git a/nemo/collections/asr/data/audio_to_label.py b/nemo/collections/asr/data/audio_to_label.py index 3d35b45611db..5a55210fa0bb 100644 --- a/nemo/collections/asr/data/audio_to_label.py +++ b/nemo/collections/asr/data/audio_to_label.py @@ -120,9 +120,9 @@ def fixed_seq_collate_fn(self, batch): _, audio_lengths, _, tokens_lengths = zip(*batch) has_audio = audio_lengths[0] is not None - fixed_length = min(fixed_length, max(audio_lengths)) + fixed_length = int(min(fixed_length, max(audio_lengths))) - audio_signal, tokens = [], [] + audio_signal, tokens, new_audio_lengths = [], [], [] for sig, sig_len, tokens_i, _ in batch: if has_audio: sig_len = sig_len.item() @@ -134,17 +134,19 @@ def fixed_seq_collate_fn(self, batch): sub = sig[-rem:] if rem > 0 else torch.tensor([]) rep_sig = torch.cat(repeat * [sig]) signal = torch.cat((rep_sig, sub)) + new_audio_lengths.append(torch.tensor(fixed_length)) else: start_idx = torch.randint(0, chunck_len, (1,)) if chunck_len else torch.tensor(0) end_idx = start_idx + fixed_length signal = sig[start_idx:end_idx] + new_audio_lengths.append(torch.tensor(fixed_length)) audio_signal.append(signal) tokens.append(tokens_i) if has_audio: audio_signal = torch.stack(audio_signal) - audio_lengths = torch.stack(audio_lengths) + audio_lengths = torch.stack(new_audio_lengths) else: audio_signal, audio_lengths = None, None tokens = torch.stack(tokens) diff --git a/nemo/collections/asr/losses/angularloss.py b/nemo/collections/asr/losses/angularloss.py new file mode 100644 index 000000000000..a6f9ff081f78 --- /dev/null +++ b/nemo/collections/asr/losses/angularloss.py @@ -0,0 +1,62 @@ +# ! /usr/bin/python +# Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import torch + +from nemo.core.classes import Loss, Typing, typecheck +from nemo.core.neural_types import LabelsType, LogitsType, LossType, NeuralType +from nemo.utils.decorators import experimental + +__all__ = ['AngularSoftmaxLoss'] + + +@experimental +class AngularSoftmaxLoss(Loss, Typing): + @property + def input_types(self): + """Input types definitions for AnguarLoss. + """ + return { + "logits": NeuralType(('B', 'D'), LogitsType()), + "labels": NeuralType(('B',), LabelsType()), + } + + @property + def output_types(self): + """Output types definitions for CTCLoss. 
+ loss: + NeuralType(None) + """ + return {"loss": NeuralType(elements_type=LossType())} + + def __init__(self, s=20.0, m=1.35): + super().__init__() + + self.eps = 1e-7 + self.s = s + self.m = m + + @typecheck() + def forward(self, logits, labels): + numerator = self.s * torch.cos( + torch.acos(torch.clamp(torch.diagonal(logits.transpose(0, 1)[labels]), -1.0 + self.eps, 1 - self.eps)) + + self.m + ) + excl = torch.cat( + [torch.cat((logits[i, :y], logits[i, y + 1 :])).unsqueeze(0) for i, y in enumerate(labels)], dim=0 + ) + denominator = torch.exp(numerator) + torch.sum(torch.exp(self.s * excl), dim=1) + L = numerator - torch.log(denominator) + return -torch.mean(L) diff --git a/nemo/collections/asr/models/label_models.py b/nemo/collections/asr/models/label_models.py index e6eb1961a0c5..f7d363a44438 100644 --- a/nemo/collections/asr/models/label_models.py +++ b/nemo/collections/asr/models/label_models.py @@ -23,6 +23,7 @@ from pytorch_lightning import Trainer from nemo.collections.asr.data.audio_to_label import AudioToSpeechLabelDataSet +from nemo.collections.asr.losses.angularloss import AngularSoftmaxLoss from nemo.collections.asr.parts.features import WaveformFeaturizer from nemo.collections.asr.parts.perturb import process_augmentations from nemo.collections.common.losses import CrossEntropyLoss as CELoss @@ -50,7 +51,14 @@ def __init__(self, cfg: DictConfig, trainer: Trainer = None): self.preprocessor = EncDecSpeakerLabelModel.from_config_dict(cfg.preprocessor) self.encoder = EncDecSpeakerLabelModel.from_config_dict(cfg.encoder) self.decoder = EncDecSpeakerLabelModel.from_config_dict(cfg.decoder) - self.loss = CELoss() + if 'angular' in cfg.decoder.params and cfg.decoder.params['angular']: + logging.info("Training with Angular Softmax Loss") + s = cfg.loss.s + m = cfg.loss.m + self.loss = AngularSoftmaxLoss(s=s, m=m) + else: + logging.info("Training with Softmax-CrossEntropy loss") + self.loss = CELoss() def __setup_dataloader_from_config(self, config: Optional[Dict]): if 'augmentor' in config: diff --git a/nemo/collections/asr/modules/conv_asr.py b/nemo/collections/asr/modules/conv_asr.py index 28b3431d5ce6..68d497a0e15b 100644 --- a/nemo/collections/asr/modules/conv_asr.py +++ b/nemo/collections/asr/modules/conv_asr.py @@ -15,6 +15,7 @@ import torch import torch.nn as nn +import torch.nn.functional as F from omegaconf import ListConfig, OmegaConf from nemo.collections.asr.parts.jasper import ( @@ -356,9 +357,20 @@ def output_types(self): ) def __init__( - self, feat_in, num_classes, emb_sizes=[1024, 1024], pool_mode='xvector', init_mode="xavier_uniform", + self, + feat_in, + num_classes, + emb_sizes=[1024, 1024], + pool_mode='xvector', + angular=False, + init_mode="xavier_uniform", ): super().__init__() + self.angular = angular + if self.angular: + bias = False + else: + bias = True if type(emb_sizes) is str: emb_sizes = emb_sizes.split(',') @@ -380,7 +392,7 @@ def __init__( self.emb_layers = nn.ModuleList(emb_layers) - self.final = nn.Linear(shapes[-1], self._num_classes) + self.final = nn.Linear(shapes[-1], self._num_classes, bias=bias) self.apply(lambda x: init_weights(x, mode=init_mode)) @@ -402,6 +414,13 @@ def forward(self, encoder_output): pool, emb = layer(pool), layer[:2](pool) embs.append(emb) - out = self.final(pool) + if self.angular: + for W in self.final.parameters(): + W = F.normalize(W, p=2, dim=1) + out = F.normalize(pool, p=2, dim=1) + out = self.final(out) + + else: + out = self.final(pool) return out, embs[-1] From 41081d892649ef2379562261440e54ee9ab1e3b8 Mon 
Sep 17 00:00:00 2001 From: nithinraok Date: Wed, 26 Aug 2020 13:32:56 -0700 Subject: [PATCH 02/12] metrics update Signed-off-by: nithinraok --- examples/speaker_recognition/spkr_get_emb.py | 4 +- nemo/collections/asr/models/label_models.py | 52 ++++++++++++-------- nemo/collections/asr/modules/conv_asr.py | 8 ++- 3 files changed, 37 insertions(+), 27 deletions(-) diff --git a/examples/speaker_recognition/spkr_get_emb.py b/examples/speaker_recognition/spkr_get_emb.py index de4887c0a262..adff12cbbe60 100644 --- a/examples/speaker_recognition/spkr_get_emb.py +++ b/examples/speaker_recognition/spkr_get_emb.py @@ -54,8 +54,8 @@ def main(cfg): model_path = os.path.join(log_dir, '..', 'spkr.nemo') speaker_model = ExtractSpeakerEmbeddingsModel.restore_from(model_path) speaker_model.setup_test_data(cfg.model.test_ds) - - trainer.test(speaker_model) + if speaker_model.prepare_test(trainer): + trainer.test(speaker_model) if __name__ == '__main__': diff --git a/nemo/collections/asr/models/label_models.py b/nemo/collections/asr/models/label_models.py index f7d363a44438..1aefbfe31280 100644 --- a/nemo/collections/asr/models/label_models.py +++ b/nemo/collections/asr/models/label_models.py @@ -17,7 +17,6 @@ import pickle as pkl from typing import Dict, Optional, Union -import numpy as np import torch from omegaconf import DictConfig from pytorch_lightning import Trainer @@ -27,6 +26,7 @@ from nemo.collections.asr.parts.features import WaveformFeaturizer from nemo.collections.asr.parts.perturb import process_augmentations from nemo.collections.common.losses import CrossEntropyLoss as CELoss +from nemo.collections.common.metrics import TopKClassificationAccuracy, compute_topk_accuracy from nemo.core.classes import ModelPT from nemo.core.classes.common import typecheck from nemo.core.neural_types import * @@ -59,6 +59,8 @@ def __init__(self, cfg: DictConfig, trainer: Trainer = None): else: logging.info("Training with Softmax-CrossEntropy loss") self.loss = CELoss() + + self._accuracy = TopKClassificationAccuracy() def __setup_dataloader_from_config(self, config: Optional[Dict]): if 'augmentor' in config: @@ -147,38 +149,48 @@ def forward(self, input_signal, input_signal_length): return logits, embs # PTL-specific methods - def training_step(self, batch, batch_nb): + def training_step(self, batch, batch_idx ): audio_signal, audio_signal_len, labels, _ = batch logits, _ = self.forward(input_signal=audio_signal, input_signal_length=audio_signal_len) loss_value = self.loss(logits=logits, labels=labels) - labels_hat = torch.argmax(logits, dim=1) - n_correct_pred = torch.sum(labels == labels_hat, dim=0).item() - tensorboard_logs = {'train_loss': loss_value, 'training_batch_acc': (n_correct_pred / len(labels)) * 100} - return {'loss': loss_value, 'log': tensorboard_logs, "n_correct_pred": n_correct_pred, "n_pred": len(labels)} + tensorboard_logs = { + 'train_loss': loss_value, + 'learning_rate': self._optimizer.param_groups[0]['lr'], + } + + correct_counts, total_counts = self._accuracy(logits=logits, labels=labels) + + for ki in range(correct_counts.shape[-1]): + correct_count = correct_counts[ki] + total_count = total_counts[ki] + top_k = self._accuracy.top_k[ki] - def training_epoch_end(self, outputs): - train_acc = (sum([x['n_correct_pred'] for x in outputs]) / sum(x['n_pred'] for x in outputs)) * 100 - tensorboard_logs = {'train_acc': train_acc} + tensorboard_logs['training_batch_accuracy_top@{}'.format(top_k)] = correct_count / float(total_count) - return {'train_acc': train_acc, 'log': tensorboard_logs} + 
return {'loss': loss_value, 'log': tensorboard_logs} - def validation_step(self, batch, batch_idx): + def validation_step(self, batch, batch_idx, dataloader_idx: int =0): audio_signal, audio_signal_len, labels, _ = batch logits, _ = self.forward(input_signal=audio_signal, input_signal_length=audio_signal_len) loss_value = self.loss(logits=logits, labels=labels) - labels_hat = torch.argmax(logits, dim=1) - n_correct_pred = torch.sum(labels == labels_hat, dim=0).item() + correct_counts, total_counts = self._accuracy(logits=logits, labels=labels) + return {'val_loss': loss_value, 'val_correct_counts': correct_counts, 'val_total_counts': total_counts} - return {'val_loss': loss_value, "n_correct_pred": n_correct_pred, "n_pred": len(labels)} - - def validation_epoch_end(self, outputs): + def multi_validation_epoch_end(self, outputs, dataloader_idx: int=0): val_loss_mean = torch.stack([x['val_loss'] for x in outputs]).mean() - val_acc = (sum([x['n_correct_pred'] for x in outputs]) / sum(x['n_pred'] for x in outputs)) * 100 - logging.info("validation accuracy {:.3f}".format(val_acc)) - tensorboard_logs = {'validation_loss': val_loss_mean, 'validation_acc': val_acc} + correct_counts = torch.stack([x['val_correct_counts'] for x in outputs]) + total_counts = torch.stack([x['val_total_counts'] for x in outputs]) + + topk_scores = compute_topk_accuracy(correct_counts, total_counts) + + tensorboard_log = {'val_loss': val_loss_mean} + logging.info("val_loss{}".format(val_loss_mean)) + for top_k, score in zip(self._accuracy.top_k, topk_scores): + tensorboard_log['val_epoch_top@{}'.format(top_k)] = score + logging.info("val_epoch_top@{}: {}".format(top_k,score)) - return {'val_loss': val_loss_mean, 'log': tensorboard_logs} + return {'log': tensorboard_log} def test_step(self, batch, batch_ix): audio_signal, audio_signal_len, labels, _ = batch diff --git a/nemo/collections/asr/modules/conv_asr.py b/nemo/collections/asr/modules/conv_asr.py index 68d497a0e15b..40233a4fba1b 100644 --- a/nemo/collections/asr/modules/conv_asr.py +++ b/nemo/collections/asr/modules/conv_asr.py @@ -417,10 +417,8 @@ def forward(self, encoder_output): if self.angular: for W in self.final.parameters(): W = F.normalize(W, p=2, dim=1) - out = F.normalize(pool, p=2, dim=1) - out = self.final(out) - - else: - out = self.final(pool) + pool = F.normalize(pool, p=2, dim=1) + + out = self.final(pool) return out, embs[-1] From b97b9408c911de788e7f21f025024d8129ff9777 Mon Sep 17 00:00:00 2001 From: nithinraok Date: Wed, 26 Aug 2020 14:51:22 -0700 Subject: [PATCH 03/12] metric update Signed-off-by: nithinraok --- examples/speaker_recognition/spkr_get_emb.py | 3 +-- nemo/collections/asr/data/audio_to_label.py | 1 + nemo/collections/asr/models/label_models.py | 12 ++++++------ nemo/collections/asr/modules/conv_asr.py | 2 +- 4 files changed, 9 insertions(+), 9 deletions(-) diff --git a/examples/speaker_recognition/spkr_get_emb.py b/examples/speaker_recognition/spkr_get_emb.py index adff12cbbe60..c2560748348e 100644 --- a/examples/speaker_recognition/spkr_get_emb.py +++ b/examples/speaker_recognition/spkr_get_emb.py @@ -54,8 +54,7 @@ def main(cfg): model_path = os.path.join(log_dir, '..', 'spkr.nemo') speaker_model = ExtractSpeakerEmbeddingsModel.restore_from(model_path) speaker_model.setup_test_data(cfg.model.test_ds) - if speaker_model.prepare_test(trainer): - trainer.test(speaker_model) + trainer.test(speaker_model) if __name__ == '__main__': diff --git a/nemo/collections/asr/data/audio_to_label.py b/nemo/collections/asr/data/audio_to_label.py 
index 5a55210fa0bb..1945470bc915 100644 --- a/nemo/collections/asr/data/audio_to_label.py +++ b/nemo/collections/asr/data/audio_to_label.py @@ -96,6 +96,7 @@ def __init__( self.trim = trim self.load_audio = load_audio self.time_length = time_length + logging.info("Timelength considered for collate func is {}".format(time_length)) self.labels = labels if labels else self.collection.uniq_labels self.num_classes = len(self.labels) diff --git a/nemo/collections/asr/models/label_models.py b/nemo/collections/asr/models/label_models.py index 1aefbfe31280..a152422381f3 100644 --- a/nemo/collections/asr/models/label_models.py +++ b/nemo/collections/asr/models/label_models.py @@ -59,7 +59,7 @@ def __init__(self, cfg: DictConfig, trainer: Trainer = None): else: logging.info("Training with Softmax-CrossEntropy loss") self.loss = CELoss() - + self._accuracy = TopKClassificationAccuracy() def __setup_dataloader_from_config(self, config: Optional[Dict]): @@ -149,7 +149,7 @@ def forward(self, input_signal, input_signal_length): return logits, embs # PTL-specific methods - def training_step(self, batch, batch_idx ): + def training_step(self, batch, batch_idx): audio_signal, audio_signal_len, labels, _ = batch logits, _ = self.forward(input_signal=audio_signal, input_signal_length=audio_signal_len) loss_value = self.loss(logits=logits, labels=labels) @@ -170,14 +170,14 @@ def training_step(self, batch, batch_idx ): return {'loss': loss_value, 'log': tensorboard_logs} - def validation_step(self, batch, batch_idx, dataloader_idx: int =0): + def validation_step(self, batch, batch_idx, dataloader_idx: int = 0): audio_signal, audio_signal_len, labels, _ = batch logits, _ = self.forward(input_signal=audio_signal, input_signal_length=audio_signal_len) loss_value = self.loss(logits=logits, labels=labels) correct_counts, total_counts = self._accuracy(logits=logits, labels=labels) return {'val_loss': loss_value, 'val_correct_counts': correct_counts, 'val_total_counts': total_counts} - def multi_validation_epoch_end(self, outputs, dataloader_idx: int=0): + def multi_validation_epoch_end(self, outputs, dataloader_idx: int = 0): val_loss_mean = torch.stack([x['val_loss'] for x in outputs]).mean() correct_counts = torch.stack([x['val_correct_counts'] for x in outputs]) total_counts = torch.stack([x['val_total_counts'] for x in outputs]) @@ -185,10 +185,10 @@ def multi_validation_epoch_end(self, outputs, dataloader_idx: int=0): topk_scores = compute_topk_accuracy(correct_counts, total_counts) tensorboard_log = {'val_loss': val_loss_mean} - logging.info("val_loss{}".format(val_loss_mean)) + logging.info("val_loss{}".format(val_loss_mean)) for top_k, score in zip(self._accuracy.top_k, topk_scores): tensorboard_log['val_epoch_top@{}'.format(top_k)] = score - logging.info("val_epoch_top@{}: {}".format(top_k,score)) + logging.info("val_epoch_top@{}: {}".format(top_k, score)) return {'log': tensorboard_log} diff --git a/nemo/collections/asr/modules/conv_asr.py b/nemo/collections/asr/modules/conv_asr.py index 40233a4fba1b..2c4d6bf4da33 100644 --- a/nemo/collections/asr/modules/conv_asr.py +++ b/nemo/collections/asr/modules/conv_asr.py @@ -418,7 +418,7 @@ def forward(self, encoder_output): for W in self.final.parameters(): W = F.normalize(W, p=2, dim=1) pool = F.normalize(pool, p=2, dim=1) - + out = self.final(pool) return out, embs[-1] From 2ba342edd519511e8576be12d49a2d3d4e551e08 Mon Sep 17 00:00:00 2001 From: nithinraok Date: Fri, 28 Aug 2020 14:10:13 -0700 Subject: [PATCH 04/12] logvallcallback Signed-off-by: nithinraok --- 
examples/speaker_recognition/speaker_reco.py | 2 ++ nemo/collections/asr/models/label_models.py | 18 ++++++++++++------ nemo/collections/common/callbacks/__init__.py | 3 ++- nemo/collections/common/callbacks/callbacks.py | 15 +++++++++++++++ 4 files changed, 31 insertions(+), 7 deletions(-) diff --git a/examples/speaker_recognition/speaker_reco.py b/examples/speaker_recognition/speaker_reco.py index 3259a6512b56..50440f8115d0 100644 --- a/examples/speaker_recognition/speaker_reco.py +++ b/examples/speaker_recognition/speaker_reco.py @@ -21,6 +21,7 @@ from nemo.core.config import hydra_runner from nemo.utils import logging from nemo.utils.exp_manager import exp_manager +from nemo.collections.common.callbacks import LogTrainValidLossCallback """ Basic run (on CPU for 50 epochs): @@ -52,6 +53,7 @@ def main(cfg): trainer = pl.Trainer(**cfg.trainer) log_dir = exp_manager(trainer, cfg.get("exp_manager", None)) speaker_model = EncDecSpeakerLabelModel(cfg=cfg.model, trainer=trainer) + trainer.callbacks.extend([LogTrainValidLossCallback()]) trainer.fit(speaker_model) model_path = os.path.join(log_dir, '..', 'spkr.nemo') speaker_model.save_to(model_path) diff --git a/nemo/collections/asr/models/label_models.py b/nemo/collections/asr/models/label_models.py index a152422381f3..ef37d04bcebd 100644 --- a/nemo/collections/asr/models/label_models.py +++ b/nemo/collections/asr/models/label_models.py @@ -150,12 +150,13 @@ def forward(self, input_signal, input_signal_length): # PTL-specific methods def training_step(self, batch, batch_idx): + # import ipdb; ipdb.set_trace() audio_signal, audio_signal_len, labels, _ = batch logits, _ = self.forward(input_signal=audio_signal, input_signal_length=audio_signal_len) - loss_value = self.loss(logits=logits, labels=labels) + self.loss_value = self.loss(logits=logits, labels=labels) tensorboard_logs = { - 'train_loss': loss_value, + 'train_loss': self.loss_value, 'learning_rate': self._optimizer.param_groups[0]['lr'], } @@ -168,14 +169,19 @@ def training_step(self, batch, batch_idx): tensorboard_logs['training_batch_accuracy_top@{}'.format(top_k)] = correct_count / float(total_count) - return {'loss': loss_value, 'log': tensorboard_logs} + return {'loss': self.loss_value, 'log': tensorboard_logs} + + # def training_epoch_end(self,outputs): + # val_loss_mean = torch.stack([x['val_loss'] for x in outputs]).mean() + # logging.info("") + def validation_step(self, batch, batch_idx, dataloader_idx: int = 0): audio_signal, audio_signal_len, labels, _ = batch logits, _ = self.forward(input_signal=audio_signal, input_signal_length=audio_signal_len) - loss_value = self.loss(logits=logits, labels=labels) + self.loss_value = self.loss(logits=logits, labels=labels) correct_counts, total_counts = self._accuracy(logits=logits, labels=labels) - return {'val_loss': loss_value, 'val_correct_counts': correct_counts, 'val_total_counts': total_counts} + return {'val_loss': self.loss_value, 'val_correct_counts': correct_counts, 'val_total_counts': total_counts} def multi_validation_epoch_end(self, outputs, dataloader_idx: int = 0): val_loss_mean = torch.stack([x['val_loss'] for x in outputs]).mean() @@ -185,7 +191,7 @@ def multi_validation_epoch_end(self, outputs, dataloader_idx: int = 0): topk_scores = compute_topk_accuracy(correct_counts, total_counts) tensorboard_log = {'val_loss': val_loss_mean} - logging.info("val_loss{}".format(val_loss_mean)) + logging.info("val_loss {}".format(val_loss_mean)) for top_k, score in zip(self._accuracy.top_k, topk_scores): 
tensorboard_log['val_epoch_top@{}'.format(top_k)] = score logging.info("val_epoch_top@{}: {}".format(top_k, score)) diff --git a/nemo/collections/common/callbacks/__init__.py b/nemo/collections/common/callbacks/__init__.py index 9ad5c9c85a5f..ef03e582cba7 100644 --- a/nemo/collections/common/callbacks/__init__.py +++ b/nemo/collections/common/callbacks/__init__.py @@ -12,4 +12,5 @@ # See the License for the specific language governing permissions and # limitations under the License. -from nemo.collections.common.callbacks.callbacks import LogEpochTimeCallback +from nemo.collections.common.callbacks.callbacks import LogEpochTimeCallback,LogTrainValidLossCallback + diff --git a/nemo/collections/common/callbacks/callbacks.py b/nemo/collections/common/callbacks/callbacks.py index 55fa5c50a1c5..153d7ce2a844 100644 --- a/nemo/collections/common/callbacks/callbacks.py +++ b/nemo/collections/common/callbacks/callbacks.py @@ -15,6 +15,7 @@ from pytorch_lightning.callbacks.base import Callback from pytorch_lightning.utilities import rank_zero_only +from nemo.utils import logging class LogEpochTimeCallback(Callback): @@ -30,3 +31,17 @@ def on_epoch_end(self, trainer, pl_module): curr_time = time.time() duration = curr_time - self.epoch_start trainer.logger.log_metrics({"epoch_time": duration}, step=trainer.global_step) + +class LogTrainValidLossCallback(Callback): + + @rank_zero_only + def on_epoch_start(self, trainer, pl_module): + logging.info(" Training started") + + @rank_zero_only + def on_train_epoch_start(self,trainer,pl_module): + print_freq = trainer.row_log_interval + logging.info("batch_idx") + if 4 % print_freq == 0: + logging.info("Epoch: {} batch: {} train_loss: {}".format(trainer.current_epoch,1,pl_module.loss)) + From 0bb0092a130d3ac81d01e90e9f68151b03a624fe Mon Sep 17 00:00:00 2001 From: nithinraok Date: Fri, 28 Aug 2020 18:04:00 -0700 Subject: [PATCH 05/12] CallbackManager Signed-off-by: nithinraok --- examples/speaker_recognition/speaker_reco.py | 9 +++- nemo/collections/asr/models/label_models.py | 15 +++--- nemo/collections/common/callbacks/__init__.py | 7 ++- .../collections/common/callbacks/callbacks.py | 53 ++++++++++++++++--- 4 files changed, 64 insertions(+), 20 deletions(-) diff --git a/examples/speaker_recognition/speaker_reco.py b/examples/speaker_recognition/speaker_reco.py index 50440f8115d0..2cf7fd0553d4 100644 --- a/examples/speaker_recognition/speaker_reco.py +++ b/examples/speaker_recognition/speaker_reco.py @@ -18,10 +18,10 @@ from pytorch_lightning import seed_everything from nemo.collections.asr.models import EncDecSpeakerLabelModel +from nemo.collections.common.callbacks import CallbackManager from nemo.core.config import hydra_runner from nemo.utils import logging from nemo.utils.exp_manager import exp_manager -from nemo.collections.common.callbacks import LogTrainValidLossCallback """ Basic run (on CPU for 50 epochs): @@ -51,9 +51,14 @@ def main(cfg): logging.info(f'Hydra config: {cfg.pretty()}') trainer = pl.Trainer(**cfg.trainer) + + callbacks = ['LogEpochTimeCallback()', 'LogTrainValidLossCallback()'] + callback_mgr = CallbackManager() + callbacks = callback_mgr.add_callback(callbacks) + trainer.callbacks.extend(callbacks) + log_dir = exp_manager(trainer, cfg.get("exp_manager", None)) speaker_model = EncDecSpeakerLabelModel(cfg=cfg.model, trainer=trainer) - trainer.callbacks.extend([LogTrainValidLossCallback()]) trainer.fit(speaker_model) model_path = os.path.join(log_dir, '..', 'spkr.nemo') speaker_model.save_to(model_path) diff --git 
a/nemo/collections/asr/models/label_models.py b/nemo/collections/asr/models/label_models.py index ef37d04bcebd..50ec7ee6d66f 100644 --- a/nemo/collections/asr/models/label_models.py +++ b/nemo/collections/asr/models/label_models.py @@ -60,7 +60,7 @@ def __init__(self, cfg: DictConfig, trainer: Trainer = None): logging.info("Training with Softmax-CrossEntropy loss") self.loss = CELoss() - self._accuracy = TopKClassificationAccuracy() + self._accuracy = TopKClassificationAccuracy(top_k=[1]) def __setup_dataloader_from_config(self, config: Optional[Dict]): if 'augmentor' in config: @@ -166,15 +166,15 @@ def training_step(self, batch, batch_idx): correct_count = correct_counts[ki] total_count = total_counts[ki] top_k = self._accuracy.top_k[ki] + self.accuracy = (correct_count / float(total_count)) * 100 - tensorboard_logs['training_batch_accuracy_top@{}'.format(top_k)] = correct_count / float(total_count) + tensorboard_logs['training_batch_accuracy_top@{}'.format(top_k)] = self.accuracy return {'loss': self.loss_value, 'log': tensorboard_logs} - + # def training_epoch_end(self,outputs): # val_loss_mean = torch.stack([x['val_loss'] for x in outputs]).mean() # logging.info("") - def validation_step(self, batch, batch_idx, dataloader_idx: int = 0): audio_signal, audio_signal_len, labels, _ = batch @@ -184,17 +184,16 @@ def validation_step(self, batch, batch_idx, dataloader_idx: int = 0): return {'val_loss': self.loss_value, 'val_correct_counts': correct_counts, 'val_total_counts': total_counts} def multi_validation_epoch_end(self, outputs, dataloader_idx: int = 0): - val_loss_mean = torch.stack([x['val_loss'] for x in outputs]).mean() + self.val_loss_mean = torch.stack([x['val_loss'] for x in outputs]).mean() correct_counts = torch.stack([x['val_correct_counts'] for x in outputs]) total_counts = torch.stack([x['val_total_counts'] for x in outputs]) topk_scores = compute_topk_accuracy(correct_counts, total_counts) - tensorboard_log = {'val_loss': val_loss_mean} - logging.info("val_loss {}".format(val_loss_mean)) + tensorboard_log = {'val_loss': self.val_loss_mean} for top_k, score in zip(self._accuracy.top_k, topk_scores): tensorboard_log['val_epoch_top@{}'.format(top_k)] = score - logging.info("val_epoch_top@{}: {}".format(top_k, score)) + self.accuracy = score * 100 return {'log': tensorboard_log} diff --git a/nemo/collections/common/callbacks/__init__.py b/nemo/collections/common/callbacks/__init__.py index ef03e582cba7..92393d04ec53 100644 --- a/nemo/collections/common/callbacks/__init__.py +++ b/nemo/collections/common/callbacks/__init__.py @@ -12,5 +12,8 @@ # See the License for the specific language governing permissions and # limitations under the License. -from nemo.collections.common.callbacks.callbacks import LogEpochTimeCallback,LogTrainValidLossCallback - +from nemo.collections.common.callbacks.callbacks import ( + CallbackManager, + LogEpochTimeCallback, + LogTrainValidLossCallback, +) diff --git a/nemo/collections/common/callbacks/callbacks.py b/nemo/collections/common/callbacks/callbacks.py index 153d7ce2a844..46ead373cf6a 100644 --- a/nemo/collections/common/callbacks/callbacks.py +++ b/nemo/collections/common/callbacks/callbacks.py @@ -12,12 +12,35 @@ # See the License for the specific language governing permissions and # limitations under the License. 
import time +from typing import List, Union from pytorch_lightning.callbacks.base import Callback from pytorch_lightning.utilities import rank_zero_only + from nemo.utils import logging +class CallbackManager: + def __init__(self) -> None: + self.callbacks = set(['LogEpochTimeCallback()', 'LogTrainValidLossCallback()']) + + def get_callback(self, callback_name: str): + if callback_name in self.callbacks: + return eval(callback_name) + else: + raise NameError("Provided Callback name is not part of nemo Callback system") + + def add_callback(self, callback_names: Union[str, List]): + if type(callback_names) is str: + callback_names = callback_names.split(',') + + callbacks = [] + for name in callback_names: + callbacks.append(self.get_callback(name)) + + return callbacks + + class LogEpochTimeCallback(Callback): """Simple callback that logs how long each epoch takes, in seconds, to a pytorch lightning log """ @@ -32,16 +55,30 @@ def on_epoch_end(self, trainer, pl_module): duration = curr_time - self.epoch_start trainer.logger.log_metrics({"epoch_time": duration}, step=trainer.global_step) -class LogTrainValidLossCallback(Callback): +class LogTrainValidLossCallback(Callback): @rank_zero_only - def on_epoch_start(self, trainer, pl_module): - logging.info(" Training started") - + def on_train_start(self, trainer, pl_module): + logging.info("Training started") + @rank_zero_only - def on_train_epoch_start(self,trainer,pl_module): + def on_train_batch_end(self, trainer, pl_module, batch, batch_idx, dataloader_idx): print_freq = trainer.row_log_interval - logging.info("batch_idx") - if 4 % print_freq == 0: - logging.info("Epoch: {} batch: {} train_loss: {}".format(trainer.current_epoch,1,pl_module.loss)) + if batch_idx % print_freq == 0: + logging.info( + "Epoch: {}/{} batch: {}/{} train_loss: {:.3f} train_acc: {:.2f}".format( + trainer.current_epoch + 1, + trainer.max_epochs, + batch_idx + 1, + trainer.num_training_batches, + pl_module.loss_value, + pl_module.accuracy, + ) + ) + def on_validation_epoch_end(self, trainer, pl_module): + logging.info( + "----> Epoch: {}/{} val_loss: {:.3f} val_acc: {:.2f} <----".format( + trainer.current_epoch + 1, trainer.max_epochs, pl_module.val_loss_mean, pl_module.accuracy + ) + ) From 7d8bdf65b55afa00cf7cae10261b9ea548f3cfdf Mon Sep 17 00:00:00 2001 From: nithinraok Date: Tue, 1 Sep 2020 11:36:50 -0700 Subject: [PATCH 06/12] Updated spkr_get_emb to support diarization Signed-off-by: nithinraok --- nemo/collections/asr/data/audio_to_label.py | 53 +++++++++++++++++++ nemo/collections/asr/models/label_models.py | 16 +++--- .../collections/common/callbacks/callbacks.py | 26 ++------- nemo/utils/exp_manager.py | 22 ++++++++ 4 files changed, 87 insertions(+), 30 deletions(-) diff --git a/nemo/collections/asr/data/audio_to_label.py b/nemo/collections/asr/data/audio_to_label.py index 1945470bc915..c04649752b30 100644 --- a/nemo/collections/asr/data/audio_to_label.py +++ b/nemo/collections/asr/data/audio_to_label.py @@ -154,6 +154,59 @@ def fixed_seq_collate_fn(self, batch): tokens_lengths = torch.stack(tokens_lengths) return audio_signal, audio_lengths, tokens, tokens_lengths + + def sliced_seq_collate_fn(self, batch): + """collate batch of audio sig, audio len, tokens, tokens len + Args: + batch (Optional[FloatTensor], Optional[LongTensor], LongTensor, + LongTensor): A tuple of tuples of signal, signal lengths, + encoded tokens, and encoded tokens length. This collate func + assumes the signals are 1d torch tensors (i.e. mono audio). 
+ fixed_length (Optional[int]): length of input signal to be considered + """ + slice_length = self.featurizer.sample_rate * self.time_length + _, audio_lengths, _, tokens_lengths = zip(*batch) + slice_length = min(slice_length, max(audio_lengths)) + shift = 1 * 16000 + has_audio = audio_lengths[0] is not None + + audio_signal, num_slices, tokens, audio_lengths = [], [], [], [] + for sig, sig_len, tokens_i, _ in batch: + if has_audio: + sig_len = sig_len.item() + slices = sig_len // slice_length + if slices <= 0: + + repeat = slice_length // sig_len + rem = slice_length % sig_len + sub = sig[-rem:] if rem > 0 else torch.tensor([]) + rep_sig = torch.cat(repeat * [sig]) + signal = torch.cat((rep_sig, sub)) + audio_signal.append(signal) + num_slices.append(1) # single embedding + tokens.extend([tokens_i] * 1) + audio_lengths.extend([slice_length] * 1) + else: + slices = (sig_len - slice_length) // shift + 1 + for slice_id in range(slices): + start_idx = slice_id * shift + end_idx = start_idx + slice_length + signal = sig[start_idx:end_idx] + audio_signal.append(signal) + + num_slices.append(slices) + tokens.extend([tokens_i] * slices) + audio_lengths.extend([slice_length] * slices) + + if has_audio: + audio_signal = torch.stack(audio_signal) + audio_lengths = torch.tensor(audio_lengths) + else: + audio_signal, audio_lengths = None, None + tokens = torch.stack(tokens) + tokens_lengths = torch.tensor(num_slices) # each embedding length + + return audio_signal, audio_lengths, tokens, tokens_lengths def __len__(self): return len(self.collection) diff --git a/nemo/collections/asr/models/label_models.py b/nemo/collections/asr/models/label_models.py index 50ec7ee6d66f..56ab2200959a 100644 --- a/nemo/collections/asr/models/label_models.py +++ b/nemo/collections/asr/models/label_models.py @@ -172,10 +172,6 @@ def training_step(self, batch, batch_idx): return {'loss': self.loss_value, 'log': tensorboard_logs} - # def training_epoch_end(self,outputs): - # val_loss_mean = torch.stack([x['val_loss'] for x in outputs]).mean() - # logging.info("") - def validation_step(self, batch, batch_idx, dataloader_idx: int = 0): audio_signal, audio_signal_len, labels, _ = batch logits, _ = self.forward(input_signal=audio_signal, input_signal_length=audio_signal_len) @@ -213,16 +209,17 @@ def __init__(self, cfg: DictConfig, trainer: Trainer = None): super().__init__(cfg=cfg, trainer=trainer) def test_step(self, batch, batch_ix): - audio_signal, audio_signal_len, labels, _ = batch + audio_signal, audio_signal_len, labels, slices = batch _, embs = self.forward(input_signal=audio_signal, input_signal_length=audio_signal_len) - return {'embs': embs, 'labels': labels} + return {'embs': embs, 'labels': labels, 'slices': slices} def test_epoch_end(self, outputs): embs = torch.cat([x['embs'] for x in outputs]) + slices = torch.cat([x['slices'] for x in outputs]) emb_shape = embs.shape[-1] embs = embs.view(-1, emb_shape).cpu().numpy() out_embeddings = {} - + start_idx=0 with open(self.test_manifest, 'r') as manifest: for idx, line in enumerate(manifest.readlines()): line = line.strip() @@ -231,7 +228,10 @@ def test_epoch_end(self, outputs): uniq_name = '@'.join(structure) if uniq_name in out_embeddings: raise KeyError("Embeddings for label {} already present in emb dictionary".format(uniq_name)) - out_embeddings[uniq_name] = embs[idx] + num_slices = slices[idx] + end_idx = start_idx+num_slices + out_embeddings[uniq_name] = embs[start_idx:end_idx].mean(axis=0) + start_idx = end_idx embedding_dir = 
os.path.join(self.embedding_dir, 'embeddings') if not os.path.exists(embedding_dir): diff --git a/nemo/collections/common/callbacks/callbacks.py b/nemo/collections/common/callbacks/callbacks.py index 46ead373cf6a..e890727bcc17 100644 --- a/nemo/collections/common/callbacks/callbacks.py +++ b/nemo/collections/common/callbacks/callbacks.py @@ -20,27 +20,6 @@ from nemo.utils import logging -class CallbackManager: - def __init__(self) -> None: - self.callbacks = set(['LogEpochTimeCallback()', 'LogTrainValidLossCallback()']) - - def get_callback(self, callback_name: str): - if callback_name in self.callbacks: - return eval(callback_name) - else: - raise NameError("Provided Callback name is not part of nemo Callback system") - - def add_callback(self, callback_names: Union[str, List]): - if type(callback_names) is str: - callback_names = callback_names.split(',') - - callbacks = [] - for name in callback_names: - callbacks.append(self.get_callback(name)) - - return callbacks - - class LogEpochTimeCallback(Callback): """Simple callback that logs how long each epoch takes, in seconds, to a pytorch lightning log """ @@ -64,13 +43,16 @@ def on_train_start(self, trainer, pl_module): @rank_zero_only def on_train_batch_end(self, trainer, pl_module, batch, batch_idx, dataloader_idx): print_freq = trainer.row_log_interval + total_batches = trainer.num_training_batches + if 0 < print_freq < 1: + print_freq = int(total_batches*print_freq) if batch_idx % print_freq == 0: logging.info( "Epoch: {}/{} batch: {}/{} train_loss: {:.3f} train_acc: {:.2f}".format( trainer.current_epoch + 1, trainer.max_epochs, batch_idx + 1, - trainer.num_training_batches, + total_batches, pl_module.loss_value, pl_module.accuracy, ) diff --git a/nemo/utils/exp_manager.py b/nemo/utils/exp_manager.py index 11a7c3ea3ba7..4433dd664b83 100644 --- a/nemo/utils/exp_manager.py +++ b/nemo/utils/exp_manager.py @@ -36,6 +36,28 @@ from nemo.utils.lightning_logger_patch import add_filehandlers_to_pl_logger +class CallbackManager: + def __init__(self) -> None: + self.callbacks = set(['LogEpochTimeCallback()', 'LogTrainValidLossCallback()']) + + def get_callback(self, callback_name: str): + if callback_name in self.callbacks: + return eval(callback_name) + else: + raise NameError("Provided Callback name is not part of nemo Callback system") + + def add_callback(self, callback_names: Union[str, List]): + if type(callback_names) is str: + callback_names = callback_names.split(',') + + callbacks = [] + for name in callback_names: + callbacks.append(self.get_callback(name)) + + return callbacks + + + class NotFoundError(NeMoBaseException): """ Raised when a file or folder is not found""" From 965a5176474871d924caed3ffe77f4904e7e23da Mon Sep 17 00:00:00 2001 From: nithinraok Date: Tue, 1 Sep 2020 12:46:29 -0700 Subject: [PATCH 07/12] callback in expmanager Signed-off-by: nithinraok --- examples/speaker_recognition/speaker_reco.py | 7 ---- nemo/collections/asr/data/audio_to_label.py | 2 +- nemo/collections/asr/models/label_models.py | 4 +- nemo/collections/common/callbacks/__init__.py | 2 +- .../collections/common/callbacks/callbacks.py | 9 ++++- nemo/core/classes/modelPT.py | 1 + nemo/utils/exp_manager.py | 37 ++++++++----------- 7 files changed, 27 insertions(+), 35 deletions(-) diff --git a/examples/speaker_recognition/speaker_reco.py b/examples/speaker_recognition/speaker_reco.py index 2cf7fd0553d4..3259a6512b56 100644 --- a/examples/speaker_recognition/speaker_reco.py +++ b/examples/speaker_recognition/speaker_reco.py @@ -18,7 +18,6 @@ from 
pytorch_lightning import seed_everything from nemo.collections.asr.models import EncDecSpeakerLabelModel -from nemo.collections.common.callbacks import CallbackManager from nemo.core.config import hydra_runner from nemo.utils import logging from nemo.utils.exp_manager import exp_manager @@ -51,12 +50,6 @@ def main(cfg): logging.info(f'Hydra config: {cfg.pretty()}') trainer = pl.Trainer(**cfg.trainer) - - callbacks = ['LogEpochTimeCallback()', 'LogTrainValidLossCallback()'] - callback_mgr = CallbackManager() - callbacks = callback_mgr.add_callback(callbacks) - trainer.callbacks.extend(callbacks) - log_dir = exp_manager(trainer, cfg.get("exp_manager", None)) speaker_model = EncDecSpeakerLabelModel(cfg=cfg.model, trainer=trainer) trainer.fit(speaker_model) diff --git a/nemo/collections/asr/data/audio_to_label.py b/nemo/collections/asr/data/audio_to_label.py index c04649752b30..28929375c741 100644 --- a/nemo/collections/asr/data/audio_to_label.py +++ b/nemo/collections/asr/data/audio_to_label.py @@ -154,7 +154,7 @@ def fixed_seq_collate_fn(self, batch): tokens_lengths = torch.stack(tokens_lengths) return audio_signal, audio_lengths, tokens, tokens_lengths - + def sliced_seq_collate_fn(self, batch): """collate batch of audio sig, audio len, tokens, tokens len Args: diff --git a/nemo/collections/asr/models/label_models.py b/nemo/collections/asr/models/label_models.py index 56ab2200959a..4dd3ea9ad1b0 100644 --- a/nemo/collections/asr/models/label_models.py +++ b/nemo/collections/asr/models/label_models.py @@ -219,7 +219,7 @@ def test_epoch_end(self, outputs): emb_shape = embs.shape[-1] embs = embs.view(-1, emb_shape).cpu().numpy() out_embeddings = {} - start_idx=0 + start_idx = 0 with open(self.test_manifest, 'r') as manifest: for idx, line in enumerate(manifest.readlines()): line = line.strip() @@ -229,7 +229,7 @@ def test_epoch_end(self, outputs): if uniq_name in out_embeddings: raise KeyError("Embeddings for label {} already present in emb dictionary".format(uniq_name)) num_slices = slices[idx] - end_idx = start_idx+num_slices + end_idx = start_idx + num_slices out_embeddings[uniq_name] = embs[start_idx:end_idx].mean(axis=0) start_idx = end_idx diff --git a/nemo/collections/common/callbacks/__init__.py b/nemo/collections/common/callbacks/__init__.py index 92393d04ec53..96e2bff8d4d6 100644 --- a/nemo/collections/common/callbacks/__init__.py +++ b/nemo/collections/common/callbacks/__init__.py @@ -13,7 +13,7 @@ # limitations under the License. from nemo.collections.common.callbacks.callbacks import ( - CallbackManager, + AVAILABLE_CALLBACKS, LogEpochTimeCallback, LogTrainValidLossCallback, ) diff --git a/nemo/collections/common/callbacks/callbacks.py b/nemo/collections/common/callbacks/callbacks.py index e890727bcc17..96a4cc21063d 100644 --- a/nemo/collections/common/callbacks/callbacks.py +++ b/nemo/collections/common/callbacks/callbacks.py @@ -12,7 +12,6 @@ # See the License for the specific language governing permissions and # limitations under the License. 
import time -from typing import List, Union from pytorch_lightning.callbacks.base import Callback from pytorch_lightning.utilities import rank_zero_only @@ -45,7 +44,7 @@ def on_train_batch_end(self, trainer, pl_module, batch, batch_idx, dataloader_id print_freq = trainer.row_log_interval total_batches = trainer.num_training_batches if 0 < print_freq < 1: - print_freq = int(total_batches*print_freq) + print_freq = int(total_batches * print_freq) if batch_idx % print_freq == 0: logging.info( "Epoch: {}/{} batch: {}/{} train_loss: {:.3f} train_acc: {:.2f}".format( @@ -64,3 +63,9 @@ def on_validation_epoch_end(self, trainer, pl_module): trainer.current_epoch + 1, trainer.max_epochs, pl_module.val_loss_mean, pl_module.accuracy ) ) + + +AVAILABLE_CALLBACKS = { + 'LogEpochTimeCallback': LogEpochTimeCallback(), + 'LogTrainValidLossCallback': LogTrainValidLossCallback(), +} diff --git a/nemo/core/classes/modelPT.py b/nemo/core/classes/modelPT.py index 489fdee0020b..d8811263cfd6 100644 --- a/nemo/core/classes/modelPT.py +++ b/nemo/core/classes/modelPT.py @@ -27,6 +27,7 @@ from omegaconf import DictConfig, OmegaConf from pytorch_lightning import LightningModule, Trainer +from nemo.collections.common import callbacks from nemo.core import optim from nemo.core.classes.common import Model from nemo.core.optim import prepare_lr_scheduler diff --git a/nemo/utils/exp_manager.py b/nemo/utils/exp_manager.py index 4433dd664b83..baa14ef10438 100644 --- a/nemo/utils/exp_manager.py +++ b/nemo/utils/exp_manager.py @@ -29,6 +29,7 @@ from pytorch_lightning.loggers import TensorBoardLogger, WandbLogger from pytorch_lightning.utilities import rank_zero_only +from nemo.collections.common import callbacks from nemo.constants import NEMO_ENV_VARNAME_VERSION from nemo.utils import logging from nemo.utils.exceptions import NeMoBaseException @@ -36,28 +37,6 @@ from nemo.utils.lightning_logger_patch import add_filehandlers_to_pl_logger -class CallbackManager: - def __init__(self) -> None: - self.callbacks = set(['LogEpochTimeCallback()', 'LogTrainValidLossCallback()']) - - def get_callback(self, callback_name: str): - if callback_name in self.callbacks: - return eval(callback_name) - else: - raise NameError("Provided Callback name is not part of nemo Callback system") - - def add_callback(self, callback_names: Union[str, List]): - if type(callback_names) is str: - callback_names = callback_names.split(',') - - callbacks = [] - for name in callback_names: - callbacks.append(self.get_callback(name)) - - return callbacks - - - class NotFoundError(NeMoBaseException): """ Raised when a file or folder is not found""" @@ -97,6 +76,7 @@ class ExpManagerConfig: create_checkpoint_callback: Optional[bool] = True # Additional exp_manager arguments files_to_copy: Optional[List[str]] = None + callbacks: Optional[str] = None def exp_manager(trainer: 'pytorch_lightning.Trainer', cfg: Optional[Union[DictConfig, Dict]] = None) -> Path: @@ -216,6 +196,10 @@ def exp_manager(trainer: 'pytorch_lightning.Trainer', cfg: Optional[Union[DictCo if cfg.create_checkpoint_callback: configure_checkpointing(trainer, log_dir, checkpoint_name) + # Add nemo callbacks + if cfg.callbacks: + add_callbacks(trainer, cfg.callbacks) + # Move files_to_copy to folder and add git information if present if cfg.files_to_copy: for _file in cfg.files_to_copy: @@ -577,3 +561,12 @@ def on_train_end(self, trainer, pl_module): trainer.configure_checkpoint_callback(checkpoint_callback) trainer.callbacks.append(checkpoint_callback) trainer.checkpoint_callback = 
checkpoint_callback
+
+
+def add_callbacks(trainer: 'pytorch_lightning.Trainer', nemo_callbacks: Optional[List[str]]):
+
+    for callback in nemo_callbacks:
+        if callback in callbacks.AVAILABLE_CALLBACKS:
+            trainer.callbacks.append(callbacks.AVAILABLE_CALLBACKS[callback])
+        else:
+            raise NameError("Requested callback is not part of the NeMo callback system; please check the callback name")

From f593c381a641b323ca932a78bddc5fba9ec33709 Mon Sep 17 00:00:00 2001
From: nithinraok 
Date: Wed, 2 Sep 2020 10:32:41 -0700
Subject: [PATCH 08/12] Removed callback from exp_manager; will push another PR for it

Signed-off-by: nithinraok 
---
 nemo/core/classes/modelPT.py |  1 -
 nemo/utils/exp_manager.py    | 15 ---------------
 2 files changed, 16 deletions(-)

diff --git a/nemo/core/classes/modelPT.py b/nemo/core/classes/modelPT.py
index d8811263cfd6..489fdee0020b 100644
--- a/nemo/core/classes/modelPT.py
+++ b/nemo/core/classes/modelPT.py
@@ -27,7 +27,6 @@
 from omegaconf import DictConfig, OmegaConf
 from pytorch_lightning import LightningModule, Trainer
 
-from nemo.collections.common import callbacks
 from nemo.core import optim
 from nemo.core.classes.common import Model
 from nemo.core.optim import prepare_lr_scheduler
diff --git a/nemo/utils/exp_manager.py b/nemo/utils/exp_manager.py
index baa14ef10438..11a7c3ea3ba7 100644
--- a/nemo/utils/exp_manager.py
+++ b/nemo/utils/exp_manager.py
@@ -29,7 +29,6 @@
 from pytorch_lightning.loggers import TensorBoardLogger, WandbLogger
 from pytorch_lightning.utilities import rank_zero_only
 
-from nemo.collections.common import callbacks
 from nemo.constants import NEMO_ENV_VARNAME_VERSION
 from nemo.utils import logging
 from nemo.utils.exceptions import NeMoBaseException
@@ -76,7 +75,6 @@ class ExpManagerConfig:
     create_checkpoint_callback: Optional[bool] = True
     # Additional exp_manager arguments
     files_to_copy: Optional[List[str]] = None
-    callbacks: Optional[str] = None
 
 
 def exp_manager(trainer: 'pytorch_lightning.Trainer', cfg: Optional[Union[DictConfig, Dict]] = None) -> Path:
@@ -196,10 +194,6 @@ def exp_manager(trainer: 'pytorch_lightning.Trainer', cfg: Optional[Union[DictCo
     if cfg.create_checkpoint_callback:
         configure_checkpointing(trainer, log_dir, checkpoint_name)
 
-    # Add nemo callbacks
-    if cfg.callbacks:
-        add_callbacks(trainer, cfg.callbacks)
-
     # Move files_to_copy to folder and add git information if present
     if cfg.files_to_copy:
         for _file in cfg.files_to_copy:
@@ -561,12 +555,3 @@ def on_train_end(self, trainer, pl_module):
         trainer.configure_checkpoint_callback(checkpoint_callback)
         trainer.callbacks.append(checkpoint_callback)
         trainer.checkpoint_callback = checkpoint_callback
-
-
-def add_callbacks(trainer: 'pytorch_lightning.Trainer', nemo_callbacks: Optional[List[str]]):
-
-    for callback in nemo_callbacks:
-        if callback in callbacks.AVAILABLE_CALLBACKS:
-            trainer.callbacks.append(callbacks.AVAILABLE_CALLBACKS[callback])
-        else:
-            raise NameError("Requested callback is not part of the NeMo callback system; please check the callback name")

From 2a621f442556590810482361917cefed48008abf Mon Sep 17 00:00:00 2001
From: nithinraok 
Date: Wed, 2 Sep 2020 12:51:37 -0700
Subject: [PATCH 09/12] LGTM

Signed-off-by: nithinraok 
---
 examples/speaker_recognition/speaker_reco.py | 15 ++++++++-------
 nemo/collections/asr/losses/angularloss.py   |  1 -
 nemo/collections/asr/modules/conv_asr.py     |  5 +++--
 3 files changed, 11 insertions(+), 10 deletions(-)

diff --git a/examples/speaker_recognition/speaker_reco.py b/examples/speaker_recognition/speaker_reco.py
index 3259a6512b56..346b8d01d3c8 100644
--- 
a/examples/speaker_recognition/speaker_reco.py
+++ b/examples/speaker_recognition/speaker_reco.py
@@ -24,13 +24,14 @@
 
 """
 Basic run (on CPU for 50 epochs):
-    python examples/speaker_recognition/speaker_reco.py \
-        model.train_ds.manifest_filepath="" \
-        model.validation_ds.manifest_filepath="" \
-        hydra.run.dir="." \
-        trainer.gpus=0 \
-        trainer.max_epochs=50
-
+EXP_NAME=sample_run
+python ./speaker_reco.py --config-path='/mnt/ngc_ws/new_configs/' --config-name=$CONFIG_PATH \
+    trainer.max_epochs=10 \
+    model.train_ds.batch_size=64 model.validation_ds.batch_size=64 \
+    trainer.gpus=0 \
+    model.decoder.params.num_classes=2 \
+    exp_manager.name=$EXP_NAME +exp_manager.use_datetime_version=False \
+    exp_manager.exp_dir='./speaker_exps'
 
 Add PyTorch Lightning Trainer arguments from CLI:
     python speaker_reco.py \
diff --git a/nemo/collections/asr/losses/angularloss.py b/nemo/collections/asr/losses/angularloss.py
index a6f9ff081f78..73235854948c 100644
--- a/nemo/collections/asr/losses/angularloss.py
+++ b/nemo/collections/asr/losses/angularloss.py
@@ -22,7 +22,6 @@
 __all__ = ['AngularSoftmaxLoss']
 
 
-@experimental
 class AngularSoftmaxLoss(Loss, Typing):
     @property
     def input_types(self):
diff --git a/nemo/collections/asr/modules/conv_asr.py b/nemo/collections/asr/modules/conv_asr.py
index 2c4d6bf4da33..8c40814381d8 100644
--- a/nemo/collections/asr/modules/conv_asr.py
+++ b/nemo/collections/asr/modules/conv_asr.py
@@ -367,6 +367,7 @@ def __init__(
     ):
         super().__init__()
         self.angular = angular
+        self.emb_id = 2
         if self.angular:
             bias = False
         else:
@@ -411,12 +412,12 @@ def forward(self, encoder_output):
 
         embs = []
         for layer in self.emb_layers:
-            pool, emb = layer(pool), layer[:2](pool)
+            pool, emb = layer(pool), layer[: self.emb_id](pool)
             embs.append(emb)
 
         if self.angular:
             for W in self.final.parameters():
-                W = F.normalize(W, p=2, dim=1)
+                _ = F.normalize(W, p=2, dim=1)
             pool = F.normalize(pool, p=2, dim=1)
 
         out = self.final(pool)

From a84195e34c2180c68539dbb76ad81efa43aaf927 Mon Sep 17 00:00:00 2001
From: nithinraok 
Date: Wed, 2 Sep 2020 13:06:04 -0700
Subject: [PATCH 10/12] experimental flag

Signed-off-by: nithinraok 
---
 nemo/collections/asr/losses/angularloss.py | 1 -
 nemo/collections/asr/modules/conv_asr.py   | 1 -
 2 files changed, 2 deletions(-)

diff --git a/nemo/collections/asr/losses/angularloss.py b/nemo/collections/asr/losses/angularloss.py
index 73235854948c..20bbc5b62dce 100644
--- a/nemo/collections/asr/losses/angularloss.py
+++ b/nemo/collections/asr/losses/angularloss.py
@@ -17,7 +17,6 @@
 
 from nemo.core.classes import Loss, Typing, typecheck
 from nemo.core.neural_types import LabelsType, LogitsType, LossType, NeuralType
-from nemo.utils.decorators import experimental
 
 __all__ = ['AngularSoftmaxLoss']
 
diff --git a/nemo/collections/asr/modules/conv_asr.py b/nemo/collections/asr/modules/conv_asr.py
index 8c40814381d8..c71d1c793246 100644
--- a/nemo/collections/asr/modules/conv_asr.py
+++ b/nemo/collections/asr/modules/conv_asr.py
@@ -37,7 +37,6 @@
     SpectrogramType,
 )
 from nemo.utils import logging
-from nemo.utils.decorators import experimental
 
 __all__ = ['ConvASRDecoder', 'ConvASREncoder', 'ConvASRDecoderClassification']
 
From df8f8c27f0b0b5e6ecf01b7fab0b35ec6819e65d Mon Sep 17 00:00:00 2001
From: nithinraok 
Date: Thu, 3 Sep 2020 16:13:57 -0700
Subject: [PATCH 11/12] moved callback to new PR; added docstrings

Signed-off-by: nithinraok 
---
 examples/speaker_recognition/spkr_get_emb.py  | 23 ++-------
 nemo/collections/asr/data/audio_to_label.py   |  2 +-
 nemo/collections/asr/losses/angularloss.py    | 22 
+++++++---- nemo/collections/asr/models/label_models.py | 10 ++--- nemo/collections/asr/modules/conv_asr.py | 12 ++---- nemo/collections/common/callbacks/__init__.py | 6 +-- .../collections/common/callbacks/callbacks.py | 39 ------------------- 7 files changed, 33 insertions(+), 81 deletions(-) diff --git a/examples/speaker_recognition/spkr_get_emb.py b/examples/speaker_recognition/spkr_get_emb.py index c2560748348e..a7599d88be8e 100644 --- a/examples/speaker_recognition/spkr_get_emb.py +++ b/examples/speaker_recognition/spkr_get_emb.py @@ -24,22 +24,11 @@ """ Basic run (on CPU for 50 epochs): - python examples/speaker_recognition/speaker_reco.py \ + python examples/speaker_recognition/spkr_get_emb.py \ model.train_ds.manifest_filepath="" \ - model.validation_ds.manifest_filepath="" \ + model.test_ds.manifest_filepath="" \ hydra.run.dir="." \ - trainer.gpus=0 \ - trainer.max_epochs=50 - - -Add PyTorch Lightning Trainer arguments from CLI: - python speaker_reco.py \ - ... \ - +trainer.fast_dev_run=true - -Hydra logs will be found in "$(./outputs/$(date +"%y-%m-%d")/$(date +"%H-%M-%S")/.hydra)" -PTL logs will be found in "$(./outputs/$(date +"%y-%m-%d")/$(date +"%H-%M-%S")/lightning_logs)" - + trainer.gpus=1 """ seed_everything(42) @@ -49,7 +38,11 @@ def main(cfg): logging.info(f'Hydra config: {cfg.pretty()}') - trainer = pl.Trainer(logger=False, checkpoint_callback=False) + if cfg.trainer.gpus > 1: + logging.info("changing gpus to 1 to minimize DDP issues while extracting embeddings") + cfg.trainer.gpus = 1 + cfg.trainer.distributed_backend = None + trainer = pl.Trainer(**cfg.trainer) log_dir = exp_manager(trainer, cfg.get("exp_manager", None)) model_path = os.path.join(log_dir, '..', 'spkr.nemo') speaker_model = ExtractSpeakerEmbeddingsModel.restore_from(model_path) diff --git a/nemo/collections/asr/data/audio_to_label.py b/nemo/collections/asr/data/audio_to_label.py index 28929375c741..ac5aed51f819 100644 --- a/nemo/collections/asr/data/audio_to_label.py +++ b/nemo/collections/asr/data/audio_to_label.py @@ -167,7 +167,7 @@ def sliced_seq_collate_fn(self, batch): slice_length = self.featurizer.sample_rate * self.time_length _, audio_lengths, _, tokens_lengths = zip(*batch) slice_length = min(slice_length, max(audio_lengths)) - shift = 1 * 16000 + shift = 1 * self.featurizer.sample_rate has_audio = audio_lengths[0] is not None audio_signal, num_slices, tokens, audio_lengths = [], [], [], [] diff --git a/nemo/collections/asr/losses/angularloss.py b/nemo/collections/asr/losses/angularloss.py index 20bbc5b62dce..e2aee9bba6ea 100644 --- a/nemo/collections/asr/losses/angularloss.py +++ b/nemo/collections/asr/losses/angularloss.py @@ -22,6 +22,14 @@ class AngularSoftmaxLoss(Loss, Typing): + """ + Computes ArcFace Angular softmax angle loss + reference: https://openaccess.thecvf.com/content_CVPR_2019/papers/Deng_ArcFace_Additive_Angular_Margin_Loss_for_Deep_Face_Recognition_CVPR_2019_paper.pdf + args: + scale: scale value for cosine angle + margin: margin value added to cosine angle + """ + @property def input_types(self): """Input types definitions for AnguarLoss. @@ -33,28 +41,28 @@ def input_types(self): @property def output_types(self): - """Output types definitions for CTCLoss. + """Output types definitions for AngularLoss. 
loss: NeuralType(None) """ return {"loss": NeuralType(elements_type=LossType())} - def __init__(self, s=20.0, m=1.35): + def __init__(self, scale=20.0, margin=1.35): super().__init__() self.eps = 1e-7 - self.s = s - self.m = m + self.scale = scale + self.margin = margin @typecheck() def forward(self, logits, labels): - numerator = self.s * torch.cos( + numerator = self.scale * torch.cos( torch.acos(torch.clamp(torch.diagonal(logits.transpose(0, 1)[labels]), -1.0 + self.eps, 1 - self.eps)) - + self.m + + self.margin ) excl = torch.cat( [torch.cat((logits[i, :y], logits[i, y + 1 :])).unsqueeze(0) for i, y in enumerate(labels)], dim=0 ) - denominator = torch.exp(numerator) + torch.sum(torch.exp(self.s * excl), dim=1) + denominator = torch.exp(numerator) + torch.sum(torch.exp(self.scale * excl), dim=1) L = numerator - torch.log(denominator) return -torch.mean(L) diff --git a/nemo/collections/asr/models/label_models.py b/nemo/collections/asr/models/label_models.py index 4dd3ea9ad1b0..9837d1066e15 100644 --- a/nemo/collections/asr/models/label_models.py +++ b/nemo/collections/asr/models/label_models.py @@ -31,7 +31,6 @@ from nemo.core.classes.common import typecheck from nemo.core.neural_types import * from nemo.utils import logging -from nemo.utils.decorators import experimental __all__ = ['EncDecSpeakerLabelModel', 'ExtractSpeakerEmbeddingsModel'] @@ -53,9 +52,9 @@ def __init__(self, cfg: DictConfig, trainer: Trainer = None): self.decoder = EncDecSpeakerLabelModel.from_config_dict(cfg.decoder) if 'angular' in cfg.decoder.params and cfg.decoder.params['angular']: logging.info("Training with Angular Softmax Loss") - s = cfg.loss.s - m = cfg.loss.m - self.loss = AngularSoftmaxLoss(s=s, m=m) + scale = cfg.loss.scale + margin = cfg.loss.margin + self.loss = AngularSoftmaxLoss(scale=scale, margin=margin) else: logging.info("Training with Softmax-CrossEntropy loss") self.loss = CELoss() @@ -150,7 +149,6 @@ def forward(self, input_signal, input_signal_length): # PTL-specific methods def training_step(self, batch, batch_idx): - # import ipdb; ipdb.set_trace() audio_signal, audio_signal_len, labels, _ = batch logits, _ = self.forward(input_signal=audio_signal, input_signal_length=audio_signal_len) self.loss_value = self.loss(logits=logits, labels=labels) @@ -185,7 +183,7 @@ def multi_validation_epoch_end(self, outputs, dataloader_idx: int = 0): total_counts = torch.stack([x['val_total_counts'] for x in outputs]) topk_scores = compute_topk_accuracy(correct_counts, total_counts) - + logging.info("val_loss: {:.3f}".format(self.val_loss_mean)) tensorboard_log = {'val_loss': self.val_loss_mean} for top_k, score in zip(self._accuracy.top_k, topk_scores): tensorboard_log['val_epoch_top@{}'.format(top_k)] = score diff --git a/nemo/collections/asr/modules/conv_asr.py b/nemo/collections/asr/modules/conv_asr.py index c71d1c793246..8ff66e5a347a 100644 --- a/nemo/collections/asr/modules/conv_asr.py +++ b/nemo/collections/asr/modules/conv_asr.py @@ -356,13 +356,7 @@ def output_types(self): ) def __init__( - self, - feat_in, - num_classes, - emb_sizes=[1024, 1024], - pool_mode='xvector', - angular=False, - init_mode="xavier_uniform", + self, feat_in, num_classes, emb_sizes=None, pool_mode='xvector', angular=False, init_mode="xavier_uniform", ): super().__init__() self.angular = angular @@ -374,6 +368,8 @@ def __init__( if type(emb_sizes) is str: emb_sizes = emb_sizes.split(',') + elif emb_sizes == None: + emb_sizes = [512, 512] else: emb_sizes = list(emb_sizes) @@ -416,7 +412,7 @@ def forward(self, encoder_output): if 
self.angular: for W in self.final.parameters(): - _ = F.normalize(W, p=2, dim=1) + W = F.normalize(W, p=2, dim=1) pool = F.normalize(pool, p=2, dim=1) out = self.final(pool) diff --git a/nemo/collections/common/callbacks/__init__.py b/nemo/collections/common/callbacks/__init__.py index 96e2bff8d4d6..9ad5c9c85a5f 100644 --- a/nemo/collections/common/callbacks/__init__.py +++ b/nemo/collections/common/callbacks/__init__.py @@ -12,8 +12,4 @@ # See the License for the specific language governing permissions and # limitations under the License. -from nemo.collections.common.callbacks.callbacks import ( - AVAILABLE_CALLBACKS, - LogEpochTimeCallback, - LogTrainValidLossCallback, -) +from nemo.collections.common.callbacks.callbacks import LogEpochTimeCallback diff --git a/nemo/collections/common/callbacks/callbacks.py b/nemo/collections/common/callbacks/callbacks.py index 96a4cc21063d..55fa5c50a1c5 100644 --- a/nemo/collections/common/callbacks/callbacks.py +++ b/nemo/collections/common/callbacks/callbacks.py @@ -16,8 +16,6 @@ from pytorch_lightning.callbacks.base import Callback from pytorch_lightning.utilities import rank_zero_only -from nemo.utils import logging - class LogEpochTimeCallback(Callback): """Simple callback that logs how long each epoch takes, in seconds, to a pytorch lightning log @@ -32,40 +30,3 @@ def on_epoch_end(self, trainer, pl_module): curr_time = time.time() duration = curr_time - self.epoch_start trainer.logger.log_metrics({"epoch_time": duration}, step=trainer.global_step) - - -class LogTrainValidLossCallback(Callback): - @rank_zero_only - def on_train_start(self, trainer, pl_module): - logging.info("Training started") - - @rank_zero_only - def on_train_batch_end(self, trainer, pl_module, batch, batch_idx, dataloader_idx): - print_freq = trainer.row_log_interval - total_batches = trainer.num_training_batches - if 0 < print_freq < 1: - print_freq = int(total_batches * print_freq) - if batch_idx % print_freq == 0: - logging.info( - "Epoch: {}/{} batch: {}/{} train_loss: {:.3f} train_acc: {:.2f}".format( - trainer.current_epoch + 1, - trainer.max_epochs, - batch_idx + 1, - total_batches, - pl_module.loss_value, - pl_module.accuracy, - ) - ) - - def on_validation_epoch_end(self, trainer, pl_module): - logging.info( - "----> Epoch: {}/{} val_loss: {:.3f} val_acc: {:.2f} <----".format( - trainer.current_epoch + 1, trainer.max_epochs, pl_module.val_loss_mean, pl_module.accuracy - ) - ) - - -AVAILABLE_CALLBACKS = { - 'LogEpochTimeCallback': LogEpochTimeCallback(), - 'LogTrainValidLossCallback': LogTrainValidLossCallback(), -} From fdd898d156444aaff5cfa8b76358a4e05767d032 Mon Sep 17 00:00:00 2001 From: nithinraok Date: Thu, 3 Sep 2020 16:19:58 -0700 Subject: [PATCH 12/12] style fix Signed-off-by: nithinraok --- examples/speaker_recognition/speaker_reco.py | 5 +---- examples/speaker_recognition/spkr_get_emb.py | 5 +++-- 2 files changed, 4 insertions(+), 6 deletions(-) diff --git a/examples/speaker_recognition/speaker_reco.py b/examples/speaker_recognition/speaker_reco.py index 346b8d01d3c8..0a30a93a2160 100644 --- a/examples/speaker_recognition/speaker_reco.py +++ b/examples/speaker_recognition/speaker_reco.py @@ -25,7 +25,7 @@ """ Basic run (on CPU for 50 epochs): EXP_NAME=sample_run -python ./speaker_reco.py --config-path='/mnt/ngc_ws/new_configs/' --config-name=$CONFIG_PATH \ +python ./speaker_reco.py --config-path='conf' --config-name='config.yaml' \ trainer.max_epochs=10 \ model.train_ds.batch_size=64 model.validation_ds.batch_size=64 \ trainer.gpus=0 \ @@ -38,9 +38,6 @@ ... 
\
         +trainer.fast_dev_run=true
 
-Hydra logs will be found in "$(./outputs/$(date +"%y-%m-%d")/$(date +"%H-%M-%S")/.hydra)"
-PTL logs will be found in "$(./outputs/$(date +"%y-%m-%d")/$(date +"%H-%M-%S")/lightning_logs)"
-
 """
 
 seed_everything(42)
diff --git a/examples/speaker_recognition/spkr_get_emb.py b/examples/speaker_recognition/spkr_get_emb.py
index a7599d88be8e..218c23817ca8 100644
--- a/examples/speaker_recognition/spkr_get_emb.py
+++ b/examples/speaker_recognition/spkr_get_emb.py
@@ -23,10 +23,11 @@
 from nemo.utils.exp_manager import exp_manager
 
 """
-Basic run (on CPU for 50 epochs):
+To extract embeddings:
     python examples/speaker_recognition/spkr_get_emb.py \
         model.test_ds.manifest_filepath="" \
+        exp_manager.exp_name="" \
+        exp_manager.exp_dir="" \
         hydra.run.dir="." \
         trainer.gpus=1
 """
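
A note on the math for readers of the series: the objective that AngularSoftmaxLoss and the decoder's angular=True path implement together can be exercised standalone. The sketch below is illustrative rather than NeMo code (cosine_logits and angular_margin_loss are invented names), and it normalizes the class weights functionally inside the forward pass, since rebinding W inside "for W in self.final.parameters():" returns a new tensor without modifying the parameter itself.

    import torch
    import torch.nn.functional as F

    def cosine_logits(emb, weight):
        # Cosine similarity between L2-normalized embeddings and class
        # weights; this mirrors the normalize-then-linear step the decoder
        # takes when angular=True (its final Linear also drops the bias).
        return F.linear(F.normalize(emb, p=2, dim=1), F.normalize(weight, p=2, dim=1))

    def angular_margin_loss(logits, labels, scale=20.0, margin=1.35, eps=1e-7):
        # logits are cosines in [-1, 1]; clamp before acos for numerical
        # safety, add the margin to the target-class angle, then rescale.
        target_cos = torch.diagonal(logits.transpose(0, 1)[labels])
        numerator = scale * torch.cos(
            torch.acos(torch.clamp(target_cos, -1.0 + eps, 1.0 - eps)) + margin
        )
        # Drop the target column from each row, keeping non-target logits.
        excl = torch.cat(
            [torch.cat((logits[i, :y], logits[i, y + 1 :])).unsqueeze(0) for i, y in enumerate(labels)], dim=0
        )
        denominator = torch.exp(numerator) + torch.sum(torch.exp(scale * excl), dim=1)
        return -torch.mean(numerator - torch.log(denominator))

    # Toy check: 4 embeddings of size 8, 3 speaker classes.
    emb, weight = torch.randn(4, 8), torch.randn(3, 8)  # weight stands in for the decoder's final-layer weight
    labels = torch.tensor([0, 2, 1, 0])
    print(angular_margin_loss(cosine_logits(emb, weight), labels).item())

Setting margin=0 reduces the expression to ordinary cross-entropy over cosine similarities multiplied by scale, which is a quick sanity check on the implementation.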
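
On the data side, the repeat-and-trim policy that fixed_seq_collate_fn applies per sample can be isolated into a few lines. This is a sketch under the same assumptions as the dataset code (1-D mono tensors; fixed_length plays the role of featurizer.sample_rate * time_length, and to_fixed_length is an invented helper name):

    import torch

    def to_fixed_length(sig, fixed_length):
        # Short signals are tiled, plus a tail remainder, up to fixed_length;
        # long signals contribute a random crop of exactly fixed_length.
        sig_len = sig.shape[0]
        if sig_len < fixed_length:
            repeat = fixed_length // sig_len
            rem = fixed_length % sig_len
            sub = sig[-rem:] if rem > 0 else torch.tensor([])
            return torch.cat(repeat * [sig] + [sub])
        chunk_len = sig_len - fixed_length
        start = torch.randint(0, chunk_len, (1,)) if chunk_len else torch.tensor(0)
        return sig[start : start + fixed_length]

    batch = [torch.randn(n) for n in (8000, 16000, 42000)]
    fixed_length = int(min(16000, max(s.shape[0] for s in batch)))
    signals = torch.stack([to_fixed_length(s, fixed_length) for s in batch])
    print(signals.shape)  # torch.Size([3, 16000])

The sliced_seq_collate_fn variant added for diarization applies the same repeat logic to short signals but, instead of cropping long ones, emits overlapping slice_length windows at a one-second shift and records the slice count per utterance, so that test_epoch_end can average the resulting embeddings back into a single vector per file.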