From 09a0d859543af014fc2a39328add03b540c3844a Mon Sep 17 00:00:00 2001 From: nithinraok Date: Mon, 24 Aug 2020 20:21:57 -0700 Subject: [PATCH 01/12] angular loss for 1.0 Signed-off-by: nithinraok --- nemo/collections/asr/data/audio_to_label.py | 8 ++- nemo/collections/asr/losses/angularloss.py | 62 +++++++++++++++++++++ nemo/collections/asr/models/label_models.py | 10 +++- nemo/collections/asr/modules/conv_asr.py | 25 ++++++++- 4 files changed, 98 insertions(+), 7 deletions(-) create mode 100644 nemo/collections/asr/losses/angularloss.py diff --git a/nemo/collections/asr/data/audio_to_label.py b/nemo/collections/asr/data/audio_to_label.py index 3d35b45611db..5a55210fa0bb 100644 --- a/nemo/collections/asr/data/audio_to_label.py +++ b/nemo/collections/asr/data/audio_to_label.py @@ -120,9 +120,9 @@ def fixed_seq_collate_fn(self, batch): _, audio_lengths, _, tokens_lengths = zip(*batch) has_audio = audio_lengths[0] is not None - fixed_length = min(fixed_length, max(audio_lengths)) + fixed_length = int(min(fixed_length, max(audio_lengths))) - audio_signal, tokens = [], [] + audio_signal, tokens, new_audio_lengths = [], [], [] for sig, sig_len, tokens_i, _ in batch: if has_audio: sig_len = sig_len.item() @@ -134,17 +134,19 @@ def fixed_seq_collate_fn(self, batch): sub = sig[-rem:] if rem > 0 else torch.tensor([]) rep_sig = torch.cat(repeat * [sig]) signal = torch.cat((rep_sig, sub)) + new_audio_lengths.append(torch.tensor(fixed_length)) else: start_idx = torch.randint(0, chunck_len, (1,)) if chunck_len else torch.tensor(0) end_idx = start_idx + fixed_length signal = sig[start_idx:end_idx] + new_audio_lengths.append(torch.tensor(fixed_length)) audio_signal.append(signal) tokens.append(tokens_i) if has_audio: audio_signal = torch.stack(audio_signal) - audio_lengths = torch.stack(audio_lengths) + audio_lengths = torch.stack(new_audio_lengths) else: audio_signal, audio_lengths = None, None tokens = torch.stack(tokens) diff --git a/nemo/collections/asr/losses/angularloss.py b/nemo/collections/asr/losses/angularloss.py new file mode 100644 index 000000000000..a6f9ff081f78 --- /dev/null +++ b/nemo/collections/asr/losses/angularloss.py @@ -0,0 +1,62 @@ +# ! /usr/bin/python +# Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import torch + +from nemo.core.classes import Loss, Typing, typecheck +from nemo.core.neural_types import LabelsType, LogitsType, LossType, NeuralType +from nemo.utils.decorators import experimental + +__all__ = ['AngularSoftmaxLoss'] + + +@experimental +class AngularSoftmaxLoss(Loss, Typing): + @property + def input_types(self): + """Input types definitions for AnguarLoss. + """ + return { + "logits": NeuralType(('B', 'D'), LogitsType()), + "labels": NeuralType(('B',), LabelsType()), + } + + @property + def output_types(self): + """Output types definitions for CTCLoss. 
+ loss: + NeuralType(None) + """ + return {"loss": NeuralType(elements_type=LossType())} + + def __init__(self, s=20.0, m=1.35): + super().__init__() + + self.eps = 1e-7 + self.s = s + self.m = m + + @typecheck() + def forward(self, logits, labels): + numerator = self.s * torch.cos( + torch.acos(torch.clamp(torch.diagonal(logits.transpose(0, 1)[labels]), -1.0 + self.eps, 1 - self.eps)) + + self.m + ) + excl = torch.cat( + [torch.cat((logits[i, :y], logits[i, y + 1 :])).unsqueeze(0) for i, y in enumerate(labels)], dim=0 + ) + denominator = torch.exp(numerator) + torch.sum(torch.exp(self.s * excl), dim=1) + L = numerator - torch.log(denominator) + return -torch.mean(L) diff --git a/nemo/collections/asr/models/label_models.py b/nemo/collections/asr/models/label_models.py index e6eb1961a0c5..f7d363a44438 100644 --- a/nemo/collections/asr/models/label_models.py +++ b/nemo/collections/asr/models/label_models.py @@ -23,6 +23,7 @@ from pytorch_lightning import Trainer from nemo.collections.asr.data.audio_to_label import AudioToSpeechLabelDataSet +from nemo.collections.asr.losses.angularloss import AngularSoftmaxLoss from nemo.collections.asr.parts.features import WaveformFeaturizer from nemo.collections.asr.parts.perturb import process_augmentations from nemo.collections.common.losses import CrossEntropyLoss as CELoss @@ -50,7 +51,14 @@ def __init__(self, cfg: DictConfig, trainer: Trainer = None): self.preprocessor = EncDecSpeakerLabelModel.from_config_dict(cfg.preprocessor) self.encoder = EncDecSpeakerLabelModel.from_config_dict(cfg.encoder) self.decoder = EncDecSpeakerLabelModel.from_config_dict(cfg.decoder) - self.loss = CELoss() + if 'angular' in cfg.decoder.params and cfg.decoder.params['angular']: + logging.info("Training with Angular Softmax Loss") + s = cfg.loss.s + m = cfg.loss.m + self.loss = AngularSoftmaxLoss(s=s, m=m) + else: + logging.info("Training with Softmax-CrossEntropy loss") + self.loss = CELoss() def __setup_dataloader_from_config(self, config: Optional[Dict]): if 'augmentor' in config: diff --git a/nemo/collections/asr/modules/conv_asr.py b/nemo/collections/asr/modules/conv_asr.py index 28b3431d5ce6..68d497a0e15b 100644 --- a/nemo/collections/asr/modules/conv_asr.py +++ b/nemo/collections/asr/modules/conv_asr.py @@ -15,6 +15,7 @@ import torch import torch.nn as nn +import torch.nn.functional as F from omegaconf import ListConfig, OmegaConf from nemo.collections.asr.parts.jasper import ( @@ -356,9 +357,20 @@ def output_types(self): ) def __init__( - self, feat_in, num_classes, emb_sizes=[1024, 1024], pool_mode='xvector', init_mode="xavier_uniform", + self, + feat_in, + num_classes, + emb_sizes=[1024, 1024], + pool_mode='xvector', + angular=False, + init_mode="xavier_uniform", ): super().__init__() + self.angular = angular + if self.angular: + bias = False + else: + bias = True if type(emb_sizes) is str: emb_sizes = emb_sizes.split(',') @@ -380,7 +392,7 @@ def __init__( self.emb_layers = nn.ModuleList(emb_layers) - self.final = nn.Linear(shapes[-1], self._num_classes) + self.final = nn.Linear(shapes[-1], self._num_classes, bias=bias) self.apply(lambda x: init_weights(x, mode=init_mode)) @@ -402,6 +414,13 @@ def forward(self, encoder_output): pool, emb = layer(pool), layer[:2](pool) embs.append(emb) - out = self.final(pool) + if self.angular: + for W in self.final.parameters(): + W = F.normalize(W, p=2, dim=1) + out = F.normalize(pool, p=2, dim=1) + out = self.final(out) + + else: + out = self.final(pool) return out, embs[-1] From 41081d892649ef2379562261440e54ee9ab1e3b8 Mon 
Sep 17 00:00:00 2001 From: nithinraok Date: Wed, 26 Aug 2020 13:32:56 -0700 Subject: [PATCH 02/12] metrics update Signed-off-by: nithinraok --- examples/speaker_recognition/spkr_get_emb.py | 4 +- nemo/collections/asr/models/label_models.py | 52 ++++++++++++-------- nemo/collections/asr/modules/conv_asr.py | 8 ++- 3 files changed, 37 insertions(+), 27 deletions(-) diff --git a/examples/speaker_recognition/spkr_get_emb.py b/examples/speaker_recognition/spkr_get_emb.py index de4887c0a262..adff12cbbe60 100644 --- a/examples/speaker_recognition/spkr_get_emb.py +++ b/examples/speaker_recognition/spkr_get_emb.py @@ -54,8 +54,8 @@ def main(cfg): model_path = os.path.join(log_dir, '..', 'spkr.nemo') speaker_model = ExtractSpeakerEmbeddingsModel.restore_from(model_path) speaker_model.setup_test_data(cfg.model.test_ds) - - trainer.test(speaker_model) + if speaker_model.prepare_test(trainer): + trainer.test(speaker_model) if __name__ == '__main__': diff --git a/nemo/collections/asr/models/label_models.py b/nemo/collections/asr/models/label_models.py index f7d363a44438..1aefbfe31280 100644 --- a/nemo/collections/asr/models/label_models.py +++ b/nemo/collections/asr/models/label_models.py @@ -17,7 +17,6 @@ import pickle as pkl from typing import Dict, Optional, Union -import numpy as np import torch from omegaconf import DictConfig from pytorch_lightning import Trainer @@ -27,6 +26,7 @@ from nemo.collections.asr.parts.features import WaveformFeaturizer from nemo.collections.asr.parts.perturb import process_augmentations from nemo.collections.common.losses import CrossEntropyLoss as CELoss +from nemo.collections.common.metrics import TopKClassificationAccuracy, compute_topk_accuracy from nemo.core.classes import ModelPT from nemo.core.classes.common import typecheck from nemo.core.neural_types import * @@ -59,6 +59,8 @@ def __init__(self, cfg: DictConfig, trainer: Trainer = None): else: logging.info("Training with Softmax-CrossEntropy loss") self.loss = CELoss() + + self._accuracy = TopKClassificationAccuracy() def __setup_dataloader_from_config(self, config: Optional[Dict]): if 'augmentor' in config: @@ -147,38 +149,48 @@ def forward(self, input_signal, input_signal_length): return logits, embs # PTL-specific methods - def training_step(self, batch, batch_nb): + def training_step(self, batch, batch_idx ): audio_signal, audio_signal_len, labels, _ = batch logits, _ = self.forward(input_signal=audio_signal, input_signal_length=audio_signal_len) loss_value = self.loss(logits=logits, labels=labels) - labels_hat = torch.argmax(logits, dim=1) - n_correct_pred = torch.sum(labels == labels_hat, dim=0).item() - tensorboard_logs = {'train_loss': loss_value, 'training_batch_acc': (n_correct_pred / len(labels)) * 100} - return {'loss': loss_value, 'log': tensorboard_logs, "n_correct_pred": n_correct_pred, "n_pred": len(labels)} + tensorboard_logs = { + 'train_loss': loss_value, + 'learning_rate': self._optimizer.param_groups[0]['lr'], + } + + correct_counts, total_counts = self._accuracy(logits=logits, labels=labels) + + for ki in range(correct_counts.shape[-1]): + correct_count = correct_counts[ki] + total_count = total_counts[ki] + top_k = self._accuracy.top_k[ki] - def training_epoch_end(self, outputs): - train_acc = (sum([x['n_correct_pred'] for x in outputs]) / sum(x['n_pred'] for x in outputs)) * 100 - tensorboard_logs = {'train_acc': train_acc} + tensorboard_logs['training_batch_accuracy_top@{}'.format(top_k)] = correct_count / float(total_count) - return {'train_acc': train_acc, 'log': tensorboard_logs} + 
return {'loss': loss_value, 'log': tensorboard_logs} - def validation_step(self, batch, batch_idx): + def validation_step(self, batch, batch_idx, dataloader_idx: int =0): audio_signal, audio_signal_len, labels, _ = batch logits, _ = self.forward(input_signal=audio_signal, input_signal_length=audio_signal_len) loss_value = self.loss(logits=logits, labels=labels) - labels_hat = torch.argmax(logits, dim=1) - n_correct_pred = torch.sum(labels == labels_hat, dim=0).item() + correct_counts, total_counts = self._accuracy(logits=logits, labels=labels) + return {'val_loss': loss_value, 'val_correct_counts': correct_counts, 'val_total_counts': total_counts} - return {'val_loss': loss_value, "n_correct_pred": n_correct_pred, "n_pred": len(labels)} - - def validation_epoch_end(self, outputs): + def multi_validation_epoch_end(self, outputs, dataloader_idx: int=0): val_loss_mean = torch.stack([x['val_loss'] for x in outputs]).mean() - val_acc = (sum([x['n_correct_pred'] for x in outputs]) / sum(x['n_pred'] for x in outputs)) * 100 - logging.info("validation accuracy {:.3f}".format(val_acc)) - tensorboard_logs = {'validation_loss': val_loss_mean, 'validation_acc': val_acc} + correct_counts = torch.stack([x['val_correct_counts'] for x in outputs]) + total_counts = torch.stack([x['val_total_counts'] for x in outputs]) + + topk_scores = compute_topk_accuracy(correct_counts, total_counts) + + tensorboard_log = {'val_loss': val_loss_mean} + logging.info("val_loss{}".format(val_loss_mean)) + for top_k, score in zip(self._accuracy.top_k, topk_scores): + tensorboard_log['val_epoch_top@{}'.format(top_k)] = score + logging.info("val_epoch_top@{}: {}".format(top_k,score)) - return {'val_loss': val_loss_mean, 'log': tensorboard_logs} + return {'log': tensorboard_log} def test_step(self, batch, batch_ix): audio_signal, audio_signal_len, labels, _ = batch diff --git a/nemo/collections/asr/modules/conv_asr.py b/nemo/collections/asr/modules/conv_asr.py index 68d497a0e15b..40233a4fba1b 100644 --- a/nemo/collections/asr/modules/conv_asr.py +++ b/nemo/collections/asr/modules/conv_asr.py @@ -417,10 +417,8 @@ def forward(self, encoder_output): if self.angular: for W in self.final.parameters(): W = F.normalize(W, p=2, dim=1) - out = F.normalize(pool, p=2, dim=1) - out = self.final(out) - - else: - out = self.final(pool) + pool = F.normalize(pool, p=2, dim=1) + + out = self.final(pool) return out, embs[-1] From b97b9408c911de788e7f21f025024d8129ff9777 Mon Sep 17 00:00:00 2001 From: nithinraok Date: Wed, 26 Aug 2020 14:51:22 -0700 Subject: [PATCH 03/12] metric update Signed-off-by: nithinraok --- examples/speaker_recognition/spkr_get_emb.py | 3 +-- nemo/collections/asr/data/audio_to_label.py | 1 + nemo/collections/asr/models/label_models.py | 12 ++++++------ nemo/collections/asr/modules/conv_asr.py | 2 +- 4 files changed, 9 insertions(+), 9 deletions(-) diff --git a/examples/speaker_recognition/spkr_get_emb.py b/examples/speaker_recognition/spkr_get_emb.py index adff12cbbe60..c2560748348e 100644 --- a/examples/speaker_recognition/spkr_get_emb.py +++ b/examples/speaker_recognition/spkr_get_emb.py @@ -54,8 +54,7 @@ def main(cfg): model_path = os.path.join(log_dir, '..', 'spkr.nemo') speaker_model = ExtractSpeakerEmbeddingsModel.restore_from(model_path) speaker_model.setup_test_data(cfg.model.test_ds) - if speaker_model.prepare_test(trainer): - trainer.test(speaker_model) + trainer.test(speaker_model) if __name__ == '__main__': diff --git a/nemo/collections/asr/data/audio_to_label.py b/nemo/collections/asr/data/audio_to_label.py 
index 5a55210fa0bb..1945470bc915 100644 --- a/nemo/collections/asr/data/audio_to_label.py +++ b/nemo/collections/asr/data/audio_to_label.py @@ -96,6 +96,7 @@ def __init__( self.trim = trim self.load_audio = load_audio self.time_length = time_length + logging.info("Timelength considered for collate func is {}".format(time_length)) self.labels = labels if labels else self.collection.uniq_labels self.num_classes = len(self.labels) diff --git a/nemo/collections/asr/models/label_models.py b/nemo/collections/asr/models/label_models.py index 1aefbfe31280..a152422381f3 100644 --- a/nemo/collections/asr/models/label_models.py +++ b/nemo/collections/asr/models/label_models.py @@ -59,7 +59,7 @@ def __init__(self, cfg: DictConfig, trainer: Trainer = None): else: logging.info("Training with Softmax-CrossEntropy loss") self.loss = CELoss() - + self._accuracy = TopKClassificationAccuracy() def __setup_dataloader_from_config(self, config: Optional[Dict]): @@ -149,7 +149,7 @@ def forward(self, input_signal, input_signal_length): return logits, embs # PTL-specific methods - def training_step(self, batch, batch_idx ): + def training_step(self, batch, batch_idx): audio_signal, audio_signal_len, labels, _ = batch logits, _ = self.forward(input_signal=audio_signal, input_signal_length=audio_signal_len) loss_value = self.loss(logits=logits, labels=labels) @@ -170,14 +170,14 @@ def training_step(self, batch, batch_idx ): return {'loss': loss_value, 'log': tensorboard_logs} - def validation_step(self, batch, batch_idx, dataloader_idx: int =0): + def validation_step(self, batch, batch_idx, dataloader_idx: int = 0): audio_signal, audio_signal_len, labels, _ = batch logits, _ = self.forward(input_signal=audio_signal, input_signal_length=audio_signal_len) loss_value = self.loss(logits=logits, labels=labels) correct_counts, total_counts = self._accuracy(logits=logits, labels=labels) return {'val_loss': loss_value, 'val_correct_counts': correct_counts, 'val_total_counts': total_counts} - def multi_validation_epoch_end(self, outputs, dataloader_idx: int=0): + def multi_validation_epoch_end(self, outputs, dataloader_idx: int = 0): val_loss_mean = torch.stack([x['val_loss'] for x in outputs]).mean() correct_counts = torch.stack([x['val_correct_counts'] for x in outputs]) total_counts = torch.stack([x['val_total_counts'] for x in outputs]) @@ -185,10 +185,10 @@ def multi_validation_epoch_end(self, outputs, dataloader_idx: int=0): topk_scores = compute_topk_accuracy(correct_counts, total_counts) tensorboard_log = {'val_loss': val_loss_mean} - logging.info("val_loss{}".format(val_loss_mean)) + logging.info("val_loss{}".format(val_loss_mean)) for top_k, score in zip(self._accuracy.top_k, topk_scores): tensorboard_log['val_epoch_top@{}'.format(top_k)] = score - logging.info("val_epoch_top@{}: {}".format(top_k,score)) + logging.info("val_epoch_top@{}: {}".format(top_k, score)) return {'log': tensorboard_log} diff --git a/nemo/collections/asr/modules/conv_asr.py b/nemo/collections/asr/modules/conv_asr.py index 40233a4fba1b..2c4d6bf4da33 100644 --- a/nemo/collections/asr/modules/conv_asr.py +++ b/nemo/collections/asr/modules/conv_asr.py @@ -418,7 +418,7 @@ def forward(self, encoder_output): for W in self.final.parameters(): W = F.normalize(W, p=2, dim=1) pool = F.normalize(pool, p=2, dim=1) - + out = self.final(pool) return out, embs[-1] From 2ba342edd519511e8576be12d49a2d3d4e551e08 Mon Sep 17 00:00:00 2001 From: nithinraok Date: Fri, 28 Aug 2020 14:10:13 -0700 Subject: [PATCH 04/12] logvallcallback Signed-off-by: nithinraok --- 
examples/speaker_recognition/speaker_reco.py | 2 ++ nemo/collections/asr/models/label_models.py | 18 ++++++++++++------ nemo/collections/common/callbacks/__init__.py | 3 ++- nemo/collections/common/callbacks/callbacks.py | 15 +++++++++++++++ 4 files changed, 31 insertions(+), 7 deletions(-) diff --git a/examples/speaker_recognition/speaker_reco.py b/examples/speaker_recognition/speaker_reco.py index 3259a6512b56..50440f8115d0 100644 --- a/examples/speaker_recognition/speaker_reco.py +++ b/examples/speaker_recognition/speaker_reco.py @@ -21,6 +21,7 @@ from nemo.core.config import hydra_runner from nemo.utils import logging from nemo.utils.exp_manager import exp_manager +from nemo.collections.common.callbacks import LogTrainValidLossCallback """ Basic run (on CPU for 50 epochs): @@ -52,6 +53,7 @@ def main(cfg): trainer = pl.Trainer(**cfg.trainer) log_dir = exp_manager(trainer, cfg.get("exp_manager", None)) speaker_model = EncDecSpeakerLabelModel(cfg=cfg.model, trainer=trainer) + trainer.callbacks.extend([LogTrainValidLossCallback()]) trainer.fit(speaker_model) model_path = os.path.join(log_dir, '..', 'spkr.nemo') speaker_model.save_to(model_path) diff --git a/nemo/collections/asr/models/label_models.py b/nemo/collections/asr/models/label_models.py index a152422381f3..ef37d04bcebd 100644 --- a/nemo/collections/asr/models/label_models.py +++ b/nemo/collections/asr/models/label_models.py @@ -150,12 +150,13 @@ def forward(self, input_signal, input_signal_length): # PTL-specific methods def training_step(self, batch, batch_idx): + # import ipdb; ipdb.set_trace() audio_signal, audio_signal_len, labels, _ = batch logits, _ = self.forward(input_signal=audio_signal, input_signal_length=audio_signal_len) - loss_value = self.loss(logits=logits, labels=labels) + self.loss_value = self.loss(logits=logits, labels=labels) tensorboard_logs = { - 'train_loss': loss_value, + 'train_loss': self.loss_value, 'learning_rate': self._optimizer.param_groups[0]['lr'], } @@ -168,14 +169,19 @@ def training_step(self, batch, batch_idx): tensorboard_logs['training_batch_accuracy_top@{}'.format(top_k)] = correct_count / float(total_count) - return {'loss': loss_value, 'log': tensorboard_logs} + return {'loss': self.loss_value, 'log': tensorboard_logs} + + # def training_epoch_end(self,outputs): + # val_loss_mean = torch.stack([x['val_loss'] for x in outputs]).mean() + # logging.info("") + def validation_step(self, batch, batch_idx, dataloader_idx: int = 0): audio_signal, audio_signal_len, labels, _ = batch logits, _ = self.forward(input_signal=audio_signal, input_signal_length=audio_signal_len) - loss_value = self.loss(logits=logits, labels=labels) + self.loss_value = self.loss(logits=logits, labels=labels) correct_counts, total_counts = self._accuracy(logits=logits, labels=labels) - return {'val_loss': loss_value, 'val_correct_counts': correct_counts, 'val_total_counts': total_counts} + return {'val_loss': self.loss_value, 'val_correct_counts': correct_counts, 'val_total_counts': total_counts} def multi_validation_epoch_end(self, outputs, dataloader_idx: int = 0): val_loss_mean = torch.stack([x['val_loss'] for x in outputs]).mean() @@ -185,7 +191,7 @@ def multi_validation_epoch_end(self, outputs, dataloader_idx: int = 0): topk_scores = compute_topk_accuracy(correct_counts, total_counts) tensorboard_log = {'val_loss': val_loss_mean} - logging.info("val_loss{}".format(val_loss_mean)) + logging.info("val_loss {}".format(val_loss_mean)) for top_k, score in zip(self._accuracy.top_k, topk_scores): 
tensorboard_log['val_epoch_top@{}'.format(top_k)] = score logging.info("val_epoch_top@{}: {}".format(top_k, score)) diff --git a/nemo/collections/common/callbacks/__init__.py b/nemo/collections/common/callbacks/__init__.py index 9ad5c9c85a5f..ef03e582cba7 100644 --- a/nemo/collections/common/callbacks/__init__.py +++ b/nemo/collections/common/callbacks/__init__.py @@ -12,4 +12,5 @@ # See the License for the specific language governing permissions and # limitations under the License. -from nemo.collections.common.callbacks.callbacks import LogEpochTimeCallback +from nemo.collections.common.callbacks.callbacks import LogEpochTimeCallback,LogTrainValidLossCallback + diff --git a/nemo/collections/common/callbacks/callbacks.py b/nemo/collections/common/callbacks/callbacks.py index 55fa5c50a1c5..153d7ce2a844 100644 --- a/nemo/collections/common/callbacks/callbacks.py +++ b/nemo/collections/common/callbacks/callbacks.py @@ -15,6 +15,7 @@ from pytorch_lightning.callbacks.base import Callback from pytorch_lightning.utilities import rank_zero_only +from nemo.utils import logging class LogEpochTimeCallback(Callback): @@ -30,3 +31,17 @@ def on_epoch_end(self, trainer, pl_module): curr_time = time.time() duration = curr_time - self.epoch_start trainer.logger.log_metrics({"epoch_time": duration}, step=trainer.global_step) + +class LogTrainValidLossCallback(Callback): + + @rank_zero_only + def on_epoch_start(self, trainer, pl_module): + logging.info(" Training started") + + @rank_zero_only + def on_train_epoch_start(self,trainer,pl_module): + print_freq = trainer.row_log_interval + logging.info("batch_idx") + if 4 % print_freq == 0: + logging.info("Epoch: {} batch: {} train_loss: {}".format(trainer.current_epoch,1,pl_module.loss)) + From 0bb0092a130d3ac81d01e90e9f68151b03a624fe Mon Sep 17 00:00:00 2001 From: nithinraok Date: Fri, 28 Aug 2020 18:04:00 -0700 Subject: [PATCH 05/12] CallbackManager Signed-off-by: nithinraok --- examples/speaker_recognition/speaker_reco.py | 9 +++- nemo/collections/asr/models/label_models.py | 15 +++--- nemo/collections/common/callbacks/__init__.py | 7 ++- .../collections/common/callbacks/callbacks.py | 53 ++++++++++++++++--- 4 files changed, 64 insertions(+), 20 deletions(-) diff --git a/examples/speaker_recognition/speaker_reco.py b/examples/speaker_recognition/speaker_reco.py index 50440f8115d0..2cf7fd0553d4 100644 --- a/examples/speaker_recognition/speaker_reco.py +++ b/examples/speaker_recognition/speaker_reco.py @@ -18,10 +18,10 @@ from pytorch_lightning import seed_everything from nemo.collections.asr.models import EncDecSpeakerLabelModel +from nemo.collections.common.callbacks import CallbackManager from nemo.core.config import hydra_runner from nemo.utils import logging from nemo.utils.exp_manager import exp_manager -from nemo.collections.common.callbacks import LogTrainValidLossCallback """ Basic run (on CPU for 50 epochs): @@ -51,9 +51,14 @@ def main(cfg): logging.info(f'Hydra config: {cfg.pretty()}') trainer = pl.Trainer(**cfg.trainer) + + callbacks = ['LogEpochTimeCallback()', 'LogTrainValidLossCallback()'] + callback_mgr = CallbackManager() + callbacks = callback_mgr.add_callback(callbacks) + trainer.callbacks.extend(callbacks) + log_dir = exp_manager(trainer, cfg.get("exp_manager", None)) speaker_model = EncDecSpeakerLabelModel(cfg=cfg.model, trainer=trainer) - trainer.callbacks.extend([LogTrainValidLossCallback()]) trainer.fit(speaker_model) model_path = os.path.join(log_dir, '..', 'spkr.nemo') speaker_model.save_to(model_path) diff --git 
a/nemo/collections/asr/models/label_models.py b/nemo/collections/asr/models/label_models.py index ef37d04bcebd..50ec7ee6d66f 100644 --- a/nemo/collections/asr/models/label_models.py +++ b/nemo/collections/asr/models/label_models.py @@ -60,7 +60,7 @@ def __init__(self, cfg: DictConfig, trainer: Trainer = None): logging.info("Training with Softmax-CrossEntropy loss") self.loss = CELoss() - self._accuracy = TopKClassificationAccuracy() + self._accuracy = TopKClassificationAccuracy(top_k=[1]) def __setup_dataloader_from_config(self, config: Optional[Dict]): if 'augmentor' in config: @@ -166,15 +166,15 @@ def training_step(self, batch, batch_idx): correct_count = correct_counts[ki] total_count = total_counts[ki] top_k = self._accuracy.top_k[ki] + self.accuracy = (correct_count / float(total_count)) * 100 - tensorboard_logs['training_batch_accuracy_top@{}'.format(top_k)] = correct_count / float(total_count) + tensorboard_logs['training_batch_accuracy_top@{}'.format(top_k)] = self.accuracy return {'loss': self.loss_value, 'log': tensorboard_logs} - + # def training_epoch_end(self,outputs): # val_loss_mean = torch.stack([x['val_loss'] for x in outputs]).mean() # logging.info("") - def validation_step(self, batch, batch_idx, dataloader_idx: int = 0): audio_signal, audio_signal_len, labels, _ = batch @@ -184,17 +184,16 @@ def validation_step(self, batch, batch_idx, dataloader_idx: int = 0): return {'val_loss': self.loss_value, 'val_correct_counts': correct_counts, 'val_total_counts': total_counts} def multi_validation_epoch_end(self, outputs, dataloader_idx: int = 0): - val_loss_mean = torch.stack([x['val_loss'] for x in outputs]).mean() + self.val_loss_mean = torch.stack([x['val_loss'] for x in outputs]).mean() correct_counts = torch.stack([x['val_correct_counts'] for x in outputs]) total_counts = torch.stack([x['val_total_counts'] for x in outputs]) topk_scores = compute_topk_accuracy(correct_counts, total_counts) - tensorboard_log = {'val_loss': val_loss_mean} - logging.info("val_loss {}".format(val_loss_mean)) + tensorboard_log = {'val_loss': self.val_loss_mean} for top_k, score in zip(self._accuracy.top_k, topk_scores): tensorboard_log['val_epoch_top@{}'.format(top_k)] = score - logging.info("val_epoch_top@{}: {}".format(top_k, score)) + self.accuracy = score * 100 return {'log': tensorboard_log} diff --git a/nemo/collections/common/callbacks/__init__.py b/nemo/collections/common/callbacks/__init__.py index ef03e582cba7..92393d04ec53 100644 --- a/nemo/collections/common/callbacks/__init__.py +++ b/nemo/collections/common/callbacks/__init__.py @@ -12,5 +12,8 @@ # See the License for the specific language governing permissions and # limitations under the License. -from nemo.collections.common.callbacks.callbacks import LogEpochTimeCallback,LogTrainValidLossCallback - +from nemo.collections.common.callbacks.callbacks import ( + CallbackManager, + LogEpochTimeCallback, + LogTrainValidLossCallback, +) diff --git a/nemo/collections/common/callbacks/callbacks.py b/nemo/collections/common/callbacks/callbacks.py index 153d7ce2a844..46ead373cf6a 100644 --- a/nemo/collections/common/callbacks/callbacks.py +++ b/nemo/collections/common/callbacks/callbacks.py @@ -12,12 +12,35 @@ # See the License for the specific language governing permissions and # limitations under the License. 
import time +from typing import List, Union from pytorch_lightning.callbacks.base import Callback from pytorch_lightning.utilities import rank_zero_only + from nemo.utils import logging +class CallbackManager: + def __init__(self) -> None: + self.callbacks = set(['LogEpochTimeCallback()', 'LogTrainValidLossCallback()']) + + def get_callback(self, callback_name: str): + if callback_name in self.callbacks: + return eval(callback_name) + else: + raise NameError("Provided Callback name is not part of nemo Callback system") + + def add_callback(self, callback_names: Union[str, List]): + if type(callback_names) is str: + callback_names = callback_names.split(',') + + callbacks = [] + for name in callback_names: + callbacks.append(self.get_callback(name)) + + return callbacks + + class LogEpochTimeCallback(Callback): """Simple callback that logs how long each epoch takes, in seconds, to a pytorch lightning log """ @@ -32,16 +55,30 @@ def on_epoch_end(self, trainer, pl_module): duration = curr_time - self.epoch_start trainer.logger.log_metrics({"epoch_time": duration}, step=trainer.global_step) -class LogTrainValidLossCallback(Callback): +class LogTrainValidLossCallback(Callback): @rank_zero_only - def on_epoch_start(self, trainer, pl_module): - logging.info(" Training started") - + def on_train_start(self, trainer, pl_module): + logging.info("Training started") + @rank_zero_only - def on_train_epoch_start(self,trainer,pl_module): + def on_train_batch_end(self, trainer, pl_module, batch, batch_idx, dataloader_idx): print_freq = trainer.row_log_interval - logging.info("batch_idx") - if 4 % print_freq == 0: - logging.info("Epoch: {} batch: {} train_loss: {}".format(trainer.current_epoch,1,pl_module.loss)) + if batch_idx % print_freq == 0: + logging.info( + "Epoch: {}/{} batch: {}/{} train_loss: {:.3f} train_acc: {:.2f}".format( + trainer.current_epoch + 1, + trainer.max_epochs, + batch_idx + 1, + trainer.num_training_batches, + pl_module.loss_value, + pl_module.accuracy, + ) + ) + def on_validation_epoch_end(self, trainer, pl_module): + logging.info( + "----> Epoch: {}/{} val_loss: {:.3f} val_acc: {:.2f} <----".format( + trainer.current_epoch + 1, trainer.max_epochs, pl_module.val_loss_mean, pl_module.accuracy + ) + ) From 7d8bdf65b55afa00cf7cae10261b9ea548f3cfdf Mon Sep 17 00:00:00 2001 From: nithinraok Date: Tue, 1 Sep 2020 11:36:50 -0700 Subject: [PATCH 06/12] Updated spkr_get_emb to support diarization Signed-off-by: nithinraok --- nemo/collections/asr/data/audio_to_label.py | 53 +++++++++++++++++++ nemo/collections/asr/models/label_models.py | 16 +++--- .../collections/common/callbacks/callbacks.py | 26 ++------- nemo/utils/exp_manager.py | 22 ++++++++ 4 files changed, 87 insertions(+), 30 deletions(-) diff --git a/nemo/collections/asr/data/audio_to_label.py b/nemo/collections/asr/data/audio_to_label.py index 1945470bc915..c04649752b30 100644 --- a/nemo/collections/asr/data/audio_to_label.py +++ b/nemo/collections/asr/data/audio_to_label.py @@ -154,6 +154,59 @@ def fixed_seq_collate_fn(self, batch): tokens_lengths = torch.stack(tokens_lengths) return audio_signal, audio_lengths, tokens, tokens_lengths + + def sliced_seq_collate_fn(self, batch): + """collate batch of audio sig, audio len, tokens, tokens len + Args: + batch (Optional[FloatTensor], Optional[LongTensor], LongTensor, + LongTensor): A tuple of tuples of signal, signal lengths, + encoded tokens, and encoded tokens length. This collate func + assumes the signals are 1d torch tensors (i.e. mono audio). 
+ fixed_length (Optional[int]): length of input signal to be considered + """ + slice_length = self.featurizer.sample_rate * self.time_length + _, audio_lengths, _, tokens_lengths = zip(*batch) + slice_length = min(slice_length, max(audio_lengths)) + shift = 1 * 16000 + has_audio = audio_lengths[0] is not None + + audio_signal, num_slices, tokens, audio_lengths = [], [], [], [] + for sig, sig_len, tokens_i, _ in batch: + if has_audio: + sig_len = sig_len.item() + slices = sig_len // slice_length + if slices <= 0: + + repeat = slice_length // sig_len + rem = slice_length % sig_len + sub = sig[-rem:] if rem > 0 else torch.tensor([]) + rep_sig = torch.cat(repeat * [sig]) + signal = torch.cat((rep_sig, sub)) + audio_signal.append(signal) + num_slices.append(1) # single embedding + tokens.extend([tokens_i] * 1) + audio_lengths.extend([slice_length] * 1) + else: + slices = (sig_len - slice_length) // shift + 1 + for slice_id in range(slices): + start_idx = slice_id * shift + end_idx = start_idx + slice_length + signal = sig[start_idx:end_idx] + audio_signal.append(signal) + + num_slices.append(slices) + tokens.extend([tokens_i] * slices) + audio_lengths.extend([slice_length] * slices) + + if has_audio: + audio_signal = torch.stack(audio_signal) + audio_lengths = torch.tensor(audio_lengths) + else: + audio_signal, audio_lengths = None, None + tokens = torch.stack(tokens) + tokens_lengths = torch.tensor(num_slices) # each embedding length + + return audio_signal, audio_lengths, tokens, tokens_lengths def __len__(self): return len(self.collection) diff --git a/nemo/collections/asr/models/label_models.py b/nemo/collections/asr/models/label_models.py index 50ec7ee6d66f..56ab2200959a 100644 --- a/nemo/collections/asr/models/label_models.py +++ b/nemo/collections/asr/models/label_models.py @@ -172,10 +172,6 @@ def training_step(self, batch, batch_idx): return {'loss': self.loss_value, 'log': tensorboard_logs} - # def training_epoch_end(self,outputs): - # val_loss_mean = torch.stack([x['val_loss'] for x in outputs]).mean() - # logging.info("") - def validation_step(self, batch, batch_idx, dataloader_idx: int = 0): audio_signal, audio_signal_len, labels, _ = batch logits, _ = self.forward(input_signal=audio_signal, input_signal_length=audio_signal_len) @@ -213,16 +209,17 @@ def __init__(self, cfg: DictConfig, trainer: Trainer = None): super().__init__(cfg=cfg, trainer=trainer) def test_step(self, batch, batch_ix): - audio_signal, audio_signal_len, labels, _ = batch + audio_signal, audio_signal_len, labels, slices = batch _, embs = self.forward(input_signal=audio_signal, input_signal_length=audio_signal_len) - return {'embs': embs, 'labels': labels} + return {'embs': embs, 'labels': labels, 'slices': slices} def test_epoch_end(self, outputs): embs = torch.cat([x['embs'] for x in outputs]) + slices = torch.cat([x['slices'] for x in outputs]) emb_shape = embs.shape[-1] embs = embs.view(-1, emb_shape).cpu().numpy() out_embeddings = {} - + start_idx=0 with open(self.test_manifest, 'r') as manifest: for idx, line in enumerate(manifest.readlines()): line = line.strip() @@ -231,7 +228,10 @@ def test_epoch_end(self, outputs): uniq_name = '@'.join(structure) if uniq_name in out_embeddings: raise KeyError("Embeddings for label {} already present in emb dictionary".format(uniq_name)) - out_embeddings[uniq_name] = embs[idx] + num_slices = slices[idx] + end_idx = start_idx+num_slices + out_embeddings[uniq_name] = embs[start_idx:end_idx].mean(axis=0) + start_idx = end_idx embedding_dir = 
os.path.join(self.embedding_dir, 'embeddings') if not os.path.exists(embedding_dir): diff --git a/nemo/collections/common/callbacks/callbacks.py b/nemo/collections/common/callbacks/callbacks.py index 46ead373cf6a..e890727bcc17 100644 --- a/nemo/collections/common/callbacks/callbacks.py +++ b/nemo/collections/common/callbacks/callbacks.py @@ -20,27 +20,6 @@ from nemo.utils import logging -class CallbackManager: - def __init__(self) -> None: - self.callbacks = set(['LogEpochTimeCallback()', 'LogTrainValidLossCallback()']) - - def get_callback(self, callback_name: str): - if callback_name in self.callbacks: - return eval(callback_name) - else: - raise NameError("Provided Callback name is not part of nemo Callback system") - - def add_callback(self, callback_names: Union[str, List]): - if type(callback_names) is str: - callback_names = callback_names.split(',') - - callbacks = [] - for name in callback_names: - callbacks.append(self.get_callback(name)) - - return callbacks - - class LogEpochTimeCallback(Callback): """Simple callback that logs how long each epoch takes, in seconds, to a pytorch lightning log """ @@ -64,13 +43,16 @@ def on_train_start(self, trainer, pl_module): @rank_zero_only def on_train_batch_end(self, trainer, pl_module, batch, batch_idx, dataloader_idx): print_freq = trainer.row_log_interval + total_batches = trainer.num_training_batches + if 0 < print_freq < 1: + print_freq = int(total_batches*print_freq) if batch_idx % print_freq == 0: logging.info( "Epoch: {}/{} batch: {}/{} train_loss: {:.3f} train_acc: {:.2f}".format( trainer.current_epoch + 1, trainer.max_epochs, batch_idx + 1, - trainer.num_training_batches, + total_batches, pl_module.loss_value, pl_module.accuracy, ) diff --git a/nemo/utils/exp_manager.py b/nemo/utils/exp_manager.py index 11a7c3ea3ba7..4433dd664b83 100644 --- a/nemo/utils/exp_manager.py +++ b/nemo/utils/exp_manager.py @@ -36,6 +36,28 @@ from nemo.utils.lightning_logger_patch import add_filehandlers_to_pl_logger +class CallbackManager: + def __init__(self) -> None: + self.callbacks = set(['LogEpochTimeCallback()', 'LogTrainValidLossCallback()']) + + def get_callback(self, callback_name: str): + if callback_name in self.callbacks: + return eval(callback_name) + else: + raise NameError("Provided Callback name is not part of nemo Callback system") + + def add_callback(self, callback_names: Union[str, List]): + if type(callback_names) is str: + callback_names = callback_names.split(',') + + callbacks = [] + for name in callback_names: + callbacks.append(self.get_callback(name)) + + return callbacks + + + class NotFoundError(NeMoBaseException): """ Raised when a file or folder is not found""" From 965a5176474871d924caed3ffe77f4904e7e23da Mon Sep 17 00:00:00 2001 From: nithinraok Date: Tue, 1 Sep 2020 12:46:29 -0700 Subject: [PATCH 07/12] callback in expmanager Signed-off-by: nithinraok --- examples/speaker_recognition/speaker_reco.py | 7 ---- nemo/collections/asr/data/audio_to_label.py | 2 +- nemo/collections/asr/models/label_models.py | 4 +- nemo/collections/common/callbacks/__init__.py | 2 +- .../collections/common/callbacks/callbacks.py | 9 ++++- nemo/core/classes/modelPT.py | 1 + nemo/utils/exp_manager.py | 37 ++++++++----------- 7 files changed, 27 insertions(+), 35 deletions(-) diff --git a/examples/speaker_recognition/speaker_reco.py b/examples/speaker_recognition/speaker_reco.py index 2cf7fd0553d4..3259a6512b56 100644 --- a/examples/speaker_recognition/speaker_reco.py +++ b/examples/speaker_recognition/speaker_reco.py @@ -18,7 +18,6 @@ from 
pytorch_lightning import seed_everything from nemo.collections.asr.models import EncDecSpeakerLabelModel -from nemo.collections.common.callbacks import CallbackManager from nemo.core.config import hydra_runner from nemo.utils import logging from nemo.utils.exp_manager import exp_manager @@ -51,12 +50,6 @@ def main(cfg): logging.info(f'Hydra config: {cfg.pretty()}') trainer = pl.Trainer(**cfg.trainer) - - callbacks = ['LogEpochTimeCallback()', 'LogTrainValidLossCallback()'] - callback_mgr = CallbackManager() - callbacks = callback_mgr.add_callback(callbacks) - trainer.callbacks.extend(callbacks) - log_dir = exp_manager(trainer, cfg.get("exp_manager", None)) speaker_model = EncDecSpeakerLabelModel(cfg=cfg.model, trainer=trainer) trainer.fit(speaker_model) diff --git a/nemo/collections/asr/data/audio_to_label.py b/nemo/collections/asr/data/audio_to_label.py index c04649752b30..28929375c741 100644 --- a/nemo/collections/asr/data/audio_to_label.py +++ b/nemo/collections/asr/data/audio_to_label.py @@ -154,7 +154,7 @@ def fixed_seq_collate_fn(self, batch): tokens_lengths = torch.stack(tokens_lengths) return audio_signal, audio_lengths, tokens, tokens_lengths - + def sliced_seq_collate_fn(self, batch): """collate batch of audio sig, audio len, tokens, tokens len Args: diff --git a/nemo/collections/asr/models/label_models.py b/nemo/collections/asr/models/label_models.py index 56ab2200959a..4dd3ea9ad1b0 100644 --- a/nemo/collections/asr/models/label_models.py +++ b/nemo/collections/asr/models/label_models.py @@ -219,7 +219,7 @@ def test_epoch_end(self, outputs): emb_shape = embs.shape[-1] embs = embs.view(-1, emb_shape).cpu().numpy() out_embeddings = {} - start_idx=0 + start_idx = 0 with open(self.test_manifest, 'r') as manifest: for idx, line in enumerate(manifest.readlines()): line = line.strip() @@ -229,7 +229,7 @@ def test_epoch_end(self, outputs): if uniq_name in out_embeddings: raise KeyError("Embeddings for label {} already present in emb dictionary".format(uniq_name)) num_slices = slices[idx] - end_idx = start_idx+num_slices + end_idx = start_idx + num_slices out_embeddings[uniq_name] = embs[start_idx:end_idx].mean(axis=0) start_idx = end_idx diff --git a/nemo/collections/common/callbacks/__init__.py b/nemo/collections/common/callbacks/__init__.py index 92393d04ec53..96e2bff8d4d6 100644 --- a/nemo/collections/common/callbacks/__init__.py +++ b/nemo/collections/common/callbacks/__init__.py @@ -13,7 +13,7 @@ # limitations under the License. from nemo.collections.common.callbacks.callbacks import ( - CallbackManager, + AVAILABLE_CALLBACKS, LogEpochTimeCallback, LogTrainValidLossCallback, ) diff --git a/nemo/collections/common/callbacks/callbacks.py b/nemo/collections/common/callbacks/callbacks.py index e890727bcc17..96a4cc21063d 100644 --- a/nemo/collections/common/callbacks/callbacks.py +++ b/nemo/collections/common/callbacks/callbacks.py @@ -12,7 +12,6 @@ # See the License for the specific language governing permissions and # limitations under the License. 
import time -from typing import List, Union from pytorch_lightning.callbacks.base import Callback from pytorch_lightning.utilities import rank_zero_only @@ -45,7 +44,7 @@ def on_train_batch_end(self, trainer, pl_module, batch, batch_idx, dataloader_id print_freq = trainer.row_log_interval total_batches = trainer.num_training_batches if 0 < print_freq < 1: - print_freq = int(total_batches*print_freq) + print_freq = int(total_batches * print_freq) if batch_idx % print_freq == 0: logging.info( "Epoch: {}/{} batch: {}/{} train_loss: {:.3f} train_acc: {:.2f}".format( @@ -64,3 +63,9 @@ def on_validation_epoch_end(self, trainer, pl_module): trainer.current_epoch + 1, trainer.max_epochs, pl_module.val_loss_mean, pl_module.accuracy ) ) + + +AVAILABLE_CALLBACKS = { + 'LogEpochTimeCallback': LogEpochTimeCallback(), + 'LogTrainValidLossCallback': LogTrainValidLossCallback(), +} diff --git a/nemo/core/classes/modelPT.py b/nemo/core/classes/modelPT.py index 489fdee0020b..d8811263cfd6 100644 --- a/nemo/core/classes/modelPT.py +++ b/nemo/core/classes/modelPT.py @@ -27,6 +27,7 @@ from omegaconf import DictConfig, OmegaConf from pytorch_lightning import LightningModule, Trainer +from nemo.collections.common import callbacks from nemo.core import optim from nemo.core.classes.common import Model from nemo.core.optim import prepare_lr_scheduler diff --git a/nemo/utils/exp_manager.py b/nemo/utils/exp_manager.py index 4433dd664b83..baa14ef10438 100644 --- a/nemo/utils/exp_manager.py +++ b/nemo/utils/exp_manager.py @@ -29,6 +29,7 @@ from pytorch_lightning.loggers import TensorBoardLogger, WandbLogger from pytorch_lightning.utilities import rank_zero_only +from nemo.collections.common import callbacks from nemo.constants import NEMO_ENV_VARNAME_VERSION from nemo.utils import logging from nemo.utils.exceptions import NeMoBaseException @@ -36,28 +37,6 @@ from nemo.utils.lightning_logger_patch import add_filehandlers_to_pl_logger -class CallbackManager: - def __init__(self) -> None: - self.callbacks = set(['LogEpochTimeCallback()', 'LogTrainValidLossCallback()']) - - def get_callback(self, callback_name: str): - if callback_name in self.callbacks: - return eval(callback_name) - else: - raise NameError("Provided Callback name is not part of nemo Callback system") - - def add_callback(self, callback_names: Union[str, List]): - if type(callback_names) is str: - callback_names = callback_names.split(',') - - callbacks = [] - for name in callback_names: - callbacks.append(self.get_callback(name)) - - return callbacks - - - class NotFoundError(NeMoBaseException): """ Raised when a file or folder is not found""" @@ -97,6 +76,7 @@ class ExpManagerConfig: create_checkpoint_callback: Optional[bool] = True # Additional exp_manager arguments files_to_copy: Optional[List[str]] = None + callbacks: Optional[str] = None def exp_manager(trainer: 'pytorch_lightning.Trainer', cfg: Optional[Union[DictConfig, Dict]] = None) -> Path: @@ -216,6 +196,10 @@ def exp_manager(trainer: 'pytorch_lightning.Trainer', cfg: Optional[Union[DictCo if cfg.create_checkpoint_callback: configure_checkpointing(trainer, log_dir, checkpoint_name) + # Add nemo callbacks + if cfg.callbacks: + add_callbacks(trainer, cfg.callbacks) + # Move files_to_copy to folder and add git information if present if cfg.files_to_copy: for _file in cfg.files_to_copy: @@ -577,3 +561,12 @@ def on_train_end(self, trainer, pl_module): trainer.configure_checkpoint_callback(checkpoint_callback) trainer.callbacks.append(checkpoint_callback) trainer.checkpoint_callback = 
checkpoint_callback
+
+
+def add_callbacks(trainer: 'pytorch_lightning.Trainer', nemo_callbacks: Optional[List[str]]):
+
+    for callback in nemo_callbacks:
+        if callback in callbacks.AVAILABLE_CALLBACKS:
+            trainer.callbacks.append(callbacks.AVAILABLE_CALLBACKS[callback])
+        else:
+            raise NameError("Requested callback is not part of the NeMo callback system; please check the callback name")

From f593c381a641b323ca932a78bddc5fba9ec33709 Mon Sep 17 00:00:00 2001
From: nithinraok 
Date: Wed, 2 Sep 2020 10:32:41 -0700
Subject: [PATCH 08/12] Removed callback from exp_manager; will push another PR for it

Signed-off-by: nithinraok 
---
 nemo/core/classes/modelPT.py |  1 -
 nemo/utils/exp_manager.py    | 15 ---------------
 2 files changed, 16 deletions(-)

diff --git a/nemo/core/classes/modelPT.py b/nemo/core/classes/modelPT.py
index d8811263cfd6..489fdee0020b 100644
--- a/nemo/core/classes/modelPT.py
+++ b/nemo/core/classes/modelPT.py
@@ -27,7 +27,6 @@
 from omegaconf import DictConfig, OmegaConf
 from pytorch_lightning import LightningModule, Trainer
 
-from nemo.collections.common import callbacks
 from nemo.core import optim
 from nemo.core.classes.common import Model
 from nemo.core.optim import prepare_lr_scheduler
diff --git a/nemo/utils/exp_manager.py b/nemo/utils/exp_manager.py
index baa14ef10438..11a7c3ea3ba7 100644
--- a/nemo/utils/exp_manager.py
+++ b/nemo/utils/exp_manager.py
@@ -29,7 +29,6 @@
 from pytorch_lightning.loggers import TensorBoardLogger, WandbLogger
 from pytorch_lightning.utilities import rank_zero_only
 
-from nemo.collections.common import callbacks
 from nemo.constants import NEMO_ENV_VARNAME_VERSION
 from nemo.utils import logging
 from nemo.utils.exceptions import NeMoBaseException
@@ -76,7 +75,6 @@ class ExpManagerConfig:
     create_checkpoint_callback: Optional[bool] = True
     # Additional exp_manager arguments
     files_to_copy: Optional[List[str]] = None
-    callbacks: Optional[str] = None
 
 
 def exp_manager(trainer: 'pytorch_lightning.Trainer', cfg: Optional[Union[DictConfig, Dict]] = None) -> Path:
@@ -196,10 +194,6 @@ def exp_manager(trainer: 'pytorch_lightning.Trainer', cfg: Optional[Union[DictCo
     if cfg.create_checkpoint_callback:
         configure_checkpointing(trainer, log_dir, checkpoint_name)
 
-    # Add nemo callbacks
-    if cfg.callbacks:
-        add_callbacks(trainer, cfg.callbacks)
-
     # Move files_to_copy to folder and add git information if present
     if cfg.files_to_copy:
         for _file in cfg.files_to_copy:
@@ -561,12 +555,3 @@ def on_train_end(self, trainer, pl_module):
         trainer.configure_checkpoint_callback(checkpoint_callback)
         trainer.callbacks.append(checkpoint_callback)
         trainer.checkpoint_callback = checkpoint_callback
-
-
-def add_callbacks(trainer: 'pytorch_lightning.Trainer', nemo_callbacks: Optional[List[str]]):
-
-    for callback in nemo_callbacks:
-        if callback in callbacks.AVAILABLE_CALLBACKS:
-            trainer.callbacks.append(callbacks.AVAILABLE_CALLBACKS[callback])
-        else:
-            raise NameError("Requested callback is not part of the NeMo callback system; please check the callback name")

From 2a621f442556590810482361917cefed48008abf Mon Sep 17 00:00:00 2001
From: nithinraok 
Date: Wed, 2 Sep 2020 12:51:37 -0700
Subject: [PATCH 09/12] LGTM

Signed-off-by: nithinraok 
---
 examples/speaker_recognition/speaker_reco.py | 15 ++++++++-------
 nemo/collections/asr/losses/angularloss.py   |  1 -
 nemo/collections/asr/modules/conv_asr.py     |  5 +++--
 3 files changed, 11 insertions(+), 10 deletions(-)

diff --git a/examples/speaker_recognition/speaker_reco.py b/examples/speaker_recognition/speaker_reco.py
index 3259a6512b56..346b8d01d3c8 100644
--- 
a/examples/speaker_recognition/speaker_reco.py
+++ b/examples/speaker_recognition/speaker_reco.py
@@ -24,13 +24,14 @@
 
 """
 Basic run (on CPU for 50 epochs):
-    python examples/speaker_recognition/speaker_reco.py \
-        model.train_ds.manifest_filepath="" \
-        model.validation_ds.manifest_filepath="" \
-        hydra.run.dir="." \
-        trainer.gpus=0 \
-        trainer.max_epochs=50
-
+EXP_NAME=sample_run
+python ./speaker_reco.py --config-path='/mnt/ngc_ws/new_configs/' --config-name=$CONFIG_PATH \
+    trainer.max_epochs=10 \
+    model.train_ds.batch_size=64 model.validation_ds.batch_size=64 \
+    trainer.gpus=0 \
+    model.decoder.params.num_classes=2 \
+    exp_manager.name=$EXP_NAME +exp_manager.use_datetime_version=False \
+    exp_manager.exp_dir='./speaker_exps'
 
 Add PyTorch Lightning Trainer arguments from CLI:
     python speaker_reco.py \
diff --git a/nemo/collections/asr/losses/angularloss.py b/nemo/collections/asr/losses/angularloss.py
index a6f9ff081f78..73235854948c 100644
--- a/nemo/collections/asr/losses/angularloss.py
+++ b/nemo/collections/asr/losses/angularloss.py
@@ -22,7 +22,6 @@
 __all__ = ['AngularSoftmaxLoss']
 
 
-@experimental
 class AngularSoftmaxLoss(Loss, Typing):
     @property
     def input_types(self):
diff --git a/nemo/collections/asr/modules/conv_asr.py b/nemo/collections/asr/modules/conv_asr.py
index 2c4d6bf4da33..8c40814381d8 100644
--- a/nemo/collections/asr/modules/conv_asr.py
+++ b/nemo/collections/asr/modules/conv_asr.py
@@ -367,6 +367,7 @@ def __init__(
     ):
         super().__init__()
         self.angular = angular
+        self.emb_id = 2
         if self.angular:
             bias = False
         else:
@@ -411,12 +412,12 @@ def forward(self, encoder_output):
 
         embs = []
         for layer in self.emb_layers:
-            pool, emb = layer(pool), layer[:2](pool)
+            pool, emb = layer(pool), layer[: self.emb_id](pool)
             embs.append(emb)
 
         if self.angular:
             for W in self.final.parameters():
-                W = F.normalize(W, p=2, dim=1)
+                _ = F.normalize(W, p=2, dim=1)
             pool = F.normalize(pool, p=2, dim=1)
 
         out = self.final(pool)

From a84195e34c2180c68539dbb76ad81efa43aaf927 Mon Sep 17 00:00:00 2001
From: nithinraok 
Date: Wed, 2 Sep 2020 13:06:04 -0700
Subject: [PATCH 10/12] experimental flag

Signed-off-by: nithinraok 
---
 nemo/collections/asr/losses/angularloss.py | 1 -
 nemo/collections/asr/modules/conv_asr.py   | 1 -
 2 files changed, 2 deletions(-)

diff --git a/nemo/collections/asr/losses/angularloss.py b/nemo/collections/asr/losses/angularloss.py
index 73235854948c..20bbc5b62dce 100644
--- a/nemo/collections/asr/losses/angularloss.py
+++ b/nemo/collections/asr/losses/angularloss.py
@@ -17,7 +17,6 @@
 
 from nemo.core.classes import Loss, Typing, typecheck
 from nemo.core.neural_types import LabelsType, LogitsType, LossType, NeuralType
-from nemo.utils.decorators import experimental
 
 __all__ = ['AngularSoftmaxLoss']
 
diff --git a/nemo/collections/asr/modules/conv_asr.py b/nemo/collections/asr/modules/conv_asr.py
index 8c40814381d8..c71d1c793246 100644
--- a/nemo/collections/asr/modules/conv_asr.py
+++ b/nemo/collections/asr/modules/conv_asr.py
@@ -37,7 +37,6 @@
     SpectrogramType,
 )
 from nemo.utils import logging
-from nemo.utils.decorators import experimental
 
 __all__ = ['ConvASRDecoder', 'ConvASREncoder', 'ConvASRDecoderClassification']
 
From df8f8c27f0b0b5e6ecf01b7fab0b35ec6819e65d Mon Sep 17 00:00:00 2001
From: nithinraok 
Date: Thu, 3 Sep 2020 16:13:57 -0700
Subject: [PATCH 11/12] moved callback to new PR; added docstrings

Signed-off-by: nithinraok 
---
 examples/speaker_recognition/spkr_get_emb.py  | 23 ++-------
 nemo/collections/asr/data/audio_to_label.py   |  2 +-
 nemo/collections/asr/losses/angularloss.py    | 22 
+++++++---- nemo/collections/asr/models/label_models.py | 10 ++--- nemo/collections/asr/modules/conv_asr.py | 12 ++---- nemo/collections/common/callbacks/__init__.py | 6 +-- .../collections/common/callbacks/callbacks.py | 39 ------------------- 7 files changed, 33 insertions(+), 81 deletions(-) diff --git a/examples/speaker_recognition/spkr_get_emb.py b/examples/speaker_recognition/spkr_get_emb.py index c2560748348e..a7599d88be8e 100644 --- a/examples/speaker_recognition/spkr_get_emb.py +++ b/examples/speaker_recognition/spkr_get_emb.py @@ -24,22 +24,11 @@ """ Basic run (on CPU for 50 epochs): - python examples/speaker_recognition/speaker_reco.py \ + python examples/speaker_recognition/spkr_get_emb.py \ model.train_ds.manifest_filepath="" \ - model.validation_ds.manifest_filepath="" \ + model.test_ds.manifest_filepath="" \ hydra.run.dir="." \ - trainer.gpus=0 \ - trainer.max_epochs=50 - - -Add PyTorch Lightning Trainer arguments from CLI: - python speaker_reco.py \ - ... \ - +trainer.fast_dev_run=true - -Hydra logs will be found in "$(./outputs/$(date +"%y-%m-%d")/$(date +"%H-%M-%S")/.hydra)" -PTL logs will be found in "$(./outputs/$(date +"%y-%m-%d")/$(date +"%H-%M-%S")/lightning_logs)" - + trainer.gpus=1 """ seed_everything(42) @@ -49,7 +38,11 @@ def main(cfg): logging.info(f'Hydra config: {cfg.pretty()}') - trainer = pl.Trainer(logger=False, checkpoint_callback=False) + if cfg.trainer.gpus > 1: + logging.info("changing gpus to 1 to minimize DDP issues while extracting embeddings") + cfg.trainer.gpus = 1 + cfg.trainer.distributed_backend = None + trainer = pl.Trainer(**cfg.trainer) log_dir = exp_manager(trainer, cfg.get("exp_manager", None)) model_path = os.path.join(log_dir, '..', 'spkr.nemo') speaker_model = ExtractSpeakerEmbeddingsModel.restore_from(model_path) diff --git a/nemo/collections/asr/data/audio_to_label.py b/nemo/collections/asr/data/audio_to_label.py index 28929375c741..ac5aed51f819 100644 --- a/nemo/collections/asr/data/audio_to_label.py +++ b/nemo/collections/asr/data/audio_to_label.py @@ -167,7 +167,7 @@ def sliced_seq_collate_fn(self, batch): slice_length = self.featurizer.sample_rate * self.time_length _, audio_lengths, _, tokens_lengths = zip(*batch) slice_length = min(slice_length, max(audio_lengths)) - shift = 1 * 16000 + shift = 1 * self.featurizer.sample_rate has_audio = audio_lengths[0] is not None audio_signal, num_slices, tokens, audio_lengths = [], [], [], [] diff --git a/nemo/collections/asr/losses/angularloss.py b/nemo/collections/asr/losses/angularloss.py index 20bbc5b62dce..e2aee9bba6ea 100644 --- a/nemo/collections/asr/losses/angularloss.py +++ b/nemo/collections/asr/losses/angularloss.py @@ -22,6 +22,14 @@ class AngularSoftmaxLoss(Loss, Typing): + """ + Computes ArcFace Angular softmax angle loss + reference: https://openaccess.thecvf.com/content_CVPR_2019/papers/Deng_ArcFace_Additive_Angular_Margin_Loss_for_Deep_Face_Recognition_CVPR_2019_paper.pdf + args: + scale: scale value for cosine angle + margin: margin value added to cosine angle + """ + @property def input_types(self): """Input types definitions for AnguarLoss. @@ -33,28 +41,28 @@ def input_types(self): @property def output_types(self): - """Output types definitions for CTCLoss. + """Output types definitions for AngularLoss. 
loss: NeuralType(None) """ return {"loss": NeuralType(elements_type=LossType())} - def __init__(self, s=20.0, m=1.35): + def __init__(self, scale=20.0, margin=1.35): super().__init__() self.eps = 1e-7 - self.s = s - self.m = m + self.scale = scale + self.margin = margin @typecheck() def forward(self, logits, labels): - numerator = self.s * torch.cos( + numerator = self.scale * torch.cos( torch.acos(torch.clamp(torch.diagonal(logits.transpose(0, 1)[labels]), -1.0 + self.eps, 1 - self.eps)) - + self.m + + self.margin ) excl = torch.cat( [torch.cat((logits[i, :y], logits[i, y + 1 :])).unsqueeze(0) for i, y in enumerate(labels)], dim=0 ) - denominator = torch.exp(numerator) + torch.sum(torch.exp(self.s * excl), dim=1) + denominator = torch.exp(numerator) + torch.sum(torch.exp(self.scale * excl), dim=1) L = numerator - torch.log(denominator) return -torch.mean(L) diff --git a/nemo/collections/asr/models/label_models.py b/nemo/collections/asr/models/label_models.py index 4dd3ea9ad1b0..9837d1066e15 100644 --- a/nemo/collections/asr/models/label_models.py +++ b/nemo/collections/asr/models/label_models.py @@ -31,7 +31,6 @@ from nemo.core.classes.common import typecheck from nemo.core.neural_types import * from nemo.utils import logging -from nemo.utils.decorators import experimental __all__ = ['EncDecSpeakerLabelModel', 'ExtractSpeakerEmbeddingsModel'] @@ -53,9 +52,9 @@ def __init__(self, cfg: DictConfig, trainer: Trainer = None): self.decoder = EncDecSpeakerLabelModel.from_config_dict(cfg.decoder) if 'angular' in cfg.decoder.params and cfg.decoder.params['angular']: logging.info("Training with Angular Softmax Loss") - s = cfg.loss.s - m = cfg.loss.m - self.loss = AngularSoftmaxLoss(s=s, m=m) + scale = cfg.loss.scale + margin = cfg.loss.margin + self.loss = AngularSoftmaxLoss(scale=scale, margin=margin) else: logging.info("Training with Softmax-CrossEntropy loss") self.loss = CELoss() @@ -150,7 +149,6 @@ def forward(self, input_signal, input_signal_length): # PTL-specific methods def training_step(self, batch, batch_idx): - # import ipdb; ipdb.set_trace() audio_signal, audio_signal_len, labels, _ = batch logits, _ = self.forward(input_signal=audio_signal, input_signal_length=audio_signal_len) self.loss_value = self.loss(logits=logits, labels=labels) @@ -185,7 +183,7 @@ def multi_validation_epoch_end(self, outputs, dataloader_idx: int = 0): total_counts = torch.stack([x['val_total_counts'] for x in outputs]) topk_scores = compute_topk_accuracy(correct_counts, total_counts) - + logging.info("val_loss: {:.3f}".format(self.val_loss_mean)) tensorboard_log = {'val_loss': self.val_loss_mean} for top_k, score in zip(self._accuracy.top_k, topk_scores): tensorboard_log['val_epoch_top@{}'.format(top_k)] = score diff --git a/nemo/collections/asr/modules/conv_asr.py b/nemo/collections/asr/modules/conv_asr.py index c71d1c793246..8ff66e5a347a 100644 --- a/nemo/collections/asr/modules/conv_asr.py +++ b/nemo/collections/asr/modules/conv_asr.py @@ -356,13 +356,7 @@ def output_types(self): ) def __init__( - self, - feat_in, - num_classes, - emb_sizes=[1024, 1024], - pool_mode='xvector', - angular=False, - init_mode="xavier_uniform", + self, feat_in, num_classes, emb_sizes=None, pool_mode='xvector', angular=False, init_mode="xavier_uniform", ): super().__init__() self.angular = angular @@ -374,6 +368,8 @@ def __init__( if type(emb_sizes) is str: emb_sizes = emb_sizes.split(',') + elif emb_sizes == None: + emb_sizes = [512, 512] else: emb_sizes = list(emb_sizes) @@ -416,7 +412,7 @@ def forward(self, encoder_output): if 
self.angular: for W in self.final.parameters(): - _ = F.normalize(W, p=2, dim=1) + W = F.normalize(W, p=2, dim=1) pool = F.normalize(pool, p=2, dim=1) out = self.final(pool) diff --git a/nemo/collections/common/callbacks/__init__.py b/nemo/collections/common/callbacks/__init__.py index 96e2bff8d4d6..9ad5c9c85a5f 100644 --- a/nemo/collections/common/callbacks/__init__.py +++ b/nemo/collections/common/callbacks/__init__.py @@ -12,8 +12,4 @@ # See the License for the specific language governing permissions and # limitations under the License. -from nemo.collections.common.callbacks.callbacks import ( - AVAILABLE_CALLBACKS, - LogEpochTimeCallback, - LogTrainValidLossCallback, -) +from nemo.collections.common.callbacks.callbacks import LogEpochTimeCallback diff --git a/nemo/collections/common/callbacks/callbacks.py b/nemo/collections/common/callbacks/callbacks.py index 96a4cc21063d..55fa5c50a1c5 100644 --- a/nemo/collections/common/callbacks/callbacks.py +++ b/nemo/collections/common/callbacks/callbacks.py @@ -16,8 +16,6 @@ from pytorch_lightning.callbacks.base import Callback from pytorch_lightning.utilities import rank_zero_only -from nemo.utils import logging - class LogEpochTimeCallback(Callback): """Simple callback that logs how long each epoch takes, in seconds, to a pytorch lightning log @@ -32,40 +30,3 @@ def on_epoch_end(self, trainer, pl_module): curr_time = time.time() duration = curr_time - self.epoch_start trainer.logger.log_metrics({"epoch_time": duration}, step=trainer.global_step) - - -class LogTrainValidLossCallback(Callback): - @rank_zero_only - def on_train_start(self, trainer, pl_module): - logging.info("Training started") - - @rank_zero_only - def on_train_batch_end(self, trainer, pl_module, batch, batch_idx, dataloader_idx): - print_freq = trainer.row_log_interval - total_batches = trainer.num_training_batches - if 0 < print_freq < 1: - print_freq = int(total_batches * print_freq) - if batch_idx % print_freq == 0: - logging.info( - "Epoch: {}/{} batch: {}/{} train_loss: {:.3f} train_acc: {:.2f}".format( - trainer.current_epoch + 1, - trainer.max_epochs, - batch_idx + 1, - total_batches, - pl_module.loss_value, - pl_module.accuracy, - ) - ) - - def on_validation_epoch_end(self, trainer, pl_module): - logging.info( - "----> Epoch: {}/{} val_loss: {:.3f} val_acc: {:.2f} <----".format( - trainer.current_epoch + 1, trainer.max_epochs, pl_module.val_loss_mean, pl_module.accuracy - ) - ) - - -AVAILABLE_CALLBACKS = { - 'LogEpochTimeCallback': LogEpochTimeCallback(), - 'LogTrainValidLossCallback': LogTrainValidLossCallback(), -} From fdd898d156444aaff5cfa8b76358a4e05767d032 Mon Sep 17 00:00:00 2001 From: nithinraok Date: Thu, 3 Sep 2020 16:19:58 -0700 Subject: [PATCH 12/12] style fix Signed-off-by: nithinraok --- examples/speaker_recognition/speaker_reco.py | 5 +---- examples/speaker_recognition/spkr_get_emb.py | 5 +++-- 2 files changed, 4 insertions(+), 6 deletions(-) diff --git a/examples/speaker_recognition/speaker_reco.py b/examples/speaker_recognition/speaker_reco.py index 346b8d01d3c8..0a30a93a2160 100644 --- a/examples/speaker_recognition/speaker_reco.py +++ b/examples/speaker_recognition/speaker_reco.py @@ -25,7 +25,7 @@ """ Basic run (on CPU for 50 epochs): EXP_NAME=sample_run -python ./speaker_reco.py --config-path='/mnt/ngc_ws/new_configs/' --config-name=$CONFIG_PATH \ +python ./speaker_reco.py --config-path='conf' --config-name='config.yaml' \ trainer.max_epochs=10 \ model.train_ds.batch_size=64 model.validation_ds.batch_size=64 \ trainer.gpus=0 \ @@ -38,9 +38,6 @@ ... 
\
         +trainer.fast_dev_run=true
 
-Hydra logs will be found in "$(./outputs/$(date +"%y-%m-%d")/$(date +"%H-%M-%S")/.hydra)"
-PTL logs will be found in "$(./outputs/$(date +"%y-%m-%d")/$(date +"%H-%M-%S")/lightning_logs)"
-
 """
 
 seed_everything(42)
diff --git a/examples/speaker_recognition/spkr_get_emb.py b/examples/speaker_recognition/spkr_get_emb.py
index a7599d88be8e..218c23817ca8 100644
--- a/examples/speaker_recognition/spkr_get_emb.py
+++ b/examples/speaker_recognition/spkr_get_emb.py
@@ -23,10 +23,11 @@
 from nemo.utils.exp_manager import exp_manager
 
 """
-Basic run (on CPU for 50 epochs):
+To extract embeddings:
     python examples/speaker_recognition/spkr_get_emb.py \
         model.test_ds.manifest_filepath="" \
+        exp_manager.exp_name="" \
+        exp_manager.exp_dir="" \
         hydra.run.dir="." \
         trainer.gpus=1
 """
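
A note on the math for readers of the series: the objective that AngularSoftmaxLoss and the decoder's angular=True path implement together can be exercised standalone. The sketch below is illustrative rather than NeMo code (cosine_logits and angular_margin_loss are invented names), and it normalizes the class weights functionally inside the forward pass, since rebinding W inside "for W in self.final.parameters():" returns a new tensor without modifying the parameter itself.

    import torch
    import torch.nn.functional as F

    def cosine_logits(emb, weight):
        # Cosine similarity between L2-normalized embeddings and class
        # weights; this mirrors the normalize-then-linear step the decoder
        # takes when angular=True (its final Linear also drops the bias).
        return F.linear(F.normalize(emb, p=2, dim=1), F.normalize(weight, p=2, dim=1))

    def angular_margin_loss(logits, labels, scale=20.0, margin=1.35, eps=1e-7):
        # logits are cosines in [-1, 1]; clamp before acos for numerical
        # safety, add the margin to the target-class angle, then rescale.
        target_cos = torch.diagonal(logits.transpose(0, 1)[labels])
        numerator = scale * torch.cos(
            torch.acos(torch.clamp(target_cos, -1.0 + eps, 1.0 - eps)) + margin
        )
        # Drop the target column from each row, keeping non-target logits.
        excl = torch.cat(
            [torch.cat((logits[i, :y], logits[i, y + 1 :])).unsqueeze(0) for i, y in enumerate(labels)], dim=0
        )
        denominator = torch.exp(numerator) + torch.sum(torch.exp(scale * excl), dim=1)
        return -torch.mean(numerator - torch.log(denominator))

    # Toy check: 4 embeddings of size 8, 3 speaker classes.
    emb, weight = torch.randn(4, 8), torch.randn(3, 8)  # weight stands in for the decoder's final-layer weight
    labels = torch.tensor([0, 2, 1, 0])
    print(angular_margin_loss(cosine_logits(emb, weight), labels).item())

Setting margin=0 reduces the expression to ordinary cross-entropy over cosine similarities multiplied by scale, which is a quick sanity check on the implementation.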
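
On the data side, the repeat-and-trim policy that fixed_seq_collate_fn applies per sample can be isolated into a few lines. This is a sketch under the same assumptions as the dataset code (1-D mono tensors; fixed_length plays the role of featurizer.sample_rate * time_length, and to_fixed_length is an invented helper name):

    import torch

    def to_fixed_length(sig, fixed_length):
        # Short signals are tiled, plus a tail remainder, up to fixed_length;
        # long signals contribute a random crop of exactly fixed_length.
        sig_len = sig.shape[0]
        if sig_len < fixed_length:
            repeat = fixed_length // sig_len
            rem = fixed_length % sig_len
            sub = sig[-rem:] if rem > 0 else torch.tensor([])
            return torch.cat(repeat * [sig] + [sub])
        chunk_len = sig_len - fixed_length
        start = torch.randint(0, chunk_len, (1,)) if chunk_len else torch.tensor(0)
        return sig[start : start + fixed_length]

    batch = [torch.randn(n) for n in (8000, 16000, 42000)]
    fixed_length = int(min(16000, max(s.shape[0] for s in batch)))
    signals = torch.stack([to_fixed_length(s, fixed_length) for s in batch])
    print(signals.shape)  # torch.Size([3, 16000])

The sliced_seq_collate_fn variant added for diarization applies the same repeat logic to short signals but, instead of cropping long ones, emits overlapping slice_length windows at a one-second shift and records the slice count per utterance, so that test_epoch_end can average the resulting embeddings back into a single vector per file.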