Commit
Merge 1.2 bugfixes into main (#2588)
* update jenkinsfile

Signed-off-by: ericharper <[email protected]>

* update BRANCH

Signed-off-by: ericharper <[email protected]>

* Fix onnx for ASR notebook (#2542)

* Update onnx version

Signed-off-by: smajumdar <[email protected]>

* Fix onnx

Signed-off-by: smajumdar <[email protected]>

* Fix onnx

Signed-off-by: smajumdar <[email protected]>

* Fix typos and MeCab import (#2541)

Signed-off-by: MaximumEntropy <[email protected]>

* Fix branch for ASR notebooks (#2549)

Signed-off-by: smajumdar <[email protected]>

* rmtok (#2559)

Signed-off-by: Abhinav Khattar <[email protected]>

* Add xxhash dependency (#2564)

Signed-off-by: MaximumEntropy <[email protected]>

* fix (#2566)

* fix

Signed-off-by: nithinraok <[email protected]>

* doc add

Signed-off-by: nithinraok <[email protected]>

* style fix

Signed-off-by: nithinraok <[email protected]>

* Fix moses path issue (#2573)

Signed-off-by: MaximumEntropy <[email protected]>

* More moses data path fixes (#2575)

Signed-off-by: MaximumEntropy <[email protected]>

* Path fixes (#2580)

Signed-off-by: MaximumEntropy <[email protected]>

* Upper bound transformers for 1.2 (#2584)

* upper bound transformers and name change jarvis to riva

Signed-off-by: ericharper <[email protected]>

* upper bound transformers and name change jarvis to riva

Signed-off-by: ericharper <[email protected]>

* update jenkinsfile

Signed-off-by: ericharper <[email protected]>

* update notebooks branch

Signed-off-by: ericharper <[email protected]>

* update notebooks branch

Signed-off-by: ericharper <[email protected]>

* update notebooks branch

Signed-off-by: ericharper <[email protected]>

Co-authored-by: Somshubra Majumdar <[email protected]>
Co-authored-by: Sandeep Subramanian <[email protected]>
Co-authored-by: Abhinav Khattar <[email protected]>
Co-authored-by: Nithin Rao <[email protected]>
Authored by 5 people on Jul 30, 2021
1 parent (b5b29a6), commit 4a7c3e3
Showing 8 changed files with 169 additions and 136 deletions.
86 changes: 55 additions & 31 deletions nemo/collections/asr/parts/utils/nmse_clustering.py
@@ -40,6 +40,16 @@

 scaler = MinMaxScaler(feature_range=(0, 1))

+try:
+    from torch.linalg import eigh as eigh
+
+    TORCH_EIGN = True
+except ImportError:
+    TORCH_EIGN = False
+    from scipy.linalg import eigh as eigh
+
+    logging.warning("Using eigen decomposition from scipy, upgrade torch to 1.9 or higher for faster clustering")
+

 def isGraphFullyConnected(affinity_mat):
     return getTheLargestComponent(affinity_mat, 0).sum() == affinity_mat.shape[0]
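For context, the hunk above makes the eigendecomposition backend-agnostic: torch.linalg.eigh exists from PyTorch 1.9 onward, and older installs fall back to scipy.linalg.eigh. A minimal standalone sketch of the same fallback pattern (not part of this commit; the toy matrix and print are illustrative only):

import numpy as np

try:
    import torch
    from torch.linalg import eigh  # available from PyTorch 1.9

    TORCH_EIGN = True
except ImportError:
    from scipy.linalg import eigh  # CPU-only fallback

    TORCH_EIGN = False

sym = np.array([[2.0, -1.0], [-1.0, 2.0]])  # symmetric toy matrix
if TORCH_EIGN:
    lambdas, vecs = eigh(torch.from_numpy(sym))  # torch path
    lambdas = lambdas.numpy()
else:
    lambdas, vecs = eigh(sym)  # scipy path
print(lambdas)  # ascending eigenvalues, [1.0, 3.0] on either backend

Both backends return eigenvalues in ascending order, which is what the gap analysis further down relies on.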
@@ -120,7 +130,7 @@ def getCosAffinityMatrix(emb):

 def getLaplacian(X):
     """
-    Calculates a Laplacian matrix from an affinity matrix X.
+    Calculates a laplacian matrix from an affinity matrix X.
     """
     X[np.diag_indices(X.shape[0])] = 0
     A = X
@@ -130,19 +140,46 @@ def getCosAffinityMatrix(emb):

     return L


-def eigDecompose(Laplacian, cuda, device=None):
-    if cuda:
-        if device == None:
-            device = torch.cuda.current_device()
-        laplacian_torch = torch.from_numpy(Laplacian).float().to(device)
+def eigDecompose(laplacian, cuda, device=None):
+    if TORCH_EIGN:
+        if cuda:
+            if device is None:
+                device = torch.cuda.current_device()
+            laplacian = torch.from_numpy(laplacian).float().to(device)
+        else:
+            laplacian = torch.from_numpy(laplacian).float()
+        lambdas, diffusion_map = eigh(laplacian)
+        lambdas = lambdas.cpu().numpy()
+        diffusion_map = diffusion_map.cpu().numpy()
     else:
-        laplacian_torch = torch.from_numpy(Laplacian).float()
-    lambdas_torch, diffusion_map_torch = torch.linalg.eigh(laplacian_torch)
-    lambdas = lambdas_torch.cpu().numpy()
-    diffusion_map = diffusion_map_torch.cpu().numpy()
+        lambdas, diffusion_map = eigh(laplacian)
+
     return lambdas, diffusion_map


+def getLamdaGaplist(lambdas):
+    lambdas = np.real(lambdas)
+    return list(lambdas[1:] - lambdas[:-1])
+
+
+def estimateNumofSpeakers(affinity_mat, max_num_speaker, is_cuda=False):
+    """
+    Estimates the number of speakers using eigendecomposition of the laplacian matrix.
+    affinity_mat: (array)
+        NxN affinity matrix
+    max_num_speaker: (int)
+        Maximum number of clusters to consider for each session
+    is_cuda: (bool)
+        If CUDA is available, the eigh decomposition is computed on the GPU
+    """
+    laplacian = getLaplacian(affinity_mat)
+    lambdas, _ = eigDecompose(laplacian, is_cuda)
+    lambdas = np.sort(lambdas)
+    lambda_gap_list = getLamdaGaplist(lambdas)
+    num_of_spk = np.argmax(lambda_gap_list[: min(max_num_speaker, len(lambda_gap_list))]) + 1
+    return num_of_spk, lambdas, lambda_gap_list
+
+
 class _SpectralClustering:
     def __init__(self, n_clusters=8, random_state=0, n_init=10, p_value=10, n_jobs=None, cuda=False):
         self.n_clusters = n_clusters
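The two helpers added above implement the eigen-gap heuristic: the estimated number of speakers is the position of the largest gap in the sorted laplacian eigenvalue spectrum. A self-contained toy check (not from the commit; the block-structured affinity matrix is illustrative):

import numpy as np
from scipy.linalg import eigh

def laplacian(X):
    X = X.copy()
    X[np.diag_indices(X.shape[0])] = 0  # zero self-affinity, as in getLaplacian
    return np.diag(X.sum(axis=0)) - X   # unnormalized graph laplacian L = D - A

# Six segments, two well-separated speakers: strong within-block affinity,
# weak cross-block affinity.
A = np.full((6, 6), 0.05)
A[:3, :3] = 1.0
A[3:, 3:] = 1.0

lambdas = np.sort(eigh(laplacian(A))[0])
lambda_gap_list = lambdas[1:] - lambdas[:-1]
max_num_speaker = 8
num_of_spk = np.argmax(lambda_gap_list[: min(max_num_speaker, len(lambda_gap_list))]) + 1
print(num_of_spk)  # -> 2

With two loose clusters the spectrum is roughly [0, 0.3, 3.15, 3.15, 3.15, 3.15], so the dominant gap follows the second-smallest eigenvalue and the estimate is 2.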
@@ -170,8 +207,8 @@ def getSpectralEmbeddings(self, affinity_mat, n_spks=8, drop_first=True, cuda=Fa
         if not isGraphFullyConnected(affinity_mat):
             logging.warning("Graph is not fully connected and the clustering result might not be accurate.")

-        Laplacian = getLaplacian(affinity_mat)
-        lambdas_, diffusion_map_ = eigDecompose(Laplacian, cuda)
+        laplacian = getLaplacian(affinity_mat)
+        lambdas_, diffusion_map_ = eigDecompose(laplacian, cuda)
         lambdas = lambdas_[:n_spks]
         diffusion_map = diffusion_map_[:, :n_spks]
         embedding = diffusion_map.T[n_spks::-1]
@@ -363,7 +400,7 @@ def getEigRatio(self, p_neighbors):
         """

         affinity_mat = getAffinityGraphMat(self.mat, p_neighbors)
-        est_num_of_spk, lambdas, lambda_gap_list = self.estimateNumofSpeakers(affinity_mat)
+        est_num_of_spk, lambdas, lambda_gap_list = estimateNumofSpeakers(affinity_mat, self.max_num_speaker, self.cuda)
         arg_sorted_idx = np.argsort(lambda_gap_list[: self.max_num_speaker])[::-1]
         max_key = arg_sorted_idx[0]
         max_eig_gap = lambda_gap_list[max_key] / (max(lambdas) + self.eps)
@@ -388,21 +425,6 @@ def getPvalueList(self):

         return p_value_list

-    def getLamdaGaplist(self, lambdas):
-        lambdas = np.real(lambdas)
-        return list(lambdas[1:] - lambdas[:-1])
-
-    def estimateNumofSpeakers(self, affinity_mat):
-        """
-        Estimates the number of speakers using eigen decompose on Laplacian Matrix.
-        """
-        Laplacian = getLaplacian(affinity_mat)
-        lambdas, _ = eigDecompose(Laplacian, self.cuda)
-        lambdas = np.sort(lambdas)
-        lambda_gap_list = self.getLamdaGaplist(lambdas)
-        num_of_spk = np.argmax(lambda_gap_list[: min(self.max_num_speaker, len(lambda_gap_list))]) + 1
-        return num_of_spk, lambdas, lambda_gap_list
-

 def COSclustering(key, emb, oracle_num_speakers=None, max_num_speaker=8, min_samples=6, fixed_thres=None, cuda=False):
     """
@@ -423,8 +445,9 @@ def COSclustering(key, emb, oracle_num_speakers=None, max_num_speaker=8, min_sam
     min_samples: (int)
         Minimum number of samples required for NME clustering, this avoids
-        zero p_neighbour_lists. Default of 6 is selected since (1/rp_threshold) >= 4.
+        zero p_neighbour_lists. Default of 6 is selected since (1/rp_threshold) >= 4
+        when max_rp_threshold = 0.25. Thus, NME analysis is skipped for matrices
+        smaller than (min_samples)x(min_samples).
     Returns:
         Y: (List[int])
             Speaker label for each segment.
@@ -443,12 +466,13 @@ def COSclustering(key, emb, oracle_num_speakers=None, max_num_speaker=8, min_sam
         NME_mat_size=300,
         cuda=cuda,
     )
-    est_num_of_spk, p_hat_value = nmesc.NMEanalysis()

     if emb.shape[0] > min_samples:
+        est_num_of_spk, p_hat_value = nmesc.NMEanalysis()
         affinity_mat = getAffinityGraphMat(mat, p_hat_value)
     else:
         affinity_mat = mat
+        est_num_of_spk, _, _ = estimateNumofSpeakers(affinity_mat, max_num_speaker, cuda)

     if oracle_num_speakers:
         est_num_of_spk = oracle_num_speakers
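The last hunk also changes control flow: nmesc.NMEanalysis() previously ran unconditionally, and now runs only when the session has more than min_samples embeddings, with short sessions falling back to the direct eigen-gap estimate on the raw affinity matrix. A self-contained sketch of that guard (the helper here is a stand-in, not the NeMo API):

import numpy as np

def nme_analysis(mat):
    # stand-in for nmesc.NMEanalysis(), which searches p-values over a
    # pruned neighbor graph and needs enough segments to do so
    raise RuntimeError("too few segments for neighbor pruning")

def select_affinity(mat, min_samples=6):
    if mat.shape[0] > min_samples:
        return nme_analysis(mat)  # NME path: prune graph, then cluster
    return mat  # tiny session: keep the dense affinity matrix as-is

tiny_session = np.ones((4, 4))  # 4 segments < min_samples, guard must trip
assert select_affinity(tiny_session) is tiny_session  # NME path never runs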
nemo/collections/nlp/models/machine_translation/mt_enc_dec_model.py
@@ -103,7 +103,7 @@ def __init__(self, cfg: MTEncDecModelConfig, trainer: Trainer = None):
             )
         elif isinstance(self.src_language, ListConfig):
             for lng in self.src_language:
-                self.multilingual_ids.append(self.encoder_tokenizer.token_to_id("<" + lng + ">"))
+                self.multilingual_ids.append(None)
         elif isinstance(self.tgt_language, ListConfig):
             for lng in self.tgt_language:
                 self.multilingual_ids.append(self.encoder_tokenizer.token_to_id("<" + lng + ">"))
@@ -773,7 +773,11 @@ def translate(
                 raise ValueError("Expect source_lang and target_lang to infer for multilingual model.")
             src_symbol = self.encoder_tokenizer.token_to_id('<' + source_lang + '>')
             tgt_symbol = self.encoder_tokenizer.token_to_id('<' + target_lang + '>')
-            prepend_ids = [src_symbol if src_symbol in self.multilingual_ids else tgt_symbol]
+            if src_symbol in self.multilingual_ids:
+                prepend_ids = [src_symbol]
+            elif tgt_symbol in self.multilingual_ids:
+                prepend_ids = [tgt_symbol]
+
         try:
             self.eval()
             src, src_mask = self.prepare_inference_batch(text, prepend_ids)
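The translate() fix stops prepending a token that is not registered in multilingual_ids: the old one-liner prepended tgt_symbol whenever src_symbol was missing, even if tgt_symbol was missing too (consistent with the __init__ hunk above, which stores None rather than a source-language token id). A standalone sketch of the selection logic (hypothetical token ids, not the NeMo API; the empty-list fallback is an assumption, since the diff simply leaves prepend_ids unchanged when neither id matches):

def pick_prepend_ids(src_symbol, tgt_symbol, multilingual_ids):
    if src_symbol in multilingual_ids:
        return [src_symbol]   # source-language token is registered
    elif tgt_symbol in multilingual_ids:
        return [tgt_symbol]   # target-language token is registered
    return []                 # assumption: prepend nothing otherwise

# Hypothetical many-to-English model: only '<en>' (id 32000) is registered.
multilingual_ids = [32000]
print(pick_prepend_ids(32007, 32000, multilingual_ids))  # -> [32000]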
(diffs for the remaining 6 changed files not shown)
