Commit
Merge 1.2 bugfixes into main (#2588)
* update jenkinsfile

Signed-off-by: ericharper <[email protected]>

* update BRANCH

Signed-off-by: ericharper <[email protected]>

* Fix onnx for ASR notebook (#2542)

* Update onnx version

Signed-off-by: smajumdar <[email protected]>

* Fix onnx

Signed-off-by: smajumdar <[email protected]>

* Fix onnx

Signed-off-by: smajumdar <[email protected]>

* Fix typos and MeCab import (#2541)

Signed-off-by: MaximumEntropy <[email protected]>

* Fix branch for ASR notebooks (#2549)

Signed-off-by: smajumdar <[email protected]>

* rmtok (#2559)

Signed-off-by: Abhinav Khattar <[email protected]>

* Add xxhash dependency (#2564)

Signed-off-by: MaximumEntropy <[email protected]>

* fix (#2566)

* fix

Signed-off-by: nithinraok <[email protected]>

* doc add

Signed-off-by: nithinraok <[email protected]>

* style fix

Signed-off-by: nithinraok <[email protected]>

* Fix moses path issue (#2573)

Signed-off-by: MaximumEntropy <[email protected]>

* More moses data path fixes (#2575)

Signed-off-by: MaximumEntropy <[email protected]>

* Path fixes (#2580)

Signed-off-by: MaximumEntropy <[email protected]>

* Upper bound transformers for 1.2 (#2584)

* upper bound transformers and name change jarvis to riva

Signed-off-by: ericharper <[email protected]>

* upper bound transformers and name change jarvis to riva

Signed-off-by: ericharper <[email protected]>

* update jenkinsfile

Signed-off-by: ericharper <[email protected]>

* update notebooks branch

Signed-off-by: ericharper <[email protected]>

* update notebooks branch

Signed-off-by: ericharper <[email protected]>

* update notebooks branch

Signed-off-by: ericharper <[email protected]>

Co-authored-by: Somshubra Majumdar <[email protected]>
Co-authored-by: Sandeep Subramanian <[email protected]>
Co-authored-by: Abhinav Khattar <[email protected]>
Co-authored-by: Nithin Rao <[email protected]>
Authored by 5 people on Jul 30, 2021
1 parent (b5b29a6), commit 4a7c3e3
Showing 8 changed files with 169 additions and 136 deletions.
86 changes: 55 additions & 31 deletions nemo/collections/asr/parts/utils/nmse_clustering.py
@@ -40,6 +40,16 @@

 scaler = MinMaxScaler(feature_range=(0, 1))

+try:
+    from torch.linalg import eigh as eigh
+
+    TORCH_EIGN = True
+except ImportError:
+    TORCH_EIGN = False
+    from scipy.linalg import eigh as eigh
+
+    logging.warning("Using eigen decomposition from scipy, upgrade torch to 1.9 or higher for faster clustering")
+

 def isGraphFullyConnected(affinity_mat):
     return getTheLargestComponent(affinity_mat, 0).sum() == affinity_mat.shape[0]
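For context, the hunk above makes the eigendecomposition backend-agnostic: torch.linalg.eigh exists from PyTorch 1.9 onward, and older installs fall back to scipy.linalg.eigh. A minimal standalone sketch of the same fallback pattern (not part of this commit; the toy matrix and print are illustrative only):

import numpy as np

try:
    import torch
    from torch.linalg import eigh  # available from PyTorch 1.9

    TORCH_EIGN = True
except ImportError:
    from scipy.linalg import eigh  # CPU-only fallback

    TORCH_EIGN = False

sym = np.array([[2.0, -1.0], [-1.0, 2.0]])  # symmetric toy matrix
if TORCH_EIGN:
    lambdas, vecs = eigh(torch.from_numpy(sym))  # torch path
    lambdas = lambdas.numpy()
else:
    lambdas, vecs = eigh(sym)  # scipy path
print(lambdas)  # ascending eigenvalues, [1.0, 3.0] on either backend

Both backends return eigenvalues in ascending order, which is what the gap analysis further down relies on.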
@@ -120,7 +130,7 @@ def getCosAffinityMatrix(emb):

 def getLaplacian(X):
     """
-    Calculates a Laplacian matrix from an affinity matrix X.
+    Calculates a laplacian matrix from an affinity matrix X.
     """
     X[np.diag_indices(X.shape[0])] = 0
     A = X
@@ -130,19 +140,46 @@ def getCosAffinityMatrix(emb):

     return L


-def eigDecompose(Laplacian, cuda, device=None):
-    if cuda:
-        if device == None:
-            device = torch.cuda.current_device()
-        laplacian_torch = torch.from_numpy(Laplacian).float().to(device)
+def eigDecompose(laplacian, cuda, device=None):
+    if TORCH_EIGN:
+        if cuda:
+            if device is None:
+                device = torch.cuda.current_device()
+            laplacian = torch.from_numpy(laplacian).float().to(device)
+        else:
+            laplacian = torch.from_numpy(laplacian).float()
+        lambdas, diffusion_map = eigh(laplacian)
+        lambdas = lambdas.cpu().numpy()
+        diffusion_map = diffusion_map.cpu().numpy()
     else:
-        laplacian_torch = torch.from_numpy(Laplacian).float()
-    lambdas_torch, diffusion_map_torch = torch.linalg.eigh(laplacian_torch)
-    lambdas = lambdas_torch.cpu().numpy()
-    diffusion_map = diffusion_map_torch.cpu().numpy()
+        lambdas, diffusion_map = eigh(laplacian)
+
     return lambdas, diffusion_map


+def getLamdaGaplist(lambdas):
+    lambdas = np.real(lambdas)
+    return list(lambdas[1:] - lambdas[:-1])
+
+
+def estimateNumofSpeakers(affinity_mat, max_num_speaker, is_cuda=False):
+    """
+    Estimates the number of speakers using eigendecomposition of the laplacian matrix.
+    affinity_mat: (array)
+        NxN affinity matrix
+    max_num_speaker: (int)
+        Maximum number of clusters to consider for each session
+    is_cuda: (bool)
+        If CUDA is available, the eigh decomposition is computed on the GPU
+    """
+    laplacian = getLaplacian(affinity_mat)
+    lambdas, _ = eigDecompose(laplacian, is_cuda)
+    lambdas = np.sort(lambdas)
+    lambda_gap_list = getLamdaGaplist(lambdas)
+    num_of_spk = np.argmax(lambda_gap_list[: min(max_num_speaker, len(lambda_gap_list))]) + 1
+    return num_of_spk, lambdas, lambda_gap_list
+
+
 class _SpectralClustering:
     def __init__(self, n_clusters=8, random_state=0, n_init=10, p_value=10, n_jobs=None, cuda=False):
         self.n_clusters = n_clusters
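The two helpers added above implement the eigen-gap heuristic: the estimated number of speakers is the position of the largest gap in the sorted laplacian eigenvalue spectrum. A self-contained toy check (not from the commit; the block-structured affinity matrix is illustrative):

import numpy as np
from scipy.linalg import eigh

def laplacian(X):
    X = X.copy()
    X[np.diag_indices(X.shape[0])] = 0  # zero self-affinity, as in getLaplacian
    return np.diag(X.sum(axis=0)) - X   # unnormalized graph laplacian L = D - A

# Six segments, two well-separated speakers: strong within-block affinity,
# weak cross-block affinity.
A = np.full((6, 6), 0.05)
A[:3, :3] = 1.0
A[3:, 3:] = 1.0

lambdas = np.sort(eigh(laplacian(A))[0])
lambda_gap_list = lambdas[1:] - lambdas[:-1]
max_num_speaker = 8
num_of_spk = np.argmax(lambda_gap_list[: min(max_num_speaker, len(lambda_gap_list))]) + 1
print(num_of_spk)  # -> 2

With two loose clusters the spectrum is roughly [0, 0.3, 3.15, 3.15, 3.15, 3.15], so the dominant gap follows the second-smallest eigenvalue and the estimate is 2.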
@@ -170,8 +207,8 @@ def getSpectralEmbeddings(self, affinity_mat, n_spks=8, drop_first=True, cuda=Fa
         if not isGraphFullyConnected(affinity_mat):
             logging.warning("Graph is not fully connected and the clustering result might not be accurate.")

-        Laplacian = getLaplacian(affinity_mat)
-        lambdas_, diffusion_map_ = eigDecompose(Laplacian, cuda)
+        laplacian = getLaplacian(affinity_mat)
+        lambdas_, diffusion_map_ = eigDecompose(laplacian, cuda)
         lambdas = lambdas_[:n_spks]
         diffusion_map = diffusion_map_[:, :n_spks]
         embedding = diffusion_map.T[n_spks::-1]
@@ -363,7 +400,7 @@ def getEigRatio(self, p_neighbors):
         """

         affinity_mat = getAffinityGraphMat(self.mat, p_neighbors)
-        est_num_of_spk, lambdas, lambda_gap_list = self.estimateNumofSpeakers(affinity_mat)
+        est_num_of_spk, lambdas, lambda_gap_list = estimateNumofSpeakers(affinity_mat, self.max_num_speaker, self.cuda)
         arg_sorted_idx = np.argsort(lambda_gap_list[: self.max_num_speaker])[::-1]
         max_key = arg_sorted_idx[0]
         max_eig_gap = lambda_gap_list[max_key] / (max(lambdas) + self.eps)
@@ -388,21 +425,6 @@ def getPvalueList(self):

         return p_value_list

-    def getLamdaGaplist(self, lambdas):
-        lambdas = np.real(lambdas)
-        return list(lambdas[1:] - lambdas[:-1])
-
-    def estimateNumofSpeakers(self, affinity_mat):
-        """
-        Estimates the number of speakers using eigen decompose on Laplacian Matrix.
-        """
-        Laplacian = getLaplacian(affinity_mat)
-        lambdas, _ = eigDecompose(Laplacian, self.cuda)
-        lambdas = np.sort(lambdas)
-        lambda_gap_list = self.getLamdaGaplist(lambdas)
-        num_of_spk = np.argmax(lambda_gap_list[: min(self.max_num_speaker, len(lambda_gap_list))]) + 1
-        return num_of_spk, lambdas, lambda_gap_list
-

 def COSclustering(key, emb, oracle_num_speakers=None, max_num_speaker=8, min_samples=6, fixed_thres=None, cuda=False):
     """
@@ -423,8 +445,9 @@ def COSclustering(key, emb, oracle_num_speakers=None, max_num_speaker=8, min_sam
     min_samples: (int)
         Minimum number of samples required for NME clustering, this avoids
-        zero p_neighbour_lists. Default of 6 is selected since (1/rp_threshold) >= 4.
+        zero p_neighbour_lists. Default of 6 is selected since (1/rp_threshold) >= 4
+        when max_rp_threshold = 0.25. Thus, NME analysis is skipped for matrices
+        smaller than (min_samples)x(min_samples).
     Returns:
         Y: (List[int])
             Speaker label for each segment.
@@ -443,12 +466,13 @@ def COSclustering(key, emb, oracle_num_speakers=None, max_num_speaker=8, min_sam
         NME_mat_size=300,
         cuda=cuda,
     )
-    est_num_of_spk, p_hat_value = nmesc.NMEanalysis()

     if emb.shape[0] > min_samples:
+        est_num_of_spk, p_hat_value = nmesc.NMEanalysis()
         affinity_mat = getAffinityGraphMat(mat, p_hat_value)
     else:
         affinity_mat = mat
+        est_num_of_spk, _, _ = estimateNumofSpeakers(affinity_mat, max_num_speaker, cuda)

     if oracle_num_speakers:
         est_num_of_spk = oracle_num_speakers
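The last hunk also changes control flow: nmesc.NMEanalysis() previously ran unconditionally, and now runs only when the session has more than min_samples embeddings, with short sessions falling back to the direct eigen-gap estimate on the raw affinity matrix. A self-contained sketch of that guard (the helper here is a stand-in, not the NeMo API):

import numpy as np

def nme_analysis(mat):
    # stand-in for nmesc.NMEanalysis(), which searches p-values over a
    # pruned neighbor graph and needs enough segments to do so
    raise RuntimeError("too few segments for neighbor pruning")

def select_affinity(mat, min_samples=6):
    if mat.shape[0] > min_samples:
        return nme_analysis(mat)  # NME path: prune graph, then cluster
    return mat  # tiny session: keep the dense affinity matrix as-is

tiny_session = np.ones((4, 4))  # 4 segments < min_samples, guard must trip
assert select_affinity(tiny_session) is tiny_session  # NME path never runs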
nemo/collections/nlp/models/machine_translation/mt_enc_dec_model.py
@@ -103,7 +103,7 @@ def __init__(self, cfg: MTEncDecModelConfig, trainer: Trainer = None):
             )
         elif isinstance(self.src_language, ListConfig):
             for lng in self.src_language:
-                self.multilingual_ids.append(self.encoder_tokenizer.token_to_id("<" + lng + ">"))
+                self.multilingual_ids.append(None)
         elif isinstance(self.tgt_language, ListConfig):
             for lng in self.tgt_language:
                 self.multilingual_ids.append(self.encoder_tokenizer.token_to_id("<" + lng + ">"))
@@ -773,7 +773,11 @@ def translate(
                 raise ValueError("Expect source_lang and target_lang to infer for multilingual model.")
             src_symbol = self.encoder_tokenizer.token_to_id('<' + source_lang + '>')
             tgt_symbol = self.encoder_tokenizer.token_to_id('<' + target_lang + '>')
-            prepend_ids = [src_symbol if src_symbol in self.multilingual_ids else tgt_symbol]
+            if src_symbol in self.multilingual_ids:
+                prepend_ids = [src_symbol]
+            elif tgt_symbol in self.multilingual_ids:
+                prepend_ids = [tgt_symbol]
+
         try:
             self.eval()
             src, src_mask = self.prepare_inference_batch(text, prepend_ids)
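The translate() fix stops prepending a token that is not registered in multilingual_ids: the old one-liner prepended tgt_symbol whenever src_symbol was missing, even if tgt_symbol was missing too (consistent with the __init__ hunk above, which stores None rather than a source-language token id). A standalone sketch of the selection logic (hypothetical token ids, not the NeMo API; the empty-list fallback is an assumption, since the diff simply leaves prepend_ids unchanged when neither id matches):

def pick_prepend_ids(src_symbol, tgt_symbol, multilingual_ids):
    if src_symbol in multilingual_ids:
        return [src_symbol]   # source-language token is registered
    elif tgt_symbol in multilingual_ids:
        return [tgt_symbol]   # target-language token is registered
    return []                 # assumption: prepend nothing otherwise

# Hypothetical many-to-English model: only '<en>' (id 32000) is registered.
multilingual_ids = [32000]
print(pick_prepend_ids(32007, 32000, multilingual_ids))  # -> [32000]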
(diffs for the remaining 6 changed files not shown)
