Skip to content

Commit

Permalink
Merge pull request #692 from gkucsko/c_lev_package
Browse files Browse the repository at this point in the history
editdistance package for fast WER calculation
  • Loading branch information
okuchaiev authored Jun 3, 2020
2 parents f6f8b47 + f9ebb4c commit 9a1366c
Show file tree
Hide file tree
Showing 2 changed files with 5 additions and 21 deletions.
25 changes: 4 additions & 21 deletions nemo/collections/asr/metrics.py
Original file line number Diff line number Diff line change
@@ -1,30 +1,13 @@
# Copyright (c) 2019 NVIDIA Corporation
from typing import List, Optional

import editdistance
import torch


def __levenshtein(a: List, b: List) -> int:
    """Calculates the Levenshtein distance between a and b.

    The code was copied from: http://hetland.org/coding/python/levenshtein.py

    Args:
        a: first sequence; items only need to support ``!=`` comparison.
        b: second sequence, compared item by item against ``a``.

    Returns:
        The minimum number of single-item insertions, deletions and
        substitutions needed to turn one sequence into the other.
    """
    n, m = len(a), len(b)
    if n > m:
        # Make sure n <= m, to use O(min(n,m)) space
        a, b = b, a
        n, m = m, n

    # Only two rows of the DP table are kept alive at any time: `previous`
    # (row i - 1) and `current` (row i). Row 0 is the cost of building each
    # prefix of `a` from an empty sequence.
    current = list(range(n + 1))
    for i in range(1, m + 1):
        # New row starts with i: cost of reducing the first i items of b to nothing.
        previous, current = current, [i] + [0] * n
        for j in range(1, n + 1):
            add, delete = previous[j] + 1, current[j - 1] + 1
            change = previous[j - 1]
            if a[j - 1] != b[i - 1]:
                # Substitution only costs something when the items differ.
                change = change + 1
            current[j] = min(add, delete, change)

    return current[n]
def __levenshtein(a: List[str], b: List[str]) -> int:
    """Return the Levenshtein distance between the sequences a and b.

    The computation is delegated to ``editdistance.eval``; this wrapper only
    forwards both arguments and hands back the result unchanged.
    """
    distance = editdistance.eval(a, b)
    return distance


def word_error_rate(hypotheses: List[str], references: List[str], use_cer=False) -> float:
Expand Down
1 change: 1 addition & 0 deletions requirements/requirements_asr.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
braceexpand
editdistance
frozendict
inflect
kaldi-io
Expand Down

0 comments on commit 9a1366c

Please sign in to comment.