Skip to content

Commit

Permalink
docstrings, space fix, init file
Browse files Browse the repository at this point in the history
Signed-off-by: ekmb <[email protected]>
  • Loading branch information
ekmb committed Jun 14, 2021
1 parent 31c220a commit bd37b1e
Show file tree
Hide file tree
Showing 9 changed files with 25 additions and 11 deletions.
13 changes: 13 additions & 0 deletions nemo_text_processing/text_normalization/data/roman/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
1 change: 0 additions & 1 deletion nemo_text_processing/text_normalization/data/whitelist.tsv
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
Ph.D. p h d
Hon. honorable
& and
&Co. and
Mt. Mount
Maj. Major
Rev. Reverend
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,4 +11,5 @@ Mrs. Misses
Ms. Miss
Mr Mister
Mrs Misses
Ms Miss
Ms Miss
&Co. and Co.
Original file line number Diff line number Diff line change
Expand Up @@ -242,6 +242,7 @@ def post_process_punctuation(text: str) -> str:
.replace('“', '"')
.replace("‘", "'")
.replace('`', "'")
.replace('- -', "--")
)

for punct in "!,.:;?":
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -118,7 +118,6 @@ def normalize(
normalized_texts = []
for tagged_text in tagged_texts:
self._verbalize(tagged_text, normalized_texts)

if len(normalized_texts) == 0:
raise ValueError()
if punct_post_process:
Expand Down Expand Up @@ -193,8 +192,7 @@ def calculate_cer(normalized_texts: List[str], transcript: str, remove_punct=Fal
text_clean = text.replace('-', ' ').lower()
if remove_punct:
for punct in "!?:;,.-()*+-/<=>@^_":
text_clean = text_clean.replace(punct, " ")
text_clean = re.sub(r' +', ' ', text_clean)
text_clean = text_clean.replace(punct, "")
cer = round(word_error_rate([transcript], [text_clean], use_cer=True) * 100, 2)
normalized_options.append((text, cer))
return normalized_options
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ class FractionFst(GraphFst):
"""
Finite state transducer for classifying fraction
"23 4/5" ->
tokens { fraction { numerator: "four" denominator: "five" } }
tokens { fraction { integer: "twenty three" numerator: "four" denominator: "five" } }
Args:
deterministic: if True will provide a single transduction option,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,8 @@
class FractionFst(GraphFst):
"""
Finite state transducer for verbalizing fraction
e.g. tokens { fraction { integer: "twenty three" numerator: "four" denominator: "five" } } ->
twenty three four fifth
Args:
deterministic: if True will provide a single transduction option,
Expand Down
6 changes: 3 additions & 3 deletions nemo_text_processing/text_normalization/verbalizers/roman.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,12 +27,12 @@

class RomanFst(GraphFst):
"""
Finite state transducer for verbalizing electronic
e.g. tokens { electronic { username: "cdf1" domain: "abc.edu" } } -> c d f one at a b c dot e d u
Finite state transducer for verbalizing roman numerals
e.g. tokens { roman { integer: "one" } } -> one
Args:
deterministic: if True will provide a single transduction option,
for False multiple transduction are generated (used for audio-based normalization)
for False multiple transductions are generated (used for audio-based normalization)
"""

def __init__(self, deterministic: bool = True):
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ It seemed to her that the jacket Oswald wore was darker than Commission Exhibit
It seemed to her that the jacket Oswald wore was darker than Commission Exhibit number one hundred and sixty two.
It seemed to her that the jacket Oswald wore was darker than Commission Exhibit number one six two.
~"Father, let this cup pass." He prayed--was heard. What cup was it that passed away from him? Sure not the death-cup, now filled to the brim! There was no quailing in the awful word; He still was king of kings, of lords the lord:-- He feared lest, in the suffering waste and grim, His faith might grow too faint and sickly dim."
"Father, let this cup pass." He prayed--was heard. What cup was it that passed away from him? Sure not the death-cup, now filled to the brim! There was no quailing in the awful word; He still was king of kings, of lords the lord:-- He feared lest, in the suffering waste and grim, His faith might grow too faint and sickly dim."
"Father, let this cup pass." He prayed -- was heard. What cup was it that passed away from him? Sure not the death-cup, now filled to the brim! There was no quailing in the awful word; He still was king of kings, of lords the lord: -- He feared lest, in the suffering waste and grim, His faith might grow too faint and sickly dim."
~1970-2010
nineteen seventy to twenty ten
one thousand nine seventy to two thousand ten
Expand Down Expand Up @@ -104,4 +104,4 @@ twenty five.]
~Francis I--test
Francis the first -- test
Francis one -- test
Francis first --test
Francis first -- test

0 comments on commit bd37b1e

Please sign in to comment.