Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Code format checkers #286

Merged
merged 8 commits into from
Jan 25, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 8 additions & 3 deletions Jenkinsfile
Original file line number Diff line number Diff line change
Expand Up @@ -14,11 +14,16 @@ pipeline {
sh 'python -c "import torch; print(torch.__version__)"'
}
}
stage('PEP8 Checks') {
stage('Install test requirements') {
steps {
sh 'pycodestyle . --exclude=./tests/other/jasper.py,./tests/other/jasper_zero_dl.py,setup.py,./nemo/collections/nlp/utils/metrics/sacrebleu.py,./docs/sources/source/conf.py,./collections/nemo_nlp/build,./tests/test_squad.py,./nemo/package_info.py,./examples/asr/jasper_aishell_infer.py,./examples/asr/jasper_eval.py,./examples/nlp/asr_postprocessor.py,./examples/nlp/sentence_classification_with_bert.py,./examples/nlp/transformer_lm.py'
sh 'pip install -r requirements/requirements_test.txt'
}
}
}
stage('Code formatting checks') {
steps {
sh 'python setup.py check_style'
}
}

stage('Unittests') {
steps {
Expand Down
44 changes: 18 additions & 26 deletions docs/docs_zh/sources/source/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
import os
import sys
from unittest.mock import MagicMock

import nemo

sys.path.insert(0, os.path.abspath("."))
Expand Down Expand Up @@ -49,13 +50,20 @@ def __getattr__(cls, name):

# ---- Mocking up the python modules. -----

MOCK_MODULES = ['torch', 'torch.nn', 'torch.utils', 'torch.optim',
'torch.utils.data', 'torch.utils.data.sampler',
'torchvision', 'torchvision.models',
'torchtext',
'h5py', 'kaldi_io',
'transformers'
]
MOCK_MODULES = [
'torch',
'torch.nn',
'torch.utils',
'torch.optim',
'torch.utils.data',
'torch.utils.data.sampler',
'torchvision',
'torchvision.models',
'torchtext',
'h5py',
'kaldi_io',
'transformers',
]

sys.modules.update((mod_name, Mock()) for mod_name in MOCK_MODULES)

Expand Down Expand Up @@ -163,12 +171,7 @@ def __getattr__(cls, name):
#
# This is required for the alabaster theme
# refs: http://alabaster.readthedocs.io/en/latest/installation.html#sidebars
html_sidebars = {
"**": [
"relations.html", # needs 'show_related': True theme option to display
"searchbox.html",
]
}
html_sidebars = {"**": ["relations.html", "searchbox.html",]} # needs 'show_related': True theme option to display

html_theme_options = {
"canonical_url": "",
Expand Down Expand Up @@ -207,10 +210,7 @@ def __getattr__(cls, name):
# Grouping the document tree into LaTeX files. List of tuples
# (source start file, target name, title,
# author, documentclass [howto, manual, or own class]).
latex_documents = [
(master_doc, "nemo.tex", "nemo Documentation", "AI App Design team",
"manual")
]
latex_documents = [(master_doc, "nemo.tex", "nemo Documentation", "AI App Design team", "manual",)]

# -- Options for manual page output ---------------------------------------

Expand All @@ -224,13 +224,5 @@ def __getattr__(cls, name):
# (source start file, target name, title, author,
# dir menu entry, description, category)
texinfo_documents = [
(
master_doc,
"nemo",
"nemo Documentation",
author,
"nemo",
"One line description of project.",
"Miscellaneous",
)
(master_doc, "nemo", "nemo Documentation", author, "nemo", "One line description of project.", "Miscellaneous",)
]
56 changes: 23 additions & 33 deletions docs/sources/source/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,14 +13,15 @@
# All configuration values have a default; values that are commented out
# serve to show the default.

import os
import sys
from unittest.mock import MagicMock

# If extensions (or modules to document with autodoc) are in another directory,
# add these directories to sys.path here. If the directory is relative to the
# documentation root, use os.path.abspath to make it absolute, like shown here.
#
import nemo
import os
import sys
from unittest.mock import MagicMock

sys.path.insert(0, os.path.abspath("."))
sys.path.insert(0, os.path.abspath("../../../"))
Expand Down Expand Up @@ -52,13 +53,20 @@ def __getattr__(cls, name):

# ---- Mocking up the python modules. -----

MOCK_MODULES = ['torch', 'torch.nn', 'torch.utils', 'torch.optim',
'torch.utils.data', 'torch.utils.data.sampler',
'torchvision', 'torchvision.models',
'torchtext',
'h5py', 'kaldi_io',
'transformers'
]
MOCK_MODULES = [
'torch',
'torch.nn',
'torch.utils',
'torch.optim',
'torch.utils.data',
'torch.utils.data.sampler',
'torchvision',
'torchvision.models',
'torchtext',
'h5py',
'kaldi_io',
'transformers',
]

sys.modules.update((mod_name, Mock()) for mod_name in MOCK_MODULES)

Expand All @@ -83,10 +91,8 @@ def __getattr__(cls, name):
"sphinxcontrib.bibtex",
]


locale_dirs = ['locale/'] # path is example but recommended.
gettext_compact = False # optional.

locale_dirs = ['locale/'] # path is example but recommended.
gettext_compact = False # optional.

# Add any paths that contain templates here, relative to this directory.
templates_path = ["_templates"]
Expand Down Expand Up @@ -171,12 +177,7 @@ def __getattr__(cls, name):
#
# This is required for the alabaster theme
# refs: http://alabaster.readthedocs.io/en/latest/installation.html#sidebars
html_sidebars = {
"**": [
"relations.html", # needs 'show_related': True theme option to display
"searchbox.html",
]
}
html_sidebars = {"**": ["relations.html", "searchbox.html",]} # needs 'show_related': True theme option to display

html_theme_options = {
"canonical_url": "",
Expand Down Expand Up @@ -215,10 +216,7 @@ def __getattr__(cls, name):
# Grouping the document tree into LaTeX files. List of tuples
# (source start file, target name, title,
# author, documentclass [howto, manual, or own class]).
latex_documents = [
(master_doc, "nemo.tex", "nemo Documentation", "AI App Design team",
"manual")
]
latex_documents = [(master_doc, "nemo.tex", "nemo Documentation", "AI App Design team", "manual",)]

# -- Options for manual page output ---------------------------------------

Expand All @@ -232,13 +230,5 @@ def __getattr__(cls, name):
# (source start file, target name, title, author,
# dir menu entry, description, category)
texinfo_documents = [
(
master_doc,
"nemo",
"nemo Documentation",
author,
"nemo",
"One line description of project.",
"Miscellaneous",
)
(master_doc, "nemo", "nemo Documentation", author, "nemo", "One line description of project.", "Miscellaneous",)
]
27 changes: 9 additions & 18 deletions examples/applications/asr_service/app/__init__.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
# Copyright (c) 2019 NVIDIA Corporation
import os

from app import routes # noqa
from flask import Flask
from ruamel.yaml import YAML

Expand All @@ -19,7 +21,6 @@
# This is only necessary if ENABLE_NGRAM = True. Otherwise, set to empty string
LM_PATH = "<PATH_TO_KENLM_BINARY>"


# Read model YAML
yaml = YAML(typ="safe")
with open(MODEL_YAML) as f:
Expand All @@ -28,34 +29,24 @@

# Instantiate necessary Neural Modules
# Note that data layer is missing from here
neural_factory = nemo.core.NeuralModuleFactory(
placement=nemo.core.DeviceType.GPU,
backend=nemo.core.Backend.PyTorch)
data_preprocessor = nemo_asr.AudioToMelSpectrogramPreprocessor(
factory=neural_factory)
neural_factory = nemo.core.NeuralModuleFactory(placement=nemo.core.DeviceType.GPU, backend=nemo.core.Backend.PyTorch)
data_preprocessor = nemo_asr.AudioToMelSpectrogramPreprocessor(factory=neural_factory)
jasper_encoder = nemo_asr.JasperEncoder(
jasper=jasper_model_definition['JasperEncoder']['jasper'],
activation=jasper_model_definition['JasperEncoder']['activation'],
feat_in=jasper_model_definition[
'AudioToMelSpectrogramPreprocessor']['features'])
feat_in=jasper_model_definition['AudioToMelSpectrogramPreprocessor']['features'],
)
jasper_encoder.restore_from(CHECKPOINT_ENCODER, local_rank=0)
jasper_decoder = nemo_asr.JasperDecoderForCTC(
feat_in=1024,
num_classes=len(labels))
jasper_decoder = nemo_asr.JasperDecoderForCTC(feat_in=1024, num_classes=len(labels))
jasper_decoder.restore_from(CHECKPOINT_DECODER, local_rank=0)
greedy_decoder = nemo_asr.GreedyCTCDecoder()

if ENABLE_NGRAM and os.path.isfile(LM_PATH):
beam_search_with_lm = nemo_asr.BeamSearchDecoderWithLM(
vocab=labels,
beam_width=64,
alpha=2.0,
beta=1.0,
lm_path=LM_PATH,
num_cpus=max(os.cpu_count(), 1))
vocab=labels, beam_width=64, alpha=2.0, beta=1.0, lm_path=LM_PATH, num_cpus=max(os.cpu_count(), 1),
)
else:
print("Beam search is not enabled")

from app import routes # noqa
if __name__ == '__main__':
app.run()
40 changes: 22 additions & 18 deletions examples/applications/asr_service/app/routes.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,47 +3,51 @@
import os
import time

from app import (
ENABLE_NGRAM,
MODEL_YAML,
WORK_DIR,
app,
data_preprocessor,
greedy_decoder,
jasper_decoder,
jasper_encoder,
neural_factory,
)
from flask import request
from werkzeug.utils import secure_filename

from app import app, data_preprocessor, jasper_encoder, jasper_decoder, \
greedy_decoder, neural_factory, MODEL_YAML, WORK_DIR, ENABLE_NGRAM
import nemo
import nemo.collections.asr as nemo_asr

try:
from app import beam_search_with_lm
except ImportError:
print("Not using Beam Search Decoder with LM")
ENABLE_NGRAM = False
import nemo
import nemo.collections.asr as nemo_asr


def wav_to_text(manifest, greedy=True):
from ruamel.yaml import YAML

yaml = YAML(typ="safe")
with open(MODEL_YAML) as f:
jasper_model_definition = yaml.load(f)
labels = jasper_model_definition['labels']

# Instantiate necessary neural modules
data_layer = nemo_asr.AudioToTextDataLayer(
shuffle=False,
manifest_filepath=manifest,
labels=labels, batch_size=1)
data_layer = nemo_asr.AudioToTextDataLayer(shuffle=False, manifest_filepath=manifest, labels=labels, batch_size=1)

# Define inference DAG
audio_signal, audio_signal_len, _, _ = data_layer()
processed_signal, processed_signal_len = data_preprocessor(
input_signal=audio_signal,
length=audio_signal_len)
encoded, encoded_len = jasper_encoder(audio_signal=processed_signal,
length=processed_signal_len)
processed_signal, processed_signal_len = data_preprocessor(input_signal=audio_signal, length=audio_signal_len)
encoded, encoded_len = jasper_encoder(audio_signal=processed_signal, length=processed_signal_len)
log_probs = jasper_decoder(encoder_output=encoded)
predictions = greedy_decoder(log_probs=log_probs)

if ENABLE_NGRAM:
print('Running with beam search')
beam_predictions = beam_search_with_lm(
log_probs=log_probs, log_probs_length=encoded_len)
beam_predictions = beam_search_with_lm(log_probs=log_probs, log_probs_length=encoded_len)
eval_tensors = [beam_predictions]

if greedy:
Expand All @@ -52,6 +56,7 @@ def wav_to_text(manifest, greedy=True):
tensors = neural_factory.infer(tensors=eval_tensors)
if greedy:
from nemo.collections.asr.helpers import post_process_predictions

prediction = post_process_predictions(tensors[0], labels)
else:
prediction = tensors[0][0][0][0][1]
Expand Down Expand Up @@ -79,8 +84,7 @@ def transcribe_file():
greedy = True
if request.form.get('beam'):
if not ENABLE_NGRAM:
return ("Error: Beam Search with ngram LM is not enabled "
"on this server")
return "Error: Beam Search with ngram LM is not enabled " "on this server"
greedy = False
file_path = os.path.join(WORK_DIR, secure_filename(f.filename))
f.save(file_path)
Expand All @@ -89,7 +93,7 @@ def transcribe_file():
manifest['audio_filepath'] = file_path
manifest['duration'] = 18000
manifest['text'] = 'todo'
with open(file_path+".json", 'w') as fout:
with open(file_path + ".json", 'w') as fout:
fout.write(json.dumps(manifest))
start_t = time.time()
transcription = wav_to_text(file_path + ".json", greedy=greedy)
Expand Down
22 changes: 11 additions & 11 deletions examples/applications/asr_service/recognize.html
Original file line number Diff line number Diff line change
@@ -1,15 +1,15 @@
<html>
<h1 align="center">Jasper Speech Recognizer</h1>
<h3 align="center">powered by NeMo</h3>
<body style="border:3px solid green">
<div align="center">
<p>Upload your .wav single-channel 16kHz file for transcription!</p>
<form action = "http://<flask_service_ip>:5000/transcribe_file" method = "POST"
enctype = "multipart/form-data">
<input type = "file" name = "file" />
<input type = "checkbox" name="beam"> Rescore with ngram
<input type = "submit"/>
</form>
</div>
</body>
<body style="border:3px solid green">
<div align="center">
<p>Upload your .wav single-channel 16kHz file for transcription!</p>
<form action="http://<flask_service_ip>:5000/transcribe_file" method="POST"
enctype="multipart/form-data">
<input type="file" name="file"/>
<input type="checkbox" name="beam"> Rescore with ngram
<input type="submit"/>
</form>
</div>
</body>
</html>
Loading