Skip to content

Commit

Permalink
Add style checkers commands to setup.py script.
Browse files Browse the repository at this point in the history
Signed-off-by: Stanislav Beliaev <[email protected]>
  • Loading branch information
stasbel committed Jan 24, 2020
1 parent ad829a9 commit b371e31
Show file tree
Hide file tree
Showing 186 changed files with 8,565 additions and 11,194 deletions.
7 changes: 6 additions & 1 deletion Jenkinsfile
Original file line number Diff line number Diff line change
Expand Up @@ -14,9 +14,14 @@ pipeline {
sh 'python -c "import torch; print(torch.__version__)"'
}
}
stage('Code formatting checks') {
steps {
sh 'python setup.py check_style'
}
}
stage('PEP8 Checks') {
steps {
sh 'pycodestyle . --exclude=./tests/other/jasper.py,./tests/other/jasper_zero_dl.py,setup.py,./nemo/collections/nlp/utils/metrics/sacrebleu.py,./docs/sources/source/conf.py,./collections/nemo_nlp/build,./tests/test_squad.py,./nemo/package_info.py,./examples/asr/jasper_aishell_infer.py,./examples/asr/jasper_eval.py,./examples/nlp/asr_postprocessor.py,./examples/nlp/sentence_classification_with_bert.py,./examples/nlp/transformer_lm.py'
sh 'pycodestyle . --max-line-length=119 --exclude=./tests/other/jasper.py,./tests/other/jasper_zero_dl.py,setup.py,./nemo/collections/nlp/utils/metrics/sacrebleu.py,./docs/sources/source/conf.py,./collections/nemo_nlp/build,./tests/test_squad.py,./nemo/package_info.py,./examples/asr/jasper_aishell_infer.py,./examples/asr/jasper_eval.py,./examples/nlp/asr_postprocessor.py,./examples/nlp/sentence_classification_with_bert.py,./examples/nlp/transformer_lm.py'
}
}

Expand Down
44 changes: 18 additions & 26 deletions docs/docs_zh/sources/source/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
import os
import sys
from unittest.mock import MagicMock

import nemo

sys.path.insert(0, os.path.abspath("."))
Expand Down Expand Up @@ -49,13 +50,20 @@ def __getattr__(cls, name):

# ---- Mocking up the python modules. -----

MOCK_MODULES = ['torch', 'torch.nn', 'torch.utils', 'torch.optim',
'torch.utils.data', 'torch.utils.data.sampler',
'torchvision', 'torchvision.models',
'torchtext',
'h5py', 'kaldi_io',
'transformers'
]
MOCK_MODULES = [
'torch',
'torch.nn',
'torch.utils',
'torch.optim',
'torch.utils.data',
'torch.utils.data.sampler',
'torchvision',
'torchvision.models',
'torchtext',
'h5py',
'kaldi_io',
'transformers',
]

sys.modules.update((mod_name, Mock()) for mod_name in MOCK_MODULES)

Expand Down Expand Up @@ -163,12 +171,7 @@ def __getattr__(cls, name):
#
# This is required for the alabaster theme
# refs: http://alabaster.readthedocs.io/en/latest/installation.html#sidebars
html_sidebars = {
"**": [
"relations.html", # needs 'show_related': True theme option to display
"searchbox.html",
]
}
html_sidebars = {"**": ["relations.html", "searchbox.html",]} # needs 'show_related': True theme option to display

html_theme_options = {
"canonical_url": "",
Expand Down Expand Up @@ -207,10 +210,7 @@ def __getattr__(cls, name):
# Grouping the document tree into LaTeX files. List of tuples
# (source start file, target name, title,
# author, documentclass [howto, manual, or own class]).
latex_documents = [
(master_doc, "nemo.tex", "nemo Documentation", "AI App Design team",
"manual")
]
latex_documents = [(master_doc, "nemo.tex", "nemo Documentation", "AI App Design team", "manual",)]

# -- Options for manual page output ---------------------------------------

Expand All @@ -224,13 +224,5 @@ def __getattr__(cls, name):
# (source start file, target name, title, author,
# dir menu entry, description, category)
texinfo_documents = [
(
master_doc,
"nemo",
"nemo Documentation",
author,
"nemo",
"One line description of project.",
"Miscellaneous",
)
(master_doc, "nemo", "nemo Documentation", author, "nemo", "One line description of project.", "Miscellaneous",)
]
54 changes: 23 additions & 31 deletions docs/sources/source/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,14 +13,15 @@
# All configuration values have a default; values that are commented out
# serve to show the default.

import os
import sys
from unittest.mock import MagicMock

# If extensions (or modules to document with autodoc) are in another directory,
# add these directories to sys.path here. If the directory is relative to the
# documentation root, use os.path.abspath to make it absolute, like shown here.
#
import nemo
import os
import sys
from unittest.mock import MagicMock

sys.path.insert(0, os.path.abspath("."))
sys.path.insert(0, os.path.abspath("../../../"))
Expand Down Expand Up @@ -52,13 +53,20 @@ def __getattr__(cls, name):

# ---- Mocking up the python modules. -----

MOCK_MODULES = ['torch', 'torch.nn', 'torch.utils', 'torch.optim',
'torch.utils.data', 'torch.utils.data.sampler',
'torchvision', 'torchvision.models',
'torchtext',
'h5py', 'kaldi_io',
'transformers'
]
MOCK_MODULES = [
'torch',
'torch.nn',
'torch.utils',
'torch.optim',
'torch.utils.data',
'torch.utils.data.sampler',
'torchvision',
'torchvision.models',
'torchtext',
'h5py',
'kaldi_io',
'transformers',
]

sys.modules.update((mod_name, Mock()) for mod_name in MOCK_MODULES)

Expand All @@ -84,8 +92,8 @@ def __getattr__(cls, name):
]


locale_dirs = ['locale/'] # path is example but recommended.
gettext_compact = False # optional.
locale_dirs = ['locale/'] # path is example but recommended.
gettext_compact = False # optional.


# Add any paths that contain templates here, relative to this directory.
Expand Down Expand Up @@ -171,12 +179,7 @@ def __getattr__(cls, name):
#
# This is required for the alabaster theme
# refs: http://alabaster.readthedocs.io/en/latest/installation.html#sidebars
html_sidebars = {
"**": [
"relations.html", # needs 'show_related': True theme option to display
"searchbox.html",
]
}
html_sidebars = {"**": ["relations.html", "searchbox.html",]} # needs 'show_related': True theme option to display

html_theme_options = {
"canonical_url": "",
Expand Down Expand Up @@ -215,10 +218,7 @@ def __getattr__(cls, name):
# Grouping the document tree into LaTeX files. List of tuples
# (source start file, target name, title,
# author, documentclass [howto, manual, or own class]).
latex_documents = [
(master_doc, "nemo.tex", "nemo Documentation", "AI App Design team",
"manual")
]
latex_documents = [(master_doc, "nemo.tex", "nemo Documentation", "AI App Design team", "manual",)]

# -- Options for manual page output ---------------------------------------

Expand All @@ -232,13 +232,5 @@ def __getattr__(cls, name):
# (source start file, target name, title, author,
# dir menu entry, description, category)
texinfo_documents = [
(
master_doc,
"nemo",
"nemo Documentation",
author,
"nemo",
"One line description of project.",
"Miscellaneous",
)
(master_doc, "nemo", "nemo Documentation", author, "nemo", "One line description of project.", "Miscellaneous",)
]
29 changes: 11 additions & 18 deletions examples/applications/asr_service/app/__init__.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,12 @@
# Copyright (c) 2019 NVIDIA Corporation
import os
from flask import Flask

from ruamel.yaml import YAML

import nemo
import nemo.collections.asr as nemo_asr
from app import routes # noqa
from flask import Flask

app = Flask(__name__)
# make sure WORK_DIR exists before calling your service
Expand All @@ -28,34 +30,25 @@

# Instantiate necessary Neural Modules
# Note that the data layer is missing from here
neural_factory = nemo.core.NeuralModuleFactory(
placement=nemo.core.DeviceType.GPU,
backend=nemo.core.Backend.PyTorch)
data_preprocessor = nemo_asr.AudioToMelSpectrogramPreprocessor(
factory=neural_factory)
neural_factory = nemo.core.NeuralModuleFactory(placement=nemo.core.DeviceType.GPU, backend=nemo.core.Backend.PyTorch)
data_preprocessor = nemo_asr.AudioToMelSpectrogramPreprocessor(factory=neural_factory)
jasper_encoder = nemo_asr.JasperEncoder(
jasper=jasper_model_definition['JasperEncoder']['jasper'],
activation=jasper_model_definition['JasperEncoder']['activation'],
feat_in=jasper_model_definition[
'AudioToMelSpectrogramPreprocessor']['features'])
feat_in=jasper_model_definition['AudioToMelSpectrogramPreprocessor']['features'],
)
jasper_encoder.restore_from(CHECKPOINT_ENCODER, local_rank=0)
jasper_decoder = nemo_asr.JasperDecoderForCTC(
feat_in=1024,
num_classes=len(labels))
jasper_decoder = nemo_asr.JasperDecoderForCTC(feat_in=1024, num_classes=len(labels))
jasper_decoder.restore_from(CHECKPOINT_DECODER, local_rank=0)
greedy_decoder = nemo_asr.GreedyCTCDecoder()

if ENABLE_NGRAM and os.path.isfile(LM_PATH):
beam_search_with_lm = nemo_asr.BeamSearchDecoderWithLM(
vocab=labels,
beam_width=64,
alpha=2.0,
beta=1.0,
lm_path=LM_PATH,
num_cpus=max(os.cpu_count(), 1))
vocab=labels, beam_width=64, alpha=2.0, beta=1.0, lm_path=LM_PATH, num_cpus=max(os.cpu_count(), 1),
)
else:
print("Beam search is not enabled")

from app import routes # noqa

if __name__ == '__main__':
app.run()
39 changes: 21 additions & 18 deletions examples/applications/asr_service/app/routes.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,47 +3,50 @@
import os
import time

import nemo
import nemo.collections.asr as nemo_asr
from app import (
ENABLE_NGRAM,
MODEL_YAML,
WORK_DIR,
app,
data_preprocessor,
greedy_decoder,
jasper_decoder,
jasper_encoder,
neural_factory,
)
from flask import request
from werkzeug.utils import secure_filename

from app import app, data_preprocessor, jasper_encoder, jasper_decoder, \
greedy_decoder, neural_factory, MODEL_YAML, WORK_DIR, ENABLE_NGRAM
try:
from app import beam_search_with_lm
except ImportError:
print("Not using Beam Search Decoder with LM")
ENABLE_NGRAM = False
import nemo
import nemo.collections.asr as nemo_asr


def wav_to_text(manifest, greedy=True):
from ruamel.yaml import YAML

yaml = YAML(typ="safe")
with open(MODEL_YAML) as f:
jasper_model_definition = yaml.load(f)
labels = jasper_model_definition['labels']

# Instantiate necessary neural modules
data_layer = nemo_asr.AudioToTextDataLayer(
shuffle=False,
manifest_filepath=manifest,
labels=labels, batch_size=1)
data_layer = nemo_asr.AudioToTextDataLayer(shuffle=False, manifest_filepath=manifest, labels=labels, batch_size=1)

# Define inference DAG
audio_signal, audio_signal_len, _, _ = data_layer()
processed_signal, processed_signal_len = data_preprocessor(
input_signal=audio_signal,
length=audio_signal_len)
encoded, encoded_len = jasper_encoder(audio_signal=processed_signal,
length=processed_signal_len)
processed_signal, processed_signal_len = data_preprocessor(input_signal=audio_signal, length=audio_signal_len)
encoded, encoded_len = jasper_encoder(audio_signal=processed_signal, length=processed_signal_len)
log_probs = jasper_decoder(encoder_output=encoded)
predictions = greedy_decoder(log_probs=log_probs)

if ENABLE_NGRAM:
print('Running with beam search')
beam_predictions = beam_search_with_lm(
log_probs=log_probs, log_probs_length=encoded_len)
beam_predictions = beam_search_with_lm(log_probs=log_probs, log_probs_length=encoded_len)
eval_tensors = [beam_predictions]

if greedy:
Expand All @@ -52,6 +55,7 @@ def wav_to_text(manifest, greedy=True):
tensors = neural_factory.infer(tensors=eval_tensors)
if greedy:
from nemo.collections.asr.helpers import post_process_predictions

prediction = post_process_predictions(tensors[0], labels)
else:
prediction = tensors[0][0][0][0][1]
Expand Down Expand Up @@ -79,8 +83,7 @@ def transcribe_file():
greedy = True
if request.form.get('beam'):
if not ENABLE_NGRAM:
return ("Error: Beam Search with ngram LM is not enabled "
"on this server")
return "Error: Beam Search with ngram LM is not enabled " "on this server"
greedy = False
file_path = os.path.join(WORK_DIR, secure_filename(f.filename))
f.save(file_path)
Expand All @@ -89,7 +92,7 @@ def transcribe_file():
manifest['audio_filepath'] = file_path
manifest['duration'] = 18000
manifest['text'] = 'todo'
with open(file_path+".json", 'w') as fout:
with open(file_path + ".json", 'w') as fout:
fout.write(json.dumps(manifest))
start_t = time.time()
transcription = wav_to_text(file_path + ".json", greedy=greedy)
Expand Down
Loading

0 comments on commit b371e31

Please sign in to comment.