NVIDIA · stasbel · Jan 25, 2020 · Jan 24, 2020 · Jan 24, 2020 · Jan 25, 2020
diff --git a/Jenkinsfile b/Jenkinsfile
@@ -14,11 +14,16 @@ pipeline {
         sh 'python -c "import torch; print(torch.__version__)"'
       }
     }
-    stage('PEP8 Checks') {
+    stage('Install test requirements') {
       steps {
-        sh 'pycodestyle . --exclude=./tests/other/jasper.py,./tests/other/jasper_zero_dl.py,setup.py,./nemo/collections/nlp/utils/metrics/sacrebleu.py,./docs/sources/source/conf.py,./collections/nemo_nlp/build,./tests/test_squad.py,./nemo/package_info.py,./examples/asr/jasper_aishell_infer.py,./examples/asr/jasper_eval.py,./examples/nlp/asr_postprocessor.py,./examples/nlp/sentence_classification_with_bert.py,./examples/nlp/transformer_lm.py'
+        sh 'pip install -r requirements/requirements_test.txt'
       }
-    } 
+    }
+    stage('Code formatting checks') {
+      steps {
+        sh 'python setup.py check_style'
+      }
+    }
 
     stage('Unittests') {
       steps {

diff --git a/docs/docs_zh/sources/source/conf.py b/docs/docs_zh/sources/source/conf.py
@@ -20,6 +20,7 @@
 import os
 import sys
 from unittest.mock import MagicMock
+
 import nemo
 
 sys.path.insert(0, os.path.abspath("."))
@@ -49,13 +50,20 @@ def __getattr__(cls, name):
 
 # ---- Mocking up the python modules. -----
 
-MOCK_MODULES = ['torch', 'torch.nn', 'torch.utils', 'torch.optim',
-                'torch.utils.data', 'torch.utils.data.sampler',
-                'torchvision', 'torchvision.models',
-                'torchtext',
-                'h5py', 'kaldi_io',
-                'transformers'
-                ]
+MOCK_MODULES = [
+    'torch',
+    'torch.nn',
+    'torch.utils',
+    'torch.optim',
+    'torch.utils.data',
+    'torch.utils.data.sampler',
+    'torchvision',
+    'torchvision.models',
+    'torchtext',
+    'h5py',
+    'kaldi_io',
+    'transformers',
+]
 
 sys.modules.update((mod_name, Mock()) for mod_name in MOCK_MODULES)
 
@@ -163,12 +171,7 @@ def __getattr__(cls, name):
 #
 # This is required for the alabaster theme
 # refs: http://alabaster.readthedocs.io/en/latest/installation.html#sidebars
-html_sidebars = {
-    "**": [
-        "relations.html",  # needs 'show_related': True theme option to display
-        "searchbox.html",
-    ]
-}
+html_sidebars = {"**": ["relations.html", "searchbox.html",]}  # relations.html needs 'show_related': True to display
 
 html_theme_options = {
     "canonical_url": "",
@@ -207,10 +210,7 @@ def __getattr__(cls, name):
 # Grouping the document tree into LaTeX files. List of tuples
 # (source start file, target name, title,
 #  author, documentclass [howto, manual, or own class]).
-latex_documents = [
-    (master_doc, "nemo.tex", "nemo Documentation", "AI App Design team",
-     "manual")
-]
+latex_documents = [(master_doc, "nemo.tex", "nemo Documentation", "AI App Design team", "manual",)]
 
 # -- Options for manual page output ---------------------------------------
 
@@ -224,13 +224,5 @@ def __getattr__(cls, name):
 # (source start file, target name, title, author,
 #  dir menu entry, description, category)
 texinfo_documents = [
-    (
-        master_doc,
-        "nemo",
-        "nemo Documentation",
-        author,
-        "nemo",
-        "One line description of project.",
-        "Miscellaneous",
-    )
+    (master_doc, "nemo", "nemo Documentation", author, "nemo", "One line description of project.", "Miscellaneous",)
 ]
diff --git a/docs/sources/source/conf.py b/docs/sources/source/conf.py
@@ -13,14 +13,15 @@
 # All configuration values have a default; values that are commented out
 # infer to show the default.
 
+import os
+import sys
+from unittest.mock import MagicMock
+
 # If extensions (or modules to document with autodoc) are in another directory,
 # add these directories to sys.path here. If the directory is relative to the
 # documentation root, use os.path.abspath to make it absolute, like shown here.
 #
 import nemo
-import os
-import sys
-from unittest.mock import MagicMock
 
 sys.path.insert(0, os.path.abspath("."))
 sys.path.insert(0, os.path.abspath("../../../"))
@@ -52,13 +53,20 @@ def __getattr__(cls, name):
 
 # ---- Mocking up the python modules. -----
 
-MOCK_MODULES = ['torch', 'torch.nn', 'torch.utils', 'torch.optim',
-                'torch.utils.data', 'torch.utils.data.sampler',
-                'torchvision', 'torchvision.models',
-                'torchtext',
-                'h5py', 'kaldi_io',
-                'transformers'
-                ]
+MOCK_MODULES = [
+    'torch',
+    'torch.nn',
+    'torch.utils',
+    'torch.optim',
+    'torch.utils.data',
+    'torch.utils.data.sampler',
+    'torchvision',
+    'torchvision.models',
+    'torchtext',
+    'h5py',
+    'kaldi_io',
+    'transformers',
+]
 
 sys.modules.update((mod_name, Mock()) for mod_name in MOCK_MODULES)
 
@@ -83,10 +91,8 @@ def __getattr__(cls, name):
     "sphinxcontrib.bibtex",
 ]
 
-
-locale_dirs = ['locale/']   # path is example but recommended.
-gettext_compact = False     # optional.
-
+locale_dirs = ['locale/']  # path is example but recommended.
+gettext_compact = False  # optional.
 
 # Add any paths that contain templates here, relative to this directory.
 templates_path = ["_templates"]
@@ -171,12 +177,7 @@ def __getattr__(cls, name):
 #
 # This is required for the alabaster theme
 # refs: http://alabaster.readthedocs.io/en/latest/installation.html#sidebars
-html_sidebars = {
-    "**": [
-        "relations.html",  # needs 'show_related': True theme option to display
-        "searchbox.html",
-    ]
-}
+html_sidebars = {"**": ["relations.html", "searchbox.html",]}  # relations.html needs 'show_related': True to display
 
 html_theme_options = {
     "canonical_url": "",
@@ -215,10 +216,7 @@ def __getattr__(cls, name):
 # Grouping the document tree into LaTeX files. List of tuples
 # (source start file, target name, title,
 #  author, documentclass [howto, manual, or own class]).
-latex_documents = [
-    (master_doc, "nemo.tex", "nemo Documentation", "AI App Design team",
-     "manual")
-]
+latex_documents = [(master_doc, "nemo.tex", "nemo Documentation", "AI App Design team", "manual",)]
 
 # -- Options for manual page output ---------------------------------------
 
@@ -232,13 +230,5 @@ def __getattr__(cls, name):
 # (source start file, target name, title, author,
 #  dir menu entry, description, category)
 texinfo_documents = [
-    (
-        master_doc,
-        "nemo",
-        "nemo Documentation",
-        author,
-        "nemo",
-        "One line description of project.",
-        "Miscellaneous",
-    )
+    (master_doc, "nemo", "nemo Documentation", author, "nemo", "One line description of project.", "Miscellaneous",)
 ]
diff --git a/examples/applications/asr_service/app/__init__.py b/examples/applications/asr_service/app/__init__.py
@@ -19,7 +19,6 @@
 # This is only necessary if ENABLE_NGRAM = True. Otherwise, set to empty string
 LM_PATH = "<PATH_TO_KENLM_BINARY>"
 
-
 # Read model YAML
 yaml = YAML(typ="safe")
 with open(MODEL_YAML) as f:
@@ -28,34 +27,27 @@
 
 # Instantiate necessary Neural Modules
 # Note that data layer is missing from here
-neural_factory = nemo.core.NeuralModuleFactory(
-    placement=nemo.core.DeviceType.GPU,
-    backend=nemo.core.Backend.PyTorch)
-data_preprocessor = nemo_asr.AudioToMelSpectrogramPreprocessor(
-        factory=neural_factory)
+neural_factory = nemo.core.NeuralModuleFactory(placement=nemo.core.DeviceType.GPU, backend=nemo.core.Backend.PyTorch)
+data_preprocessor = nemo_asr.AudioToMelSpectrogramPreprocessor(factory=neural_factory)
 jasper_encoder = nemo_asr.JasperEncoder(
     jasper=jasper_model_definition['JasperEncoder']['jasper'],
     activation=jasper_model_definition['JasperEncoder']['activation'],
-    feat_in=jasper_model_definition[
-        'AudioToMelSpectrogramPreprocessor']['features'])
+    feat_in=jasper_model_definition['AudioToMelSpectrogramPreprocessor']['features'],
+)
 jasper_encoder.restore_from(CHECKPOINT_ENCODER, local_rank=0)
-jasper_decoder = nemo_asr.JasperDecoderForCTC(
-    feat_in=1024,
-    num_classes=len(labels))
+jasper_decoder = nemo_asr.JasperDecoderForCTC(feat_in=1024, num_classes=len(labels))
 jasper_decoder.restore_from(CHECKPOINT_DECODER, local_rank=0)
 greedy_decoder = nemo_asr.GreedyCTCDecoder()
 
 if ENABLE_NGRAM and os.path.isfile(LM_PATH):
     beam_search_with_lm = nemo_asr.BeamSearchDecoderWithLM(
-        vocab=labels,
-        beam_width=64,
-        alpha=2.0,
-        beta=1.0,
-        lm_path=LM_PATH,
-        num_cpus=max(os.cpu_count(), 1))
+        vocab=labels, beam_width=64, alpha=2.0, beta=1.0, lm_path=LM_PATH, num_cpus=max(os.cpu_count(), 1),
+    )
 else:
     print("Beam search is not enabled")
 
-from app import routes  # noqa
+# Must stay below the definitions above: app.routes imports them from this package.
+from app import routes  # noqa isort:skip
+
 if __name__ == '__main__':
     app.run()
diff --git a/examples/applications/asr_service/app/routes.py b/examples/applications/asr_service/app/routes.py
@@ -3,47 +3,51 @@
 import os
 import time
 
+from app import (
+    ENABLE_NGRAM,
+    MODEL_YAML,
+    WORK_DIR,
+    app,
+    data_preprocessor,
+    greedy_decoder,
+    jasper_decoder,
+    jasper_encoder,
+    neural_factory,
+)
 from flask import request
 from werkzeug.utils import secure_filename
 
-from app import app, data_preprocessor, jasper_encoder, jasper_decoder, \
-    greedy_decoder, neural_factory, MODEL_YAML, WORK_DIR, ENABLE_NGRAM
+import nemo
+import nemo.collections.asr as nemo_asr
+
 try:
     from app import beam_search_with_lm
 except ImportError:
     print("Not using Beam Search Decoder with LM")
     ENABLE_NGRAM = False
-import nemo
-import nemo.collections.asr as nemo_asr
 
 
 def wav_to_text(manifest, greedy=True):
     from ruamel.yaml import YAML
+
     yaml = YAML(typ="safe")
     with open(MODEL_YAML) as f:
         jasper_model_definition = yaml.load(f)
     labels = jasper_model_definition['labels']
 
     # Instantiate necessary neural modules
-    data_layer = nemo_asr.AudioToTextDataLayer(
-        shuffle=False,
-        manifest_filepath=manifest,
-        labels=labels, batch_size=1)
+    data_layer = nemo_asr.AudioToTextDataLayer(shuffle=False, manifest_filepath=manifest, labels=labels, batch_size=1)
 
     # Define inference DAG
     audio_signal, audio_signal_len, _, _ = data_layer()
-    processed_signal, processed_signal_len = data_preprocessor(
-        input_signal=audio_signal,
-        length=audio_signal_len)
-    encoded, encoded_len = jasper_encoder(audio_signal=processed_signal,
-                                          length=processed_signal_len)
+    processed_signal, processed_signal_len = data_preprocessor(input_signal=audio_signal, length=audio_signal_len)
+    encoded, encoded_len = jasper_encoder(audio_signal=processed_signal, length=processed_signal_len)
     log_probs = jasper_decoder(encoder_output=encoded)
     predictions = greedy_decoder(log_probs=log_probs)
 
     if ENABLE_NGRAM:
         print('Running with beam search')
-        beam_predictions = beam_search_with_lm(
-            log_probs=log_probs, log_probs_length=encoded_len)
+        beam_predictions = beam_search_with_lm(log_probs=log_probs, log_probs_length=encoded_len)
         eval_tensors = [beam_predictions]
 
     if greedy:
@@ -52,6 +56,7 @@ def wav_to_text(manifest, greedy=True):
     tensors = neural_factory.infer(tensors=eval_tensors)
     if greedy:
         from nemo.collections.asr.helpers import post_process_predictions
+
         prediction = post_process_predictions(tensors[0], labels)
     else:
         prediction = tensors[0][0][0][0][1]
@@ -79,8 +84,7 @@ def transcribe_file():
         greedy = True
         if request.form.get('beam'):
             if not ENABLE_NGRAM:
-                return ("Error: Beam Search with ngram LM is not enabled "
-                        "on this server")
+                return "Error: Beam Search with ngram LM is not enabled on this server"
             greedy = False
         file_path = os.path.join(WORK_DIR, secure_filename(f.filename))
         f.save(file_path)
@@ -89,7 +93,7 @@ def transcribe_file():
         manifest['audio_filepath'] = file_path
         manifest['duration'] = 18000
         manifest['text'] = 'todo'
-        with open(file_path+".json", 'w') as fout:
+        with open(file_path + ".json", 'w') as fout:
             fout.write(json.dumps(manifest))
         start_t = time.time()
         transcription = wav_to_text(file_path + ".json", greedy=greedy)

diff --git a/examples/applications/asr_service/recognize.html b/examples/applications/asr_service/recognize.html
@@ -1,15 +1,15 @@
 <html>
-<h1 align="center">Japer Speech Recognizer</h1>
+<h1 align="center">Jasper Speech Recognizer</h1>
 <h3 align="center">powered by NeMo</h3>
-   <body style="border:3px solid green">
-   <div align="center">
-   <p>Upload your .wav single-channel 16kHz file for transcription!</p>
-      <form action = "http://<flask_service_ip>:5000/transcribe_file" method = "POST"
-         enctype = "multipart/form-data">
-         <input type = "file" name = "file" />
-         <input type = "checkbox" name="beam"> Rescore with ngram
-         <input type = "submit"/>
-      </form>
-   </div>
-   </body>
+<body style="border:3px solid green">
+<div align="center">
+    <p>Upload your .wav single-channel 16kHz file for transcription!</p>
+    <form action="http://<flask_service_ip>:5000/transcribe_file" method="POST"
+          enctype="multipart/form-data">
+        <input type="file" name="file"/>
+        <input type="checkbox" name="beam"> Rescore with ngram
+        <input type="submit"/>
+    </form>
+</div>
+</body>
 </html>